Java Apache Beam PCollections and how to make them work? - java

First of all let me describe the scenario.
Step 1. I have to read from a file, line by line. The file is a .json and each line has the following format:
{
"schema":{Several keys that are to be deleted},
"payload":{"key1":20001,"key2":"aaaa","key3":"bbbb","key4":"USD","key5":"100"}
}
Step 2. Delete schema object and end up with (added more examples for the sake of the next steps):
{"key1":20001,"key2":"aaaa","key3":"bbbb","key4":"USD","key5":"100"}
{"key1":20001,"key2":"aaaa","key3":"bbbb","key4":"US","key5":"90"}
{"key1":2002,"key2":"cccc","key3":"hhhh","key4":"CN","key5":"80"}
Step 3. Split these values into key and value by making them json in memory and use the strings as keys and values with map
{"key1":20001,"key2":"aaaa","key3":"bbbb"} = {"key4":"USD","key5":"100"}
{"key1":20001,"key2":"aaaa","key3":"bbbb"} = {"key4":"US","key5":"90"}
{"key1":2002,"key2":"cccc","key3":"hhhh"} = {"key4":"CN","key5":"80"}
Step 4, and the one I can't work out due to my lack of knowledge in Pcollections. I need to grab all the lines read and do a GroupByKey so that it would end up like:
{"key1":20001,"key2":"aaaa","key3":"bbbb"} = [
{"key4":"USD","key5":"100"},
{"key4":"US","key5":"90"} ]
{"key1":2002,"key2":"cccc","key3":"hhhh"} = {"key4":"CN","key5":"80"}
Righ now my code looks like this:
static void runSimplePipeline(PipelineOptionsCustom options) {
Pipeline p = Pipeline.create(options);
p.apply("ReadLines", TextIO.read().from(options.getInputFile()))
.apply("TransformData", ParDo.of(new DoFn<String, String>() {
#ProcessElement
public void processElement(ProcessContext c) {
Gson gson = new GsonBuilder().create();
ObjectMapper oMapper = new ObjectMapper();
JSONObject obj_key = new JSONObject();
JSONObject obj_value = new JSONObject();
List<String> listMainKeys = Arrays.asList(new String[]{"Key1", "Key2", "Key3"});
HashMap<String, Object> parsedMap = gson.fromJson(c.element().toString(), HashMap.class);
parsedMap.remove("schema");
Map<String, String> map = oMapper.convertValue(parsedMap.get("payload"), Map.class);
for (Map.Entry<String,String> entry : map.entrySet()) {
if (listMainKeys.contains(entry.getKey())) {
obj_key.put(entry.getKey(),entry.getValue());
} else {
obj_value.put(entry.getKey(),entry.getValue());
}
}
KV objectKV = KV.of(obj_key.toJSONString(), obj_value.toJSONString());
System.out.print(obj_key.toString() + " : " + obj_value.toString() +"\n");
}
})); <------- RIGHT HERE
p.run().waitUntilFinish();
}
Now the obvious part is that on where it says "RIGHT HERE" I should have another apply with CountByKey however that requires a full PCollection and that's what I do not really understand.

Here's the code, thanks to Guillem Xercavins's linked Github:
static void runSimplePipeline(PipelineOptionsCustom options) {
Pipeline p = Pipeline.create(options);
PCollection<Void> results = p.apply("ReadLines", TextIO.read().from(options.getInputFile()))
.apply("TransformData", ParDo.of(new DoFn<String, KV<String, String>>() {
#ProcessElement
public void processElement(ProcessContext c) {
Gson gson = new GsonBuilder().create();
ObjectMapper oMapper = new ObjectMapper();
JSONObject obj_key = new JSONObject();
JSONObject obj_value = new JSONObject();
List<String> listMainKeys = Arrays
.asList(new String[] { "EBELN", "AEDAT", "BATXT", "EKOTX", "Land1", "WAERS" });
HashMap<String, Object> parsedMap = gson.fromJson(c.element().toString(), HashMap.class);
parsedMap.remove("schema");
Map<String, String> map = oMapper.convertValue(parsedMap.get("payload"), Map.class);
for (Map.Entry<String, String> entry : map.entrySet()) {
if (listMainKeys.contains(entry.getKey())) {
obj_key.put(entry.getKey(), entry.getValue());
} else {
obj_value.put(entry.getKey(), entry.getValue());
}
}
KV objectKV = KV.of(obj_key.toJSONString(), obj_value.toJSONString());
c.output(objectKV);
}
})).apply("Group By Key", GroupByKey.<String, String>create())
.apply("Continue Processing", ParDo.of(new DoFn<KV<String, Iterable<String>>, Void>() {
#ProcessElement
public void processElement(ProcessContext c) {
System.out.print(c.element());
}
}));
p.run().waitUntilFinish();
}

Related

Restructuring JSON file in JAVA

I am having the following sample from a JSON file:
[
{
"0":
{
"File":"file1.java",
"Class":"com.ETransitionActionType",
"Method":"values",
"Annotation":"Not Found"
}
},
{
"1":
{
"File":"file2.java",
"Class":"com.ETransitionParams",
"Method":"values",
"Annotation":"Not Found"
}
},
{
"2":
{
"File":"file3.java",
"Class":"com.phloc.commons.id.IHasID",
"Method":"getID",
"Annotation":"Not Found"
}
},
{
"4":
{
"File":"file3.java",
"Class":"com.ExecuteTransitionActionHandler",
"Method":"createBadRequestResponse",
"Annotation":"Not Found"
}
},
{
"5":
{
"File":"file3.java",
"Class":"com.ExecuteTransitionActionHandler",
"Method":"extractParametersFromAction",
"Annotation":"Not Found"
}
}]
How can I restructure this file using java so that it looks like:
[{
"file1.java": {
"com.ETransitionActionType": {
"values": {
"Annotation": "Not Found"
}
}
}
},
{
"file2.java": {
"com.ETransitionParams": {
"values": {
"Annotation": "Not Found"
}
}
}
},
{
"file3.java": {
"com.phloc.commons.id.IHasID": {
"getID": {
"Annotation": "Not Found"
}
},
"com.ExecuteTransitionActionHandler": {
"getID": {
"Annotation": "Not Found"
},
"extractParametersFromAction": {
"Annotation": "Not Found"
}
}
}
}
]
i.e. Going through the JSON file, searching it, and wherever the "File" attribute has the same value("file3.java" for example), we list all the relevant classes and methods inside and the same applies for the "Class" attribute, if it has the same name, we list all the methods inside it(So it's like comparing and sorting the values for the "File" and "Class" attributes).
I started with JSON simple library and wrote like the code below, but don't know how to go further!
Object object = (JSONArray)parser.parse(new FileReader("rawOutput.json"));
JSONArray jsonArray = (JSONArray) object;
for(int i = 0; i < jsonArray.size(); i++) {
System.out.println(jsonArray.get(i));
JSONObject jsonObject = (JSONObject)jsonArray.get(i);
String c = jsonObject.get("" + i + "").toString();
}
Any ideas? Your help is really appreciated!!!
I wrote a code to do what do you need but first you have to add this library to your project if you don't have already org.json.zip library, because I didn't have a library for parsing Json texts so I used this library for formatting the Json data, and I'm sorry if you don't understand the code completely because your request isn't so easy as yourself know and I created three functions to get the result and although I wrote some comments to understand easily, this is the code:-
Edit
...
import org.json.*;
...
...
public static void main(String[] args) throws JSONException {
System.out.println(getFormattedJson("json text"));
}
private static String getFormattedJson(String text) throws JSONException{
JSONArray result = new JSONArray();
JSONArray jsonArray = null;
//get the json array
jsonArray = new JSONArray(text);
//loop through items in the array and insert them formatted to the result
for (int i = 0; i < jsonArray.length(); i++) {
//get object inside the number
JSONObject object = getJsonChild(jsonArray.getJSONObject(i));
//get these attributes
String file = object.getString("File");
String clas = object.getString("Class");
String meth = object.getString("Method");
String anno = object.getString("Annotation");
//create a custom type of the object's attributes
Map<String, String> map = new HashMap<>();
map.put("Annotation", anno);
Map<String, Object> map1 = new HashMap<>();
map1.put(meth, map);
Map<String, Object> map2 = new HashMap<>();
map2.put(clas, map1);
Map<String, Object> map3 = new HashMap<>();
map3.put(file, map2);
//loop through repeating values to also add them to one value as you expected
for (int j = jsonArray.length() - 1; j > i; j--) {
JSONObject obj = getJsonChild(jsonArray.getJSONObject(j));
String file1 = obj.getString("File");
String clas1 = obj.getString("Class");
String meth1 = obj.getString("Method");
String anno1 = obj.getString("Annotation");
if (file1.equals(file)) {
if (map2.containsKey(clas1)) {
if (childrenContains(map2, meth1)) {
//if the difference was annotation value
map.put("Annotation", anno1);
} else {
//if the difference was method names
Map<String, String> map_ = new HashMap<>();
map_.put("Annotation", anno1);
((Map<String, Object>) map2.get(clas1)).put(meth1, map_);
}
} else {
//if the difference was class names
Map<String, String> map_ = new HashMap<>();
map_.put("Annotation", anno1);
Map<String, Object> map1_ = new HashMap<>();
map1_.put(meth1, map_);
map2.put(clas1, map1_);
}
//remove the (value added) object
jsonArray.remove(j);
}
}
//add the map to the result
result.put(map3);
}
return result.toString(4);
}
private static boolean childrenContains(Map<String, Object> map1, String meth1) {
for (String childKey : map1.keySet()) {
Map<String, Object> child = (Map<String, Object>) map1.get(childKey);
if (child.containsKey(meth1))
return true;
}
return false;
}
private static JSONObject getJsonChild(JSONObject object) throws JSONException {
Iterator<String> keys = object.keys();
String key = "";
while (keys.hasNext()) {
key = (String) keys.next();
}
return object.getJSONObject(key);
}
And the result for your sample using my code is:-
[
{"file1.java": {"com.ETransitionActionType": {"values": {"Annotation": "Not Found"}}}},
{"file2.java": {"com.ETransitionParams": {"values": {"Annotation": "Not Found"}}}},
{"file3.java": {
"com.ExecuteTransitionActionHandler": {
"createBadRequestResponse": {"Annotation": "Not Found"},
"extractParametersFromAction": {"Annotation": "Not Found"}
},
"com.phloc.commons.id.IHasID": {"getID": {"Annotation": "Not Found"}}
}}
]
And if you want to get the json data from a file so use the following function to create the JSONArray easily:-
private static JSONArray readFromFile(String filePath){
try {
BufferedReader br = new BufferedReader(new FileReader(filePath));
StringBuilder sb = new StringBuilder();
String line = br.readLine();
while (line != null) {
sb.append(line);
sb.append(System.lineSeparator());
line = br.readLine();
}
return new JSONArray(sb.toString());
} catch (Exception e) {
System.out.println(e.getMessage());
return null;
}
}
And use it instead the text json data:-
...
//get the json array
jsonArray = readFromFile("FilePath");
...
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
public class Foo {
public static void main(String... args) throws IOException {
String json = formatJson(new FileReader("rawOutput.json"));
System.out.println(json);
}
public static String formatJson(Reader reader) throws IOException {
// group array items by fileName
final Function<List<Map<String, Object>>, Map<String, List<Object>>> groupByFileName =
data -> data.stream().collect(Collectors.groupingBy(map -> (String)map.get("File"), TreeMap::new,
Collectors.mapping(Function.identity(), Collectors.toList())));
// convert source item structure into required
final Function<Map.Entry<String, List<Object>>, Map<String, Object>> convert = entry -> {
Map<String, Map<String, Map<String, String>>> tmp = new LinkedHashMap<>();
entry.getValue().stream()
.map(value -> (Map<String, String>)value)
.forEach(map -> {
Map<String, Map<String, String>> classes = tmp.computeIfAbsent(map.get("Class"), cls -> new TreeMap<>());
Map<String, String> methods = classes.computeIfAbsent(map.get("Method"), method -> new TreeMap<>());
map.entrySet().stream()
.filter(e -> !"Class".equals(e.getKey()) && !"Method".equals(e.getKey()) && !"File".equals(e.getKey()))
.forEach(e -> methods.put(e.getKey(), e.getValue()));
});
return Collections.singletonMap(entry.getKey(), tmp);
};
ObjectMapper mapper = new ObjectMapper();
// read json as array of Maps
List<Map<String, Object>> data = Arrays.stream(mapper.readValue(reader, Map[].class))
.map(map -> map.values().iterator().next())
.map(item -> (Map<String, Object>)item)
.collect(Collectors.toList());
return mapper.writerWithDefaultPrettyPrinter().writeValueAsString(groupByFileName.apply(data).entrySet().stream()
.map(convert).collect(Collectors.toList()));
}
}
You could create a map of maps to represent your grouping by "File" and "Class" for your list of (inner) JSON objects. It might look similar to
final Function<JSONObject, String> fileFunction = (JSONObject jsonObject) -> jsonObject.getString("File");
final Function<JSONObject, String> classFunction = (JSONObject jsonObject) -> jsonObject.getString("Class");
final Map<String, Map<String, List<JSONObject>>> groupedJsonObjects = jsonObjects.stream()
.collect(Collectors.groupingBy(fileFunction, Collectors.groupingBy(classFunction)));

Sync to BigQuery in Java "Repeated record added outside of an array"

I am trying to insert a row to BigQuery using java.
The entity I was inserting has a field which is double nested.
Generating the entity suitable for BigQuery:
ObjectMapper mapper = new ObjectMapper();
String barcodeDetailsJSON = order.getBarcodeDetailsJSON();
List<StateForBQ> stateForBQList = new ArrayList<StateForBQ>();
for (State state : order.getStates()) {
StateForBQ stateForBQ = new StateForBQ(state);
stateForBQ.setSetOn(new Date(stateForBQ.getSetOn().getTime()/1000));
stateForBQList.add(stateForBQ);
}
List<BarcodeDetailForBQ> barcodeDetailForBQList = getBarcodeDetailsFromBarcodeDetailsJSON(barcodeDetailsJSON, order.getIsGrouped());
Without the following, state is getting set as null. (State is nested entity)
List<Map<String, Object>> stateMap =
mapper.convertValue(stateForBQList, new TypeReference<List<Map<String, Object>>>() {});
Without the following, barcodeDetails is getting set as null. (BarcodeDetails is double nested entity)
List<Map<String, Object>> barcodeMapList =
mapper.convertValue(barcodeDetailForBQList, new TypeReference<List<Map<String, Object>>>() {});
Without the follwing, productPriceDetails, productDetails, cgst, sgst are getting set as null
for (Map<String, Object> barcodeMap : barcodeMapList) {
barcodeMap.put("productPriceDetails", mapper.convertValue(barcodeMap.get("productPriceDetails"), new TypeReference<Map<String, Object>>() {}));
barcodeMap.put("productDetails", mapper.convertValue(barcodeMap.get("productDetails"), new TypeReference<Map<String, Object>>() {}));
barcodeMap.put("cgst", mapper.convertValue(barcodeMap.get("cgst"), new TypeReference<Map<String, Object>>() {}));
barcodeMap.put("sgst", mapper.convertValue(barcodeMap.get("sgst"), new TypeReference<Map<String, Object>>() {}));
}
Preparing the rowcontent
Map<String, Object> rowContent = new HashMap<>();
rowContent.put("orderId", order.getOrderId());
rowContent.put("customerId", order.getCustomerId());
rowContent.put("barcodeDetails", barcodeMapList);
rowContent.put("states", stateMap);
Inserting to BigQuery
Gson gson = new Gson();
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
String datasetName = "Latest_Data";
String tableName= "ORDER_TEMP";
// [START bigquery_table_insert_rows]
TableId tableId = TableId.of(datasetName, tableName);
// Values of the row to insert
String barcodeDetailsJSON = order.getBarcodeDetailsJSON();
List<BarcodeDetailForBQ> barcodeDetailForBQList = new OrderForBQ().getBarcodeDetailsFromBarcodeDetailsJSON(barcodeDetailsJSON, order.getIsGrouped());
String recordsContentString = gson.toJson(rowContent);
InsertAllResponse response =
bigquery.insertAll(
InsertAllRequest.newBuilder(tableId)
.addRow(""+orderId, rowContent)
// More rows can be added in the same RPC by invoking .addRow() on the builder
.build());
if (response.hasErrors()) {
// If any of the insertions failed, this lets you inspect the errors
for (Entry<Long, List<BigQueryError>> entry : response.getInsertErrors().entrySet()) {
// inspect row error
}
}
Following is the response I am getting.
{
insertErrors: {
0: [
{
reason: "invalid",
location: "",
message: "Repeated record added outside of an array."
}
]
}
}

Parsing JSON into a object

I'm trying to parse this JSON using gson:
{"hvs1":{"16191":[["TestFile3","C",["A"]],["TestFile3","-",["1G","1A"]]],"16193":[["TestFile3","C",["G"]]]},"hvs2":{"25":[["TestFile3","-",["1A"]]]},"outros":{"16423":[["TestFile3","A",["T"]]]}}
Into this object
public class Results {
private String regiaoAfetada;
private String posicaoReferencia;
private String nomeDoArquivo;
private String baseAlteradaReferencia;
private List<String> mutacaoEncontrada;
//get and set
}
And my test class to try to achive this, but I'm getting a error.
public class JsonTeste {
public static void main(String[] args) {
Gson gson = new Gson();
try (Reader reader = new FileReader("foobar.json")) {
Type type = new TypeToken<TreeMap<String, TreeMap>>() {
}.getType();
TreeMap<String, TreeMap<String, List<List<List<String>>>>> map = gson.fromJson(reader, type);
List<Results> listaMutacoes = new ArrayList<Results>();
for (Entry<String, TreeMap<String, List<List<List<String>>>>> regioesMap : map.entrySet()) {
TreeMap<String, List<List<List<String>>>> regiaoUm = regioesMap.getValue();
for (Entry<String, List<List<List<String>>>> regiaoUmResult : regiaoUm.entrySet()) {
List<List<List<String>>> resultados = regiaoUmResult.getValue();
for (List<List<String>> list : resultados) {
Results resultado = new Results();
resultado.setRegiaoAfetada(regioesMap.getKey());
resultado.setPosicaoReferencia(regiaoUmResult.getKey());
resultado.setNomeDoArquivo(list.get(0).toString());
resultado.setBaseAlteradaReferencia(list.get(1).toString());
resultado.setMutacaoEncontrada(list.get(2));
listaMutacoes.add(resultado);
}
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
The problem is when I try to parse this part
[
"TestFile3",
"-",
[
"1G",
"1A"
]
]
Because I have two Strings and a Array inside, so the problem Is when I try to place "TestFile3" into setNomeDoArquivo, but even if I comment this line, i get the same error in the second line.
resultado.setNomeDoArquivo(list.get(0).toString());
resultado.setBaseAlteradaReferencia(list.get(1).toString());
java.lang.ClassCastException: java.lang.String cannot be cast to java.util.List
Can you guys help me?
The List resultados is of List<string> or List<List<String>>.When you get the item of resultados it can be one of them. So to generalized declare it as List<List<Object>>
Try The below Code :
Gson gson = new Gson();
try (Reader reader = new FileReader("foobar.json")) {
Type type = new TypeToken<TreeMap<String, TreeMap>>() {
}.getType();
TreeMap<String, TreeMap<String, List<List<Object>>>> map = gson.fromJson(reader, type);
List<Results> listaMutacoes = new ArrayList<>();
for (Map.Entry<String, TreeMap<String, List<List<Object>>>> regioesMap : map.entrySet()) {
TreeMap<String, List<List<Object>>> regiaoUm = regioesMap.getValue();
for (Map.Entry<String, List<List<Object>>> regiaoUmResult : regiaoUm.entrySet()) {
List<List<Object>> resultados = regiaoUmResult.getValue();
for (List<Object> list : resultados) {
System.out.println(list);
Results resultado = new Results();
resultado.setRegiaoAfetada(regioesMap.getKey());
resultado.setPosicaoReferencia(regiaoUmResult.getKey());
resultado.setNomeDoArquivo((String) list.get(0));
resultado.setBaseAlteradaReferencia((String) list.get(1));
resultado.setMutacaoEncontrada((List<String>) list.get(2));
listaMutacoes.add(resultado);
}
}
}
} catch (Exception e) {
e.printStackTrace();
}

get the image name CodeNameOne

I want to get a list of objects from database
i'm 100% that i retreive the data but the list so my php code seems to be good
public ArrayList<Categorie> getListCategorie() {
ArrayList<Categorie> listcategories = new ArrayList<>();
ConnectionRequest con2 = new ConnectionRequest();
con2.setUrl("http://localhost/pidev2017/selectcategorie.php");
con2.addResponseListener(new ActionListener<NetworkEvent>() {
#Override
public void actionPerformed(NetworkEvent evt) {
try {
JSONParser j = new JSONParser();
Map<String, Object> catefories = j.parseJSON(new CharArrayReader(new String(con2.getResponseData()).toCharArray()));
List<Map<String, Object>> list = (List<Map<String, Object>>) catefories.get("Categorie");
for (Map<String, Object> obj : list) {
Categorie categorie = new Categorie();
categorie.setId(Integer.parseInt(obj.get("id").toString()));
categorie.setNomCategorie(obj.get("nomCategorie").toString());
listcategories.add(categorie);
}
} catch (IOException ex) {
}
}
});
NetworkManager.getInstance().addToQueue(con2);
return listcategories;
}
when i want to fetch my result "listcategories" i found that is empty
Change
NetworkManager.getInstance().addToQueue(con2);
to
NetworkManager.getInstance().addToQueueAndWait(con2);
It's possible that you try to get the result before the data has been fetched.

Serialize JSON document using Jackson

I am trying to serialize JSON document using Jackson library. Below is my JSON document that I have created by hand. Now I need to serialize this document using Jackson
Example-A
{
"v" : {
"site_id" : 0,
"price_score" : 0.5,
"confidence_score" : 0.2,
"categories": {
"123" : {
"price_score": "0.5",
"confidence_score": "0.2"
},
"321" : {
"price_score": "0.2",
"confidence_score": "0.4"
}
}
}
}
I am able to make this part of JSON document till now with my below code and using Jackson-
Example-B
{
"v" : {
"site_id" : 0,
"price_score" : 0.5,
"confidence_score" : 0.2
}
}
Now, I am not able to understand how do I add the list of categories (as shown in Example-A) portion in my Example-B JSON document with my below code?
public static void main(String[] args) {
Map<String, Object> props = new HashMap<String, Object>();
props.put("site-id", 0);
props.put("price-score", 0.5);
props.put("confidence-score", 0.2);
AttributeValue av = new AttributeValue();
av.setProperties(props);
/**
* this will print out the JSON document like I shown in my Example-B
* but I need to make it look like as in Example-A. I am not sure how
* to achieve that?
*/
System.out.println(av);
// serialize it
try {
String jsonStr = JsonMapperFactory.get().writeValueAsString(attr);
System.out.println(jsonStr);
} catch (JsonGenerationException e) {
e.printStackTrace();
} catch (JsonMappingException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
Can anybody help me with that?
Solution 1
In you case you can do it with
Map<String, Object> props = new HashMap<String, Object>();
props.put("site-id", 0);
props.put("price-score", 0.5);
props.put("confidence-score", 0.2);
Map<String, String> category123 = new HashMap<String, String>();
category123.put("price_score", "0.5");
category123.put("confidence_score", "0.2");
Map<String, String> category321 = new HashMap<String, String>();
category123.put("price_score", "0.2");
category123.put("confidence_score", "0.4");
Map<String, Object> categories = new HashMap<String, Object>();
categories.put("123", category123);
categories.put("321", category321);
props.put("categories", categories);
Solution 2:
Or you can simplify it with additional classes, e.g.:
public class Category
{
private String price_score;
private String confidence_score;
public Category(String price_score, String confidence_score)
{
this.price_score = price_score;
this.confidence_score = confidence_score;
}
public Category()
{
}
// getters/setters
}
main method
Map<String, Object> props = new HashMap<String, Object>();
props.put("site-id", 0);
props.put("price-score", 0.5);
props.put("confidence-score", 0.2);
Map<String, Category> categories = new HashMap<String, Category>();
categories.put("123", new Category("0.4", "0.2"));
categories.put("321", new Category("0.2", "0.5"));
props.put("categories", categories);

Categories

Resources