Convert avro file into csv in java web application

Convert avro file into csv in java web application - java

We have a requirement where we would like to convert an avro file which we download from our third party vendor API in our java web application. I tried going through some of the resources where all i could find was command s to execute with help of avro-tools.jar But i am looking for a way to achieve this within Java web application. Any help greatly appreciated.

You can use avro-tools to read the avro records , get Schema and records from the file
Attaching a rough draft :
I'm using JSON as intermediary ,You can modify it to any format of your choice
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.json.CDL;
import org.json.JSONArray;
import org.json.JSONException;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.commons.io.FileUtils;
public class AvroToCSV {
public static void readAvro(File file) {
// Read Avro ,parse Schema to get field names and parse it to json
try {
GenericDatumReader<GenericData.Record> datum = new GenericDatumReader<GenericData.Record>();
DataFileReader<GenericData.Record> reader = new DataFileReader<GenericData.Record>(file, datum);
GenericData.Record record = new GenericData.Record(reader.getSchema());
Schema schema = reader.getSchema();
List<String> fieldValues = new ArrayList<>();
JSONArray jsonArray = new JSONArray();
for (Field field : schema.getFields()) {
fieldValues.add(field.name());
}
while (reader.hasNext()) {
reader.next(record);
Map<String, String> jsonFileds = new HashMap<String, String>();
for (String item : fieldValues) {
System.out.println(item);
jsonFileds.put(item, record.get(item).toString());
}
jsonArray.put(jsonFileds);
}
System.out.println(jsonArray.toString());
reader.close();
jsonToCSV(jsonArray);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static void jsonToCSV(JSONArray json) {
File file = new File("avroToJson.csv");
String csv;
try {
csv = CDL.toString(json);
FileUtils.writeStringToFile(file, csv);
} catch (JSONException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static void main(String[] args) {
File f = new File("test.avro");
readAvro(f);
}
}

Related

Reading XSD from URL using Java

Objective : I want to read a WSDL and print the services in the WSDL, complex types and Complex type definitions.
Worked : I've used WSDL4J for reading WSDL and successfully able to print the services and their parameters (complex types). Now I want to read the complex type definitions which is available in XSD. I'm unable to read XSD .Is ther any way to do it ?
I'm getting XSModel as null
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import javax.wsdl.BindingOperation;
import javax.wsdl.Definition;
import javax.wsdl.WSDLException;
import javax.wsdl.xml.WSDLReader;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import com.ibm.wsdl.BindingImpl;
import com.ibm.wsdl.xml.WSDLReaderImpl;
import com.sun.org.apache.xerces.internal.impl.xs.XSImplementationImpl;
import com.sun.org.apache.xerces.internal.xs.XSLoader;
import com.sun.org.apache.xerces.internal.xs.XSModel;
public class WSDLDetails {
public static void main(String[] args) {
try {
String wsdlURL = "https://abc.xyz.com/webservice/MessagingSevice?WSDL";
String xsdURL = "https://abc.xyz.com/webservice/MessagingSevice?xsd=1";
java.lang.System.setProperty("https.protocols", "TLSv1.2");
getAllBindingOperation(wsdlURL);
readXSD(xsdURL);
} catch (Exception e) {
e.printStackTrace();
}
}
public static List<String> getAllBindingOperation(String wsdlUrl) {
List<BindingOperation> operationList = new ArrayList();
List<String> nameList = new ArrayList();
try {
WSDLReader reader = new WSDLReaderImpl();
reader.setFeature("javax.wsdl.verbose", false);
Definition definition = reader.readWSDL(wsdlUrl.toString());
Map<String, BindingImpl> defMap = definition.getAllBindings();
Collection<BindingImpl> collection = defMap.values();
for (BindingImpl binding : collection) {
operationList.addAll(binding.getBindingOperations());
}
for (BindingOperation operation:operationList) {
nameList.add(operation.getName());
System.out.println("Name :: " + operation.getName());
System.out.println("Request :: " + operation.getBindingInput());
System.out.println("Response :: " + operation.getBindingOutput());
}
} catch (WSDLException e) {
System.out.println("get wsdl operation fail.");
e.printStackTrace();
}
return nameList;
}
public static void readXSD(String xsdURL) {
try {
System.setProperty(DOMImplementationRegistry.PROPERTY, "com.sun.org.apache.xerces.internal.dom.DOMXSImplementationSourceImpl");
DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
com.sun.org.apache.xerces.internal.impl.xs.XSImplementationImpl impl = (XSImplementationImpl) registry.getDOMImplementation("XS-Loader");
XSLoader schemaLoader = impl.createXSLoader(null);
XSModel model = schemaLoader.loadURI(xsdURL);
System.out.println(model);
} catch (Exception e) {
e.printStackTrace();
}
}

You can use xsd2java plugin with maven
https://github.com/qaware/xsd2java-gradle-plugin

Here is an example showing how to retrieve the XSModel from an XSD URL, and print the complex types declared therein.
import org.apache.xerces.impl.xs.XMLSchemaLoader;
import org.apache.xerces.impl.xs.XSComplexTypeDecl;
import org.apache.xerces.impl.xs.XSElementDecl;
import org.apache.xerces.xs.XSConstants;
import org.apache.xerces.xs.XSModel;
import org.apache.xerces.xs.XSNamedMap;
import org.apache.xerces.xs.XSTypeDefinition;
public class Test {
public static void main(String[] args) {
try {
String xsdURL = "http://fsharp.github.io/FSharp.Data/data/po.xsd";
XMLSchemaLoader xsLoader = new XMLSchemaLoader();
XSModel xsModel = xsLoader.loadURI(xsdURL);
// print global element declarations
System.out.println("\nGlobal Element Declarations:");
XSNamedMap globalElemDecls = xsModel.getComponents(XSConstants.ELEMENT_DECLARATION);
globalElemDecls.forEach((k,v) -> System.out.println((XSElementDecl) v));
// print global complex type declarations
System.out.println("\nGlobal Complex Type Declarations:");
XSNamedMap globalComplexTypeDecls = xsModel.getComponents(XSTypeDefinition.COMPLEX_TYPE);
globalComplexTypeDecls.forEach((k,v) -> System.out.println((XSComplexTypeDecl) v));
} catch (Exception e) {
e.printStackTrace();
}
}
}
If you got null at xsLoader.loadURI(xsdURL), it is likely there are some flaws in the given XSD file. For example, "White spaces are required between publicId and systemId". You might need to fix these flaws first.

How to write extracted image from pdf to a file

Hopefully this is simple.
I am using pdfbox to extract images from a pdf. I want to write the images to a folder. I don't seem to get any output (the folder has read and write privileges).
I am probably not writing the output stream properly I think.
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
public final class JavaImgExtactor
{
public static void main(String[] args) throws IOException{
Stuff();
}
#SuppressWarnings("resource")
public static void Stuff() throws IOException{
File inFile = new File("/Users/sebastianzeki/Documents/Images Captured with Proc Data Audit.pdf");
PDDocument document = new PDDocument();
//document=null;
try {
document = PDDocument.load(inFile);
} catch (Exception e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
List pages = document.getDocumentCatalog().getAllPages();
Iterator iter = pages.iterator();
while (iter.hasNext()) {
PDPage page = (PDPage) iter.next();
System.out.println("page"+page);
PDResources resources = page.getResources();
Map pageImages = resources.getImages();
if (pageImages != null) {
Iterator imageIter = pageImages.keySet().iterator();
System.out.println("Success"+imageIter);
while (imageIter.hasNext()) {
String key = (String) imageIter.next();
PDXObjectImage image = (PDXObjectImage) pageImages.get(key);
FileOutputStream out = new FileOutputStream("/Users/sebastianzeki/Documents/ImgPDF.jpg");
try {
image.write2OutputStream(out);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
}
}

You are not closing the output stream, and the file name is always the same.
try (FileOutputStream out = new FileOutputStream("/Users/sebastianzeki/Documents/ImgPDF" + key + ".jpg") {
write2OutputStream(out);
} (Exception e) {
printStackTrace();
}
try-with-resources will automatically close out. Not sure whether key is usable as file name part.

image.write2OutputStream(out); writes the bytes from the image object to the out FileOutputStream object but it doesn't flush the buffer of out .
Add it should do the job :
out.flush();

Parsing JSON file with Jackson for Android (Java)

Title says all. Trying to simply parse a JSON file I added to this android project via Jackson to use throughout my app.
I was able to to do this successfully in a regular Java Application, but not in an Android project for an unknown reason. Will post both codes.
Android Code:
public class MainActivity extends ActionBarActivity {
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
ObjectMapper mapper = new ObjectMapper();
InputStream is = MainActivity.class.getResourceAsStream("contacts.json");
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
List<Contact> contacts = null;
Contact contact = null;
try {
// THIS IS THE LINE THAT BREAKS
contacts = mapper.readValue(is, TypeFactory.defaultInstance().constructCollectionType(List.class, Contact.class));
contact = contacts.get(0);
} catch (JsonParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (JsonMappingException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
Again, here's the line that breaks:
contacts = mapper.readValue(is, TypeFactory.defaultInstance().constructCollectionType(List.class, Contact.class));
This is the LogCat of the error:
com.fasterxml.jackson.databind.JsonMappingException: No content to map due to end-of-input
I've searched this plenty and have not found a solution for this trivial task. I'd like to also know why this code works in a Java Application but not in an Android Project. If onCreate could throw those exceptions like Main can in the Java App I think it would work, but obviously it can't. Thanks for the solution in advance.
Just for reference, here is my working Java Application code of using Jackson to parse a JSON file.
package test;
// file with accessors and mutators for JSON
import test.Contact;
import java.io.IOException;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.type.TypeFactory;
import com.fasterxml.jackson.core.JsonParseException;
import java.io.InputStream;
import java.util.List;
public class test {
public static void main(String[] args) throws JsonParseException, JsonMappingException, IOException {
ObjectMapper mapper = new ObjectMapper();
InputStream is = test.class.getResourceAsStream("contacts.json");
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
List<Contact> contacts = mapper.readValue(is, TypeFactory.defaultInstance().constructCollectionType(List.class, Contact.class));
// point to first contact
Contact contact = contacts.get(0);
for (int i = 0; i < contacts.size(); i++) {
contact = contacts.get(i);
System.out.println(contact.getName());
System.out.println(contact.getEmployeeId());
System.out.println(contact.getCompany());
System.out.println(contact.getDetailsURL());
System.out.println(contact.getSmallImageURL());
System.out.println(contact.getBirthdate());
System.out.println(contact.getPhone().getWork());
System.out.println(contact.getPhone().getHome());
System.out.println(contact.getPhone().getMobile());
}
}
}

see the content of a .bson file using java

I have a very large .bson file.
Now I have two question:
How can I see the content of that file? (I know it can do with "bsondump", but this command is slow, specialy for large database) (In fact I want to see the structure of that file)
How can I see the content of that file using java?

You can easily read/parse a bson file in Java using a BSONDecoder instance such as BasicBSONDecoder or DefaultBSONDecoder. These classes are included in mongo-java-driver.
Here's a simple example of a Java implementation of bsondump.
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import org.bson.BSONDecoder;
import org.bson.BSONObject;
import org.bson.BasicBSONDecoder;
public class BsonDump {
public void bsonDump(String filename) throws FileNotFoundException {
File file = new File(filename);
InputStream inputStream = new BufferedInputStream(new FileInputStream(file));
BSONDecoder decoder = new BasicBSONDecoder();
int count = 0;
try {
while (inputStream.available() > 0) {
BSONObject obj = decoder.readObject(inputStream);
if(obj == null){
break;
}
System.out.println(obj);
count++;
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
try {
inputStream.close();
} catch (IOException e) {
}
}
System.err.println(String.format("%s objects read", count));
}
public static void main(String args[]) throws Exception {
if (args.length < 1) {
//TODO usage
throw new IllegalArgumentException("Expected <bson filename> argument");
}
String filename = args[0];
BsonDump bsonDump = new BsonDump();
bsonDump.bsonDump(filename);
}
}

Unable to search journal article in liferay portlet using search util

I am using the below code but it is not able to search the journal article/web content in liferay 6.1
package com.abp.portlets;
import java.io.IOException;
import javax.portlet.PortletException;
import javax.portlet.RenderRequest;
import javax.portlet.RenderResponse;
import com.liferay.portal.kernel.search.BooleanClauseOccur;
import com.liferay.portal.kernel.search.BooleanQuery;
import com.liferay.portal.kernel.search.BooleanQueryFactoryUtil;
import com.liferay.portal.kernel.search.Field;
import com.liferay.portal.kernel.search.Hits;
import com.liferay.portal.kernel.search.ParseException;
import com.liferay.portal.kernel.search.SearchContext;
import com.liferay.portal.kernel.search.SearchEngineUtil;
import com.liferay.portal.kernel.search.SearchException;
import com.liferay.portal.kernel.util.Validator;
import com.liferay.portal.kernel.util.WebKeys;
import com.liferay.portal.theme.ThemeDisplay;
import com.liferay.util.bridges.mvc.MVCPortlet;
/**
* Portlet implementation class Search
*/
public class Search extends MVCPortlet {
public void doView(RenderRequest renderRequest, RenderResponse renderResponse)throws IOException, PortletException
{
ThemeDisplay themeDisplay = (ThemeDisplay)
renderRequest.getAttribute(WebKeys.THEME_DISPLAY);
SearchContext searchContext = new SearchContext();
searchContext.setSearchEngineId(SearchEngineUtil.SYSTEM_ENGINE_ID);
BooleanQuery contextQuery = BooleanQueryFactoryUtil.create(searchContext);
contextQuery.addRequiredTerm(Field.COMPANY_ID, themeDisplay.getCompanyId());
contextQuery.addRequiredTerm(Field.GROUP_ID, themeDisplay.getScopeGroupId());
BooleanQuery fullQuery = BooleanQueryFactoryUtil.create(searchContext);
String keywords = "mridul test";
BooleanQuery searchQuery = BooleanQueryFactoryUtil.create(searchContext);
if (Validator.isNotNull(keywords)) {
keywords = keywords.trim();
try {
searchQuery.addTerm(Field.TITLE, keywords,true);
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
// BooleanQuery fullQuery = BooleanQueryFactoryUtil.create(searchContext);
//fullQuery.add(contextQuery, BooleanClauseOccur.MUST);
// if (searchQuery.clauses().size() > 0) {
// fullQuery.add(searchQuery, BooleanClauseOccur.MUST);
// }
System.out.println("fullQuery===============>>"+fullQuery);
try {
fullQuery.add(searchQuery, BooleanClauseOccur.MUST);
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
try {
Hits hits = SearchEngineUtil.search(searchContext, fullQuery);
for(int i=0;i<hits.getLength();i++)
{
System.out.println(hits.snippet(i).toString());
}
} catch (SearchException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
The output I am getting is...
fullQuery===============>>+(+((title:mridul title:test)))
Please help..

Lucene uses fields to index data.
searchQuery.addTerm(**Field.CONTENT**, keywords,true);
Or
searchQuery.addTerms(new String[]{Field.TITLE,Field.DESCRIPTION,Field.CONTENT}, keywords, true)

It looks like you are searching for the exact phrase "mridul test". I think you probably want to search for "mridul" and "test". If so, give this a spin:
String[] terms = keywords.split(" ");
for(String term : terms){
searchQuery.addTerm(Field.TITLE, term,true);
}

Develop Reference

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

Convert avro file into csv in java web application - java

Related

Reading XSD from URL using Java

How to write extracted image from pdf to a file

Parsing JSON file with Jackson for Android (Java)

see the content of a .bson file using java

Unable to search journal article in liferay portlet using search util

Categories

Resources