Java: Manipulate docx document saved in Clob/Blob as text

Java: Manipulate docx document saved in Clob/Blob as text - java

I have a word document saved in Oracle Clob or mysql Blob I wrote the following code to read from DB --> save into .docx --> manipulate text inside docx document. my question is there any way to manipulate the text inside docx document without writing the data on docx document?
Thanks :)
private static String url = "jdbc:mysql://localhost/test";
private static String username = "root";
private static String password = "root";
public static void main( String[] args) throws ClassNotFoundException, SQLException, IOException
{
Connection conn = null;
Class.forName("com.mysql.jdbc.Driver");
conn = DriverManager.getConnection(url, username, password);
String sql = "SELECT name, description, data FROM documents ";
PreparedStatement stmt = conn.prepareStatement(sql);
ResultSet resultSet = stmt.executeQuery();
while (resultSet.next()) {
String name = resultSet.getString(1);
System.out.println("Name = " + name);
String description = resultSet.getString(2);
System.out.println("Description = " + description);
//
// Get the character stream of our CLOB data
//
Blob blob = resultSet.getBlob(3);
// System.out.println(convertLOB(blob));//convertLOB(blob).toString());
OutputStream fwriter = new FileOutputStream("C:\\The Appfuce Primer.docx");
readFromBlob(blob,fwriter);
String target = "C:\\The Appfuce Primer.docx";
File document = new File(target);
Parser parser = new AutoDetectParser();
ContentHandler handler = new BodyContentHandler();
Metadata metadata = new Metadata();
try {
parser.parse(new FileInputStream(document), handler, metadata, new ParseContext());
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (TikaException e) {
e.printStackTrace();
}
System.out.println(metadata);
System.out.println(handler.toString());
}
}
final static int bBufLen = 4 * 8192;
public static long readFromBlob(Blob blob, OutputStream out)
throws SQLException, IOException {
InputStream in = blob.getBinaryStream();
int length = -1;
long read = 0;
byte[] buf = new byte[bBufLen];
while ((length = in.read(buf)) != -1) {
out.write(buf, 0, length);
read += length;
}
in.close();
return read;
}

You can use the Apache POI project to get access to the content of your .docx document.
https://poi.apache.org/document/quick-guide-xwpf.html

Maybe you can call parser.parse directly using blob.getBinaryStream():
...
parser.parse(blob.getBinaryStream(), handler, metadata, new ParseContext());
...
So you don't have to create a temporary file containing the docx document.

Related

not able to read content pdf which is open in url

i want to read a content of the pdf which is open in url:-https://dms.careerbuilder.com/viewer?Token=4aeea5b52d6e48a7beca13a992540a66&key=7b6184962856e016a5cdfcb3e27c7c30b34b5caaa6607d7d4e408f4b2ebf9dfd
try {
String pdfContent = readPdfContent(perfecturl);
Assert.assertTrue(pdfContent.contains("Test Kumar"));
Assert.assertTrue(pdfContent.contains("XXXXX"));
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
public static String readPdfContent(String url) throws IOException {
URL pdfUrl = new URL(url);
InputStream in = pdfUrl.openStream();
BufferedInputStream bf = new BufferedInputStream(in);
PDDocument doc = PDDocument.load(bf);
int numberOfPages = getPageCount(doc);
System.out.println("The total number of pages "+numberOfPages);
String content = new PDFTextStripper().getText(doc);
doc.close();
return content;
}
public static int getPageCount(PDDocument doc) {
//get the total number of pages in the pdf document
int pageCount = doc.getNumberOfPages();
return pageCount;
}
it throws me exception:-
Error: End-of-File, expected line
at org.apache.pdfbox.pdfparser.BaseParser.readLine(BaseParser.java:1093)
at org.apache.pdfbox.pdfparser.COSParser.parseHeader(COSParser.java:2580)
at org.apache.pdfbox.pdfparser.COSParser.parsePDFHeader(COSParser.java:2551)
at org.apache.pdfbox.pdfparser.PDFParser.parse(PDFParser.java:219)
at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1228)
at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1128)
pdfbox not able to read the pdf and this url is valid PDF so can any one helep me to get this resolve.

Result set closed exception when trying to read a pdf file from sqlite table

I am trying to read a pdf file that has been stored in a table on SQLite database. when I run the Code it says ' Resultset is closed'.
public void syllabusAttach(){
String selectSQL = "SELECT Image_Reg FROM "+getRegulation()+" WHERE SubjectCode="+getSubCode()+"";
ResultSet rs = null;
FileOutputStream fos = null;
Connection conn = null;
Statement stmt = null;
try {
conn = connect();
stmt = conn.createStatement();
rs = stmt.executeQuery(selectSQL);
// write binary stream into file
InputStream is =rs.getBinaryStream("Image_Reg");
File file = new File("syllabus_"+getRegulation()+".pdf");
OutputStream os = new FileOutputStream(file);
System.out.println("Writing BLOB to file " + file.getAbsolutePath());
byte[] content = new byte[1024];
int size = 0;
while((size = is.read(content)) !=-1){
os.write(content,0,size);
}
} catch (SQLException | IOException e) {
System.out.println(e.getMessage());
} finally {
try {
if (rs != null) {
rs.close();
}
if (stmt != null) {
stmt.close();
}
if (conn != null) {
conn.close();
}
if (fos != null) {
fos.close();
}
} catch (SQLException | IOException e) {
System.out.println(e.getMessage());
}
}
}
write binary stream into file
File file = new File("syllabus_"+getRegulation()+".pdf");
fos = new FileOutputStream(file);
System.out.println("Writing BLOB to file " + file.getAbsolutePath());
while (rs.next()) {
InputStream input = rs.getBinaryStream("Image_Reg");
byte[] buffer = new byte[1024];
while (input.read(buffer) > 0) {
fos.write(buffer);
}
}
When I did the above changes to the code, I could see that the files are being generated but when I open them it says "this file cannot be opened"

I found that the getSubCode() in the SQl query is not returning a the desired string which is leading to the error.

How to populate mysql database table with metadata of uploaded file

I created servlet for uploading files to MySQL database:
#WebServlet("/UploadFileController") #MultipartConfig(maxFileSize = 16177215)
public class UploadFileController extends HttpServlet {
#Override
protected void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException
{
response.setContentType("text/html;charset=UTF-8");
PrintWriter out = response.getWriter();
InputStream inputStream = null;
Random rand = new Random();
int n = rand.nextInt(9999) + 1;
String idTemp=(String.valueOf(n));
String title=(request.getParameter("title"));
Part filePart = request.getPart("file_uploaded");
if (filePart != null)
{
System.out.println(filePart.getName());
System.out.println(filePart.getSize());
System.out.println(filePart.getContentType());
inputStream = filePart.getInputStream();
}
try
{
Db_Connection dbconn=new Db_Connection();
Connection conn= dbconn.Connection();
String sql = "INSERT INTO books (id, title, author, keywords, publication_year, filename, MIME, file) values (?, ?, ?, ?, ?, ?, ?, ?)";
PreparedStatement statement = conn.prepareStatement(sql);
statement.setString(1, idTemp);
statement.setString(2, title);
statement.setString(3, "Goran");
statement.setString(4, "Analisis");
statement.setString(5, "1995");
statement.setString(6, "OrgFileName");
statement.setString(7, "pdf");
if (inputStream != null)
{
statement.setBinaryStream(8, inputStream, (int) filePart.getSize());
}
int row = statement.executeUpdate();
if (row > 0)
{
out.println("File uploaded!!!");
conn.close();
RequestDispatcher rs = request.getRequestDispatcher("upload_form.jsp");
rs.include(request, response);
}
else
{
out.println("Couldn't upload your file!!!");
conn.close();
RequestDispatcher rs = request.getRequestDispatcher("upload_form.jsp");
rs.include(request, response);
}
}catch(Exception e){e.printStackTrace();}
}
}
Right now, I input manually some strings in database instead of real metadata of uploaded file.
I'm trying to take metadata by TIKA.
public class MetadataViewer {
public static void main(final String[] args) throws IOException, TikaException {
//Assume that boy.jpg is in your current directory
File file = new File("C:/Users/goran/Desktop/zadatak_grupa_B.pdf");
//Parser method parameters
Parser parser = new AutoDetectParser();
BodyContentHandler handler = new BodyContentHandler();
Metadata metadata = new Metadata();
FileInputStream inputstream = new FileInputStream(file);
ParseContext context = new ParseContext();
try {
parser.parse(inputstream, handler, metadata, context);
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println(handler.toString());
//getting the list of all meta data elements
String[] metadataNames = metadata.names();
for(String name : metadataNames) {
System.out.println(name + ": " + metadata.get(name));
}
}
}
Obvious, currently this class, right now, work only with spcified file as here, for example: File file = new File("C:/Users/goran/Desktop/zadatak_grupa_B.pdf"); and write meta data in Console.
How to read metadata of uploaded file and input them into database?

How to upload file in database?

I am trying to upload an image file with the code below, but the file is not being uploaded. The console still shows the message "1 Record Successfully Inserted."
Create table image
(
name varchar2(20),
photo blob
);
import java.sql.*;
import java.io.*;
public class ImageWriter {
static Connection connection = null;
static CallableStatement pstat = null;
static String connectionURL = null;
public static void main(String[] args) {
try{
Class.forName("oracle.jdbc.driver.OracleDriver");
connection = DriverManager.getConnection("jdbc:oracle:thin:#localhost:1521:xe", "SYSTEM", "SYSTEM");
PreparedStatement pstat = connection.prepareStatement("insert into image(name,photo) values(?,?)");
FileInputStream fin = new FileInputStream("E:\\test.jpg");
pstat.setString(1, "ABC");
pstat.setBinaryStream(2, fin,fin.available());
int result = pstat.executeUpdate();
System.out.println(result + " Record Successfully Inserted");
connection.close();
}
catch(Exception e){
e.printStackTrace();
}
}
}

The above code works fine.
I dont know how you verified the contents of database.
Here is my code to verify the db(blob column): Try with this method. I used your code to insert the image and I could retrieve the image successfully. (note : file extension should be same)
public static void getPic() {
try {
Class.forName("oracle.jdbc.driver.OracleDriver");
Connection conn = DriverManager.getConnection(
"jdbc:oracle:thin:#localhost:1521:orcl", "sys as sysdba",
"Oracle123");
ResultSet rs = null;
Statement stmt = null;
oracle.sql.BLOB photo = null;
conn.setAutoCommit(false);
stmt = conn.createStatement();
String name="ABC";
rs = stmt.executeQuery("select photo from image where name = '" + name + "'" );
rs.next();
photo = ((OracleResultSet) rs).getBLOB(1);
File f = new File("E:/image2.jpg");
f.getParentFile().mkdirs();
f.createNewFile();
InputStream in = photo.getBinaryStream();
ByteArrayOutputStream out = new ByteArrayOutputStream();
OutputStream outputStream = new FileOutputStream(f);
int bufferSize = 1024;
int length = (int) photo.length();
byte[] buffer = new byte[bufferSize];
while((length = in.read(buffer)) != -1) {
out.write(buffer,0,length);
}
out.writeTo(outputStream);
System.out.println("Image Retrieved");
out.close();
rs.close();
stmt.close();
conn.close();
} catch (SQLException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (ClassNotFoundException e) {
e.printStackTrace();
}
}

(how) can I download an image using JSoup?

I already know where the image is, but for simplicity's sake I wanted to download the image using JSoup itself. (This is to simplify getting cookies, referrer, etc.)
This is what I have so far:
//Open a URL Stream
Response resultImageResponse = Jsoup.connect(imageLocation).cookies(cookies).ignoreContentType(true).execute();
// output here
OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(new java.io.File(outputFolder + name));
//BufferedWriter out = new BufferedWriter(new FileWriter(outputFolder + name));
out.write(resultImageResponse.body()); // resultImageResponse.body() is where the image's contents are.
out.close();

I didn't even finish writing the question before I found the answer via JSoup and a little experimentation.
//Open a URL Stream
Response resultImageResponse = Jsoup.connect(imageLocation).cookies(cookies)
.ignoreContentType(true).execute();
// output here
FileOutputStream out = (new FileOutputStream(new java.io.File(outputFolder + name)));
out.write(resultImageResponse.bodyAsBytes()); // resultImageResponse.body() is where the image's contents are.
out.close();

Simply you can use these methods-
public static String storeImageIntoFS(String imageUrl, String fileName, String relativePath) {
String imagePath = null;
try {
byte[] bytes = Jsoup.connect(imageUrl).ignoreContentType(true).execute().bodyAsBytes();
ByteBuffer buffer = ByteBuffer.wrap(bytes);
String rootTargetDirectory = IMAGE_HOME + "/"+relativePath;
imagePath = rootTargetDirectory + "/"+fileName;
saveByteBufferImage(buffer, rootTargetDirectory, fileName);
} catch (IOException e) {
e.printStackTrace();
}
return imagePath;
}
public static void saveByteBufferImage(ByteBuffer imageDataBytes, String rootTargetDirectory, String savedFileName) {
String uploadInputFile = rootTargetDirectory + "/"+savedFileName;
File rootTargetDir = new File(rootTargetDirectory);
if (!rootTargetDir.exists()) {
boolean created = rootTargetDir.mkdirs();
if (!created) {
System.out.println("Error while creating directory for location- "+rootTargetDirectory);
}
}
String[] fileNameParts = savedFileName.split("\\.");
String format = fileNameParts[fileNameParts.length-1];
File file = new File(uploadInputFile);
BufferedImage bufferedImage;
InputStream in = new ByteArrayInputStream(imageDataBytes.array());
try {
bufferedImage = ImageIO.read(in);
ImageIO.write(bufferedImage, format, file);
} catch (IOException e) {
e.printStackTrace();
}
}

Develop Reference

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

Java: Manipulate docx document saved in Clob/Blob as text - java

You can use the Apache POI project to get access to the content of your .docx document. https://poi.apache.org/document/quick-guide-xwpf.html

Maybe you can call parser.parse directly using blob.getBinaryStream(): ... parser.parse(blob.getBinaryStream(), handler, metadata, new ParseContext()); ... So you don't have to create a temporary file containing the docx document.

Related

not able to read content pdf which is open in url

Result set closed exception when trying to read a pdf file from sqlite table

How to populate mysql database table with metadata of uploaded file

How to upload file in database?

(how) can I download an image using JSoup?

Categories

Resources