I created a software based on two sources (stated in the code below) which detect HTML copied to the clipboard and change local images to base 64 ones.
This code works perfectly when I run it in Eclipse, but not using a JAR.
Initially, I was not using the method getHtmlDataFlavor, but I added it when I tried the software as a JAR. Then, I had to ensure in HtmlSelection.getTransferData to have if (flavor.getRepresentationClass() == java.io.Reader.class) otherwise it would crash. But using the JAR, I'm only getting the plain text version! Though, it stills works when ran in Eclipse.
Does someone have an idea ?
I am running on Windows 10.
Executing in command line using : java -jar ClipboardImageToBase64-1.0.0-jar-with-dependencies.jar
GitHub project :
https://github.com/djon2003/ClipboardImageToBase64
/**
* Library from HTML parsing : https://jsoup.org
*
* Code based on :
* - http://stackoverflow.com/a/14226456/214898
* - http://elliotth.blogspot.ca/2005/01/copying-html-to-clipboard-from-java.html
*/
import java.awt.Toolkit;
import java.awt.datatransfer.Clipboard;
import java.awt.datatransfer.ClipboardOwner;
import java.awt.datatransfer.DataFlavor;
import java.awt.datatransfer.Transferable;
import java.awt.datatransfer.UnsupportedFlavorException;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.FileUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ClipBoardListener extends Thread implements ClipboardOwner {
Clipboard sysClip = Toolkit.getDefaultToolkit().getSystemClipboard();
private static DataFlavor HTML_FLAVOR = new DataFlavor("text/html;class=java.io.Reader", "HTML");
private int nbImagesConverted = 0;
private Transferable currentTransferable;
#Override
public void run() {
Transferable trans = sysClip.getContents(this);
TakeOwnership(trans);
}
#Override
public void lostOwnership(Clipboard c, Transferable t) {
System.out.println("Copy to clipboard detected");
try {
ClipBoardListener.sleep(250); // waiting e.g for loading huge
// elements like word's etc.
} catch (Exception e) {
System.out.println("Exception: " + e);
}
Transferable contents = sysClip.getContents(this);
try {
process_clipboard(contents, c);
} catch (Exception ex) {
Logger.getLogger(ClipBoardListener.class.getName()).log(Level.SEVERE, null, ex);
}
TakeOwnership(currentTransferable);
}
void TakeOwnership(Transferable t) {
sysClip.setContents(t, this);
}
private void getHtmlDataFlavor(Transferable t) {
DataFlavor df = null;
for (DataFlavor tDf : t.getTransferDataFlavors()) {
if (tDf.getMimeType().contains("text/html")) {
if (tDf.getRepresentationClass() == java.io.Reader.class) {
df = tDf;
break;
}
}
}
HTML_FLAVOR = df;
}
public void process_clipboard(Transferable t, Clipboard c) {
String tempText = "";
Transferable trans = t;
currentTransferable = t;
getHtmlDataFlavor(t);
if (HTML_FLAVOR == null) {
System.out.println("No HTML flavor detected");
return;
}
nbImagesConverted = 0;
try {
if (trans != null ? trans.isDataFlavorSupported(HTML_FLAVOR) : false) {
if (trans.isDataFlavorSupported(DataFlavor.stringFlavor)) {
tempText = (String) trans.getTransferData(DataFlavor.stringFlavor);
}
java.io.Reader r = (java.io.Reader) trans.getTransferData(HTML_FLAVOR);
StringBuilder content = getReaderContent(r);
String newHtml = changeImages(content);
currentTransferable = new HtmlSelection(newHtml, tempText);
System.out.println("Converted " + nbImagesConverted + " images");
} else {
System.out.println("Not converted:" + trans.isDataFlavorSupported(HTML_FLAVOR));
System.out.println(trans.getTransferData(HTML_FLAVOR));
/*
for (DataFlavor tt : trans.getTransferDataFlavors()) {
if (tt.getMimeType().contains("text/html")) {
System.out.println("-------");
System.out.println(tt.toString());
}
}
*/
}
} catch (Exception e) {
currentTransferable = t;
System.out.println("Conversion error");
e.printStackTrace();
}
}
private String changeImages(StringBuilder content) throws RuntimeException, IOException {
Document doc = Jsoup.parse(content.toString());
Elements imgs = doc.select("img");
for (Element img : imgs) {
String filePath = img.attr("src");
filePath = filePath.replace("file:///", "");
filePath = filePath.replace("file://", "");
File file = new File(filePath);
if (file.exists()) {
String encoded = Base64.encodeBase64String(FileUtils.readFileToByteArray(file));
String extension = file.getName();
extension = extension.substring(extension.lastIndexOf(".") + 1);
String dataURL = "data:image/" + extension + ";base64," + encoded;
img.attr("src", dataURL); // or whatever
nbImagesConverted++;
}
}
String html = doc.outerHtml();
html = html.replaceAll("(?s)<!--.*?-->", ""); //Remove html comments
return html; // returns the modified HTML
}
private StringBuilder getReaderContent(java.io.Reader r) throws IOException {
char[] arr = new char[8 * 1024];
StringBuilder buffer = new StringBuilder();
int numCharsRead;
while ((numCharsRead = r.read(arr, 0, arr.length)) != -1) {
buffer.append(arr, 0, numCharsRead);
}
r.close();
return buffer;
}
private static class HtmlSelection implements Transferable {
private String html;
private String plainText;
public HtmlSelection(String html, String plainText) {
this.html = html;
this.plainText = plainText;
}
public DataFlavor[] getTransferDataFlavors() {
DataFlavor[] dfs = {HTML_FLAVOR, DataFlavor.stringFlavor};
return dfs;
}
public boolean isDataFlavorSupported(DataFlavor flavor) {
return flavor.getMimeType().contains("text/html") || flavor.getMimeType().contains("text/plain");
}
public Object getTransferData(DataFlavor flavor) throws UnsupportedFlavorException {
if (flavor.getMimeType().contains("text/html")) {
if (flavor.getRepresentationClass() == java.io.Reader.class) {
return new StringReader(html);
} else {
return html;
}
} else {
return plainText;
}
//throw new UnsupportedFlavorException(flavor);
}
}
}
I finally fixed all my problems. I created a GitHub project for those who are interested.
https://github.com/djon2003/ClipboardImageToBase64
Related
I have two project, a Java+Spring server and a unity project written in C#.
The unity project will be export into WebGl(it is not a game).
Now here is my problem:
I must load textures from the Java server.
How can i solve this problem?
My current solution, greatly simplified (not work in browser):
Server:
#SpringBootApplication
public class Application {
public static void main(String[] args) {
SpringApplication.run(Application.class, args);
}
}
-
#RestController
public class MyController {
#Autowired
private Loader2 loader2;
#RequestMapping("/skull2")
public ByteObj skull2(#RequestParam(value = "name", defaultValue = "World") String name) {
return loader2.getImages("Skull");
}
}
-
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.tomcat.util.codec.binary.Base64;
import org.springframework.stereotype.Controller;
#Controller
public class Loader2 {
String mainPath = "Path to images folder"; // in the future i will load many textures;
public List<String> getFileNameInPath(String foldername) {
List<String> result = new ArrayList<String>();
File folder = new File(mainPath + foldername);
File[] listOfFiles = folder.listFiles();
for (int i = 0; i < listOfFiles.length; i++) {
if (listOfFiles[i].isFile()) {
System.out.println("file " + listOfFiles[i].getName());
result.add(listOfFiles[i].getName());
} else if (listOfFiles[i].isDirectory()) {
System.out.println("Directory " + listOfFiles[i].getName());
}
}
return result;
}
public ByteObj getImages(String folder) {
List<String> images = getFileNameInPath(folder);
for (String string : images) {
try {
File file = new File(mainPath + folder + "//" + string);
FileInputStream imageInFile = new FileInputStream(file);
byte imageData[] = new byte[(int) file.length()];
imageInFile.read(imageData);
// Converting Image byte array into Base64 String
String imageDataString = encodeImage(imageData);
imageInFile.close();
return new ByteObj(imageDataString);
} catch (IOException e) {
e.printStackTrace();
}
}
return null;
}
public String encodeImage(byte[] imageByteArray) {
return Base64.encodeBase64String(imageByteArray);
}
}
-
import java.io.Serializable;
public class ByteObj implements Serializable {
public String obj;
public ByteObj(String obj) {
this.obj = obj;
}
public ByteObj() {
}
}
Client(load from server and decode)
The Unity stage contains a 3D cube and a camera(with script)
// C#
public class ByteObj {
public string obj;
public ByteObj(string obj) {
this.obj = obj;
}
public ByteObj() {
}
}
-
using UnityEngine;
using UnityEngine.UI;
using System.Collections;
using System;
using System.Text;
public class MyTestClass: MonoBehaviour {
public GameObject cube;
public float speed = 10f;
IEnumerator Start() {
yield return StartCoroutine( UploadImages() );
}
void Update () {
cube.transform.Rotate(Vector3.up, speed * Time.deltaTime);
}
IEnumerator UploadImages()
{
string aux = "";
WWW www = new WWW("http://localhost:8080/skull2");
yield return www;
aux = www.text;
if (www.text == null)
{
Debug.Log("Images not found");
}
else {
ByteObj list = JsonUtility.FromJson<ByteObj>(aux);
var myObject = list.obj;
//instruction.text = list.obj;
string s = myObject.Trim().Replace(" ", "+");
if (s.Length % 4 > 0)
s = s.PadRight(s.Length + 4 - s.Length % 4, '=');
byte[] b64_bytes = System.Convert.FromBase64String(myObject);
var decodedBytes = Convert.FromBase64String(myObject);
var tex = new Texture2D(2, 2);
tex.LoadImage(decodedBytes);
cube.GetComponent<Renderer>().material.mainTexture = tex;
}
}
}
It works as a Desktop app, but after export to WebGl I see only the cube(without textures)
Thank you for your help
PS. Sorry for my english
Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 7 years ago.
Improve this question
I am developing a web crawler application. When i run the program i am getting these error messages below:
i've got these errors after running the program for more that 3 hours. I tried to allocate memory by changing eclipse.ini setting to 2048 MB of ram as it was answered in this topic but still get the same errors after 3 hours or less. I should run the program for more that 2-3 days non-stopping to get analyse the results.
Can you tell me what i am missing here to get these error below ?
These are my classes:
seeds.txt
http://www.stanford.edu
http://www.archive.org
WebCrawler.java
package pkg.crawler;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;
import org.jsoup.HttpStatusException;
import org.jsoup.UnsupportedMimeTypeException;
import org.joda.time.DateTime;
public class WebCrawler {
public static Queue <LinkNodeLight> queue = new PriorityBlockingQueue <> (); // priority queue
public static final int n_threads = 5; // amount of threads
private static Set<String> processed = new LinkedHashSet <> (); // set of processed urls
private PrintWriter out; // output file
private PrintWriter err; // error file
private static Integer cntIntra = new Integer (0); // counters for intra- links in the queue
private static Integer cntInter = new Integer (0); // counters for inter- links in the queue
private static Integer dub = new Integer (0); // amount of skipped urls
public static void main(String[] args) throws Exception {
System.out.println("Running web crawler: " + new Date());
WebCrawler webCrawler = new WebCrawler();
webCrawler.createFiles();
try (Scanner in = new Scanner(new File ("seeds.txt"))) {
while (in.hasNext()) {
webCrawler.enque(new LinkNode (in.nextLine().trim()));
}
} catch (IOException e) {
e.printStackTrace();
return;
}
webCrawler.processQueue();
webCrawler.out.close();
webCrawler.err.close();
}
public void processQueue(){
/* run in threads */
Runnable r = new Runnable() {
#Override
public void run() {
/* queue may be empty but process is not finished, that's why we need to check if any links are being processed */
while (true) {
LinkNode link = deque();
if (link == null)
continue;
link.setStartTime(new DateTime());
boolean process = processLink(link);
link.setEndTime(new DateTime());
if (!process)
continue;
/* print the data to the csv file */
if (link.getStatus() != null && link.getStatus().equals(LinkNodeStatus.OK)) {
synchronized(out) {
out.println(getOutputLine(link));
out.flush();
}
} else {
synchronized(err) {
err.println(getOutputLine(link));
err.flush();
}
}
}
}
};
/* run n_threads threads which perform dequeue and process */
LinkedList <Thread> threads = new LinkedList <> ();
for (int i = 0; i < n_threads; i++) {
threads.add(new Thread(r));
threads.getLast().start();
}
for (Thread thread : threads) {
try {
thread.join();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
/* returns true if link was actually processed */
private boolean processLink(LinkNode inputLink) {
String url = getUrlGeneralForm(inputLink);
boolean process = true;
synchronized (processed) {
if (processed.contains(url)) {
process = false;
synchronized (dub) {dub++;}
} else
processed.add(url);
}
/* start processing only if the url have not been processed yet or not being processed */
if (process) {
System.out.println("Processing url " + url);
List<LinkNodeLight> outputLinks = parseAndWieghtResults(inputLink);
for (LinkNodeLight outputLink : outputLinks) {
String getUrlGeneralForumOutput = getUrlGeneralForm(outputLink);
/* add the new link to the queue only if it has not been processed yet */
process = true;
synchronized (processed) {
if (processed.contains(getUrlGeneralForumOutput)) {
process = false;
synchronized (dub) {dub++;}
}
}
if (process) {
enque(outputLink);
}
}
return true;
}
return false;
}
void enque(LinkNodeLight link){
link.setEnqueTime(new DateTime());
/* the add method requires implicit priority */
synchronized (queue) {
if (link.interLinks)
synchronized (cntInter) {cntInter++;}
else
synchronized (cntIntra) {cntIntra++;}
//queue.add(link, 100 - (int)(link.getWeight() * 100.f));
queue.add(link);
}
}
/**
* Picks an element from the queue
* #return top element from the queue or null if the queue is empty
*/
LinkNode deque(){
/* link must be checked */
LinkNode link = null;
synchronized (queue) {
link = (LinkNode) queue.poll();
if (link != null) {
link.setDequeTime(new DateTime());
if (link.isInterLinks())
synchronized (cntInter) {cntInter--;}
else
synchronized (cntIntra) {cntIntra--;}
}
}
return link;
}
private void createFiles() {
/* create output file */
try {
out = new PrintWriter(new BufferedWriter(new FileWriter("CrawledURLS.csv", false)));
out.println(generateHeaderFile());
} catch (IOException e) {
System.err.println(e);
}
/* create error file */
try {
err = new PrintWriter(new BufferedWriter(new FileWriter("CrawledURLSERROR.csv", false)));
err.println(generateHeaderFile());
} catch (IOException e) {
System.err.println(e);
}
}
/**
* formats the string so it can be valid entry in csv file
* #param s
* #return
*/
private static String format(String s) {
// replace " by ""
String ret = s.replaceAll("\"", "\"\"");
// put string into quotes
return "\"" + ret + "\"";
}
/**
* Creates the line that needs to be written in the outputfile
* #param link
* #return
*/
public static String getOutputLine(LinkNode link){
StringBuilder builder = new StringBuilder();
builder.append(link.getParentLink()!=null ? format(link.getParentLink().getUrl()) : "");
builder.append(",");
builder.append(link.getParentLink()!=null ? link.getParentLink().getIpAdress() : "");
builder.append(",");
builder.append(link.getParentLink()!=null ? link.getParentLink().linkProcessingDuration() : "");
builder.append(",");
builder.append(format(link.getUrl()));
builder.append(",");
builder.append(link.getDomain());
builder.append(",");
builder.append(link.isInterLinks());
builder.append(",");
builder.append(Util.formatDate(link.getEnqueTime()));
builder.append(",");
builder.append(Util.formatDate(link.getDequeTime()));
builder.append(",");
builder.append(link.waitingInQueue());
builder.append(",");
builder.append(queue.size());
/* Inter and intra links in queue */
builder.append(",");
builder.append(cntIntra.toString());
builder.append(",");
builder.append(cntInter.toString());
builder.append(",");
builder.append(dub);
builder.append(",");
builder.append(new Date ());
/* URL size*/
builder.append(",");
builder.append(link.getSize());
/* HTML file
builder.append(",");
builder.append(link.getFileName());*/
/* add HTTP error */
builder.append(",");
if (link.getParseException() != null) {
if (link.getParseException() instanceof HttpStatusException)
builder.append(((HttpStatusException) link.getParseException()).getStatusCode());
if (link.getParseException() instanceof SocketTimeoutException)
builder.append("Time out");
if (link.getParseException() instanceof MalformedURLException)
builder.append("URL is not valid");
if (link.getParseException() instanceof UnsupportedMimeTypeException)
builder.append("Unsupported mime type: " + ((UnsupportedMimeTypeException)link.getParseException()).getMimeType());
}
return builder.toString();
}
/**
* generates the Header for the file
* #param link
* #return
*/
private String generateHeaderFile(){
StringBuilder builder = new StringBuilder();
builder.append("Seed URL");
builder.append(",");
builder.append("Seed IP");
builder.append(",");
builder.append("Process Duration");
builder.append(",");
builder.append("Link URL");
builder.append(",");
builder.append("Link domain");
builder.append(",");
builder.append("Link IP");
builder.append(",");
builder.append("Enque Time");
builder.append(",");
builder.append("Deque Time");
builder.append(",");
builder.append("Waiting in the Queue");
builder.append(",");
builder.append("QueueSize");
builder.append(",");
builder.append("Intra in queue");
builder.append(",");
builder.append("Inter in queue");
builder.append(",");
builder.append("Dublications skipped");
/* time was printed, but no header was */
builder.append(",");
builder.append("Time");
/* URL size*/
builder.append(",");
builder.append("Size bytes");
/* HTTP errors */
builder.append(",");
builder.append("HTTP error");
return builder.toString();
}
String getUrlGeneralForm(LinkNodeLight link){
String url = link.getUrl();
if (url.endsWith("/")){
url = url.substring(0, url.length() - 1);
}
return url;
}
private List<LinkNodeLight> parseAndWieghtResults(LinkNode inputLink) {
List<LinkNodeLight> outputLinks = HTMLParser.parse(inputLink);
if (inputLink.hasParseException()) {
return outputLinks;
} else {
return URLWeight.weight(inputLink, outputLinks);
}
}
}
HTMLParser.java
package pkg.crawler;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.math.BigInteger;
import java.util.Formatter;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.logging.Logger;
import java.security.*;
import java.nio.file.Path;
import java.nio.file.Paths;
public class HTMLParser {
private static final int READ_TIMEOUT_IN_MILLISSECS = (int) TimeUnit.MILLISECONDS.convert(30, TimeUnit.SECONDS);
private static HashMap <String, Integer> filecounter = new HashMap<> ();
public static List<LinkNodeLight> parse(LinkNode inputLink){
List<LinkNodeLight> outputLinks = new LinkedList<>();
try {
inputLink.setIpAdress(IpFromUrl.getIp(inputLink.getUrl()));
String url = inputLink.getUrl();
if (inputLink.getIpAdress() != null) {
url.replace(URLWeight.getHostName(url), inputLink.getIpAdress());
}
Document parsedResults = Jsoup
.connect(url)
.timeout(READ_TIMEOUT_IN_MILLISSECS)
.get();
inputLink.setSize(parsedResults.html().length());
/* IP address moved here in order to speed up the process */
inputLink.setStatus(LinkNodeStatus.OK);
inputLink.setDomain(URLWeight.getDomainName(inputLink.getUrl()));
if (true) {
/* save the file to the html */
String filename = parsedResults.title();//digestBig.toString(16) + ".html";
if (filename.length() > 24) {
filename = filename.substring(0, 24);
}
filename = filename.replaceAll("[^\\w\\d\\s]", "").trim();
filename = filename.replaceAll("\\s+", " ");
if (!filecounter.containsKey(filename)) {
filecounter.put(filename, 1);
} else {
Integer tmp = filecounter.remove(filename);
filecounter.put(filename, tmp + 1);
}
filename = filename + "-" + (filecounter.get(filename)).toString() + ".html";
filename = Paths.get("downloads", filename).toString();
inputLink.setFileName(filename);
/* use md5 of url as file name */
try (PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(filename)))) {
out.println("<!--" + inputLink.getUrl() + "-->");
out.print(parsedResults.html());
out.flush();
out.close();
} catch (IOException e) {
e.printStackTrace();
}
}
String tag;
Elements tagElements;
List<LinkNode> result;
tag = "a[href";
tagElements = parsedResults.select(tag);
result = toLinkNodeObject(inputLink, tagElements, tag);
outputLinks.addAll(result);
tag = "area[href";
tagElements = parsedResults.select(tag);
result = toLinkNodeObject(inputLink, tagElements, tag);
outputLinks.addAll(result);
} catch (IOException e) {
inputLink.setParseException(e);
inputLink.setStatus(LinkNodeStatus.ERROR);
}
return outputLinks;
}
static List<LinkNode> toLinkNodeObject(LinkNode parentLink, Elements tagElements, String tag) {
List<LinkNode> links = new LinkedList<>();
for (Element element : tagElements) {
if(isFragmentRef(element)){
continue;
}
String absoluteRef = String.format("abs:%s", tag.contains("[") ? tag.substring(tag.indexOf("[") + 1, tag.length()) : "href");
String url = element.attr(absoluteRef);
if(url!=null && url.trim().length()>0) {
LinkNode link = new LinkNode(url);
link.setTag(element.tagName());
link.setParentLink(parentLink);
links.add(link);
}
}
return links;
}
static boolean isFragmentRef(Element element){
String href = element.attr("href");
return href!=null && (href.trim().startsWith("#") || href.startsWith("mailto:"));
}
}
Util.java
package pkg.crawler;
import java.util.Date;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
public class Util {
private static DateTimeFormatter formatter;
static {
formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss:SSS");
}
public static String linkToString(LinkNode inputLink){
return String.format("%s\t%s\t%s\t%s\t%s\t%s",
inputLink.getUrl(),
inputLink.getWeight(),
formatDate(inputLink.getEnqueTime()),
formatDate(inputLink.getDequeTime()),
differenceInMilliSeconds(inputLink.getEnqueTime(), inputLink.getDequeTime()),
inputLink.getParentLink()==null?"":inputLink.getParentLink().getUrl()
);
}
public static String linkToErrorString(LinkNode inputLink){
return String.format("%s\t%s\t%s\t%s\t%s\t%s",
inputLink.getUrl(),
inputLink.getWeight(),
formatDate(inputLink.getEnqueTime()),
formatDate(inputLink.getDequeTime()),
inputLink.getParentLink()==null?"":inputLink.getParentLink().getUrl(),
inputLink.getParseException().getMessage()
);
}
public static String formatDate(DateTime date){
return formatter.print(date);
}
public static long differenceInMilliSeconds(DateTime dequeTime, DateTime enqueTime){
return (dequeTime.getMillis()- enqueTime.getMillis());
}
public static int differenceInSeconds(Date enqueTime, Date dequeTime){
return (int)((dequeTime.getTime()/1000) - (enqueTime.getTime()/1000));
}
public static int differenceInMinutes(Date enqueTime, Date dequeTime){
return (int)((dequeTime.getTime()/60000) - (enqueTime.getTime()/60000));
}
}
URLWeight.java
package pkg.crawler;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;
public class URLWeight {
public static List<LinkNodeLight> weight(LinkNode sourceLink, List<LinkNodeLight> links) {
List<LinkNodeLight> interLinks = new LinkedList<>();
List<LinkNodeLight> intraLinks = new LinkedList<>();
for (LinkNodeLight link : links) {
if (isIntraLink(sourceLink, link)) {
intraLinks.add(link);
link.setInterLinks(false);
} else {
interLinks.add(link);
link.setInterLinks(true);
}
}
static boolean isIntraLink(LinkNodeLight sourceLink, LinkNodeLight link){
String parentDomainName = getHostName(sourceLink.getUrl());
String childDomainName = getHostName(link.getUrl());
return parentDomainName.equalsIgnoreCase(childDomainName);
}
public static String getHostName(String url) {
if(url == null){
// System.out.println("Deneme");
return "";
}
String domainName = new String(url);
int index = domainName.indexOf("://");
if (index != -1) {
domainName = domainName.substring(index + 3);
}
for (int i = 0; i < domainName.length(); i++)
if (domainName.charAt(i) == '?' || domainName.charAt(i) == '/') {
domainName = domainName.substring(0, i);
break;
}
/*if (index != -1) {
domainName = domainName.substring(0, index);
}*/
/* have to keep www in order to do replacements with IP */
//domainName = domainName.replaceFirst("^www.*?\\.", "");
return domainName;
}
public static String getDomainName(String url) {
String [] tmp= getHostName(url).split("\\.");
if (tmp.length == 0)
return "";
return tmp[tmp.length - 1];
}
}
PingTaskManager.java
package pkg.crawler;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class PingTaskManager {
private static ExecutorService executor = Executors.newFixedThreadPool(100);
public static void ping (LinkNode e) {
executor.submit(new PingTaks(e));
}
}
class PingTaks implements Runnable {
private LinkNode link;
public PingTaks( LinkNode link ) {
}
#Override
public void run() {
/* link.ping(); */
}
}
LinkNodeStatus.java
package pkg.crawler;
public enum LinkNodeStatus {
OK,
ERROR
}
LinkNodeLight.java
package pkg.crawler;
import org.joda.time.DateTime;
public class LinkNodeLight implements Comparable<LinkNodeLight> {
protected String url;
protected float weight;
protected DateTime enqueTime;
protected boolean interLinks;
public String getUrl() {
return url;
}
public float getWeight() {
return weight;
}
public void setWeight(float weight) {
this.weight = weight;
}
public DateTime getEnqueTime() {
return enqueTime;
}
public LinkNodeLight(String url) {
this.url = url;
}
public void setEnqueTime(DateTime enqueTime) {
this.enqueTime = enqueTime;
}
#Override
public int compareTo(LinkNodeLight link) {
if (this.weight < link.weight) return 1;
else if (this.weight > link.weight) return -1;
return 0;
}
}
LinkNode.java
package pkg.crawler;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.Socket;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.Date;
import org.joda.time.DateTime;
public class LinkNode extends LinkNodeLight{
public LinkNode(String url) {
super(url);
}
private String tag;
private LinkNode parentLink;
private IOException parseException = null; // initialize parse Exception with null
private float weight;
private DateTime dequeTime;
private DateTime startTime;
private DateTime endTime;
private LinkNodeStatus status;
private String ipAdress;
private int size;
private String filename;
private String domain;
public DateTime getStartTime() {
return startTime;
}
public void setStartTime(DateTime startTime) {
this.startTime = startTime;
}
public DateTime getEndTime() {
return endTime;
}
public void setEndTime(DateTime endTime) {
this.endTime = endTime;
}
public DateTime getDequeTime() {
return dequeTime;
}
public String getTag() {
return tag;
}
public LinkNode getParentLink() {
return parentLink;
}
public Exception getParseException() {
return parseException;
}
public boolean hasParseException(){
return parseException!=null;
}
public void setDequeTime(DateTime dequeTime) {
this.dequeTime = dequeTime;
}
public void setTag(String tag) {
this.tag = tag;
}
public void setParentLink(LinkNode parentLink) {
this.parentLink = parentLink;
}
public void setParseException(IOException parseException) {
this.parseException = parseException;
}
#Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
LinkNode link = (LinkNode) o;
if (url != null ? !url.equals(link.url) : link.url != null) {
return false;
}
return true;
}
#Override
public int hashCode() {
return url != null ? url.hashCode() : 0;
}
public long waitingInQueue(){
return Util.differenceInMilliSeconds( dequeTime,enqueTime );
}
public long linkProcessingDuration(){
return Util.differenceInMilliSeconds( endTime,startTime );
}
#Override
public String toString() {
StringBuilder sb = new StringBuilder("LinkNode{");
sb.append("url='").append(url).append('\'');
sb.append(", score=").append(weight);
sb.append(", enqueTime=").append(enqueTime);
sb.append(", dequeTime=").append(dequeTime);
sb.append(", tag=").append(tag);
if(parentLink!=null) {
sb.append(", parentLink=").append(parentLink.getUrl());
}
sb.append('}');
return sb.toString();
}
public void setStatus(LinkNodeStatus status) {
this.status = status;
}
public LinkNodeStatus getStatus(){
if (status == null) {
status = LinkNodeStatus.ERROR;
}
return status;
}
// check server link is it exist or not
/* this method gives fake errors
public LinkNodeStatus ping () {
boolean reachable = false;
String sanitizeUrl = url.replaceFirst("^https", "http");
try {
HttpURLConnection connection = (HttpURLConnection) new URL(sanitizeUrl).openConnection();
connection.setConnectTimeout(1000);
connection.setRequestMethod("HEAD");
int responseCode = connection.getResponseCode();
System.err.println(url + " " + responseCode);
reachable = (200 <= responseCode && responseCode <= 399);
} catch (IOException exception) {
}
return reachable?LinkNodeStatus.OK: LinkNodeStatus.ERROR;
}*/
public String getIpAdress() {
return ipAdress;
}
public void setIpAdress(String ipAdress) {
this.ipAdress = ipAdress;
}
/* methods for controlling url size */
public void setSize(int size) {
this.size = size;
}
public int getSize() {
return this.size;
}
public void setFileName(String filename) {
this.filename = filename;
}
public String getFileName() {
return this.filename;
}
public String getDomain() {
return domain;
}
public void setDomain(String domain) {
this.domain = domain;
}
}
I tried to allocate memory by changing eclipse.ini setting to 2048 MB of ram as it was answered in this topic but still get the same errors after 3 hours or less.
I hate to repeat myself(*), but in eclipse.ini you set up the memory for Eclipse, which has nothing to do with the memory for your crawler.
When using command line, you need to start it via java -Xmx2G pkg.crawler.WebCrawler.
When starting from Eclipse, you need to add -Xmx2G to the run configuration ("VM arguments" rather than "Program arguments").
(*) Link to a deleted question; requires some reputation to view.
How we can convert a dicom file(.dcm) to a jpeg image using java?
Here is my code:
import java.io.File;
import java.io.IOException;
import org.dcm4che2.tool.dcm2jpg.Dcm2Jpg;
public class MainClass {
public static void main(String[] args) throws IOException{
Dcm2Jpg conv = new Dcm2Jpg();
conv.convert(new File("C:\\Users\\lijo.joseph\\Desktop\\Dicom\\IM-0001-0001.dcm"), new File("C:\\Users\\lijo.joseph\\Desktop\\Dicom\\IM-0001-0001.jpg"));
}
}
and i am getting the following error while running the project
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/commons/cli/ParseException
at MainClass.main(MainClass.java:7)
Caused by: java.lang.ClassNotFoundException: org.apache.commons.cli.ParseException
at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 1 more
please help and thanks in advance
Here is the link Converting DICOM to JPEG using dcm4che 2
Following is my code which works perfectly.I have placed it with imports so it might be use-full.
import java.awt.image.BufferedImage;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.ImageInputStream;
import org.dcm4che2.imageio.plugins.dcm.DicomImageReadParam;
import com.sun.image.codec.jpeg.JPEGCodec;
import com.sun.image.codec.jpeg.JPEGImageEncoder;
public class Examplke1 {
static BufferedImage myJpegImage=null;
public static void main(String[] args) {
File file = new File("test5/12840.dcm");
Iterator<ImageReader> iterator =ImageIO.getImageReadersByFormatName("DICOM");
while (iterator.hasNext()) {
ImageReader imageReader = (ImageReader) iterator.next();
DicomImageReadParam dicomImageReadParam = (DicomImageReadParam) imageReader.getDefaultReadParam();
try {
ImageInputStream iis = ImageIO.createImageInputStream(file);
imageReader.setInput(iis,false);
myJpegImage = imageReader.read(0, dicomImageReadParam);
iis.close();
if(myJpegImage == null){
System.out.println("Could not read image!!");
}
} catch (IOException e) {
e.printStackTrace();
}
File file2 = new File("/test.jpg");
try {
OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(file2));
JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(outputStream);
encoder.encode(myJpegImage);
outputStream.close();
} catch (IOException e) {
e.printStackTrace();
}
System.out.println("Completed");
}
}
}
Jars Used to Run it
dcm4che-imageio-2.0.28.jar
dcm4che-image-2.0.28.jar
jai_imageio-1.1.jar
dcm4che-core-2.0.28.jar
slf4j-api-1.7.7.jar
slf4j-log4j12-1.7.7.jar
apache-logging-log4j.jar
Hope it helps.
This Code is used for Converting Dicom Image to JPG Image
import java.io.File;
import java.io.IOException;
public class Dcm2JpgTest {
public static void main(String[] args) throws IOException {
try{
File src = new File("d:\\Test.dcm");
File dest = new File("d:\\Test.jpg");
Dcm2Jpeg dcm2jpg= new Dcm2Jpeg();
dcm2jpg.convert(src, dest);
System.out.println("Completed");
} catch(IOException e){
e.printStackTrace();
} catch(Exception e){
e.printStackTrace();
}
}
}
Dcm2Jpeg.java File
import java.awt.image.BufferedImage;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.List;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.ImageInputStream;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.dcm4che2.data.DicomObject;
import org.dcm4che2.imageio.plugins.dcm.DicomImageReadParam;
import org.dcm4che2.io.DicomInputStream;
import org.dcm4che2.util.CloseUtils;
import com.sun.image.codec.jpeg.JPEGCodec;
import com.sun.image.codec.jpeg.JPEGImageEncoder;
public class Dcm2Jpeg {
private static final String USAGE =
"dcm2jpg [Options] <dcmfile> <jpegfile>\n" +
"or dcm2jpg [Options] <dcmfile>... <outdir>\n" +
"or dcm2jpg [Options] <indir>... <outdir>";
private static final String DESCRIPTION =
"Convert DICOM image(s) to JPEG(s)\nOptions:";
private static final String EXAMPLE = null;
private int frame = 1;
private float center;
private float width;
private String vlutFct;
private boolean autoWindowing;
private DicomObject prState;
private short[] pval2gray;
private String fileExt = ".jpg";
private void setFrameNumber(int frame) {
this.frame = frame;
}
private void setWindowCenter(float center) {
this.center = center;
}
private void setWindowWidth(float width) {
this.width = width;
}
public final void setVoiLutFunction(String vlutFct) {
this.vlutFct = vlutFct;
}
private final void setAutoWindowing(boolean autoWindowing) {
this.autoWindowing = autoWindowing;
}
private final void setPresentationState(DicomObject prState) {
this.prState = prState;
}
private final void setPValue2Gray(short[] pval2gray) {
this.pval2gray = pval2gray;
}
public final void setFileExt(String fileExt) {
this.fileExt = fileExt;
}
public void convert(File src, File dest) throws IOException {
Iterator<ImageReader> iter = ImageIO.getImageReadersByFormatName("DICOM");
ImageReader reader = iter.next();
DicomImageReadParam param =
(DicomImageReadParam) reader.getDefaultReadParam();
param.setWindowCenter(center);
param.setWindowWidth(width);
param.setVoiLutFunction(vlutFct);
param.setPresentationState(prState);
param.setPValue2Gray(pval2gray);
param.setAutoWindowing(autoWindowing);
ImageInputStream iis = ImageIO.createImageInputStream(src);
BufferedImage bi;
OutputStream out = null;
try {
reader.setInput(iis, false);
bi = reader.read(frame - 1, param);
if (bi == null) {
System.out.println("\nError: " + src + " - couldn't read!");
return;
}
out = new BufferedOutputStream(new FileOutputStream(dest));
JPEGImageEncoder enc = JPEGCodec.createJPEGEncoder(out);
enc.encode(bi);
} finally {
CloseUtils.safeClose(iis);
CloseUtils.safeClose(out);
}
//System.out.print('.');
}
public int mconvert(List<String> args, int optind, File destDir)
throws IOException {
int count = 0;
for (int i = optind, n = args.size() - 1; i < n; ++i) {
File src = new File(args.get(i));
count += mconvert(src, new File(destDir, src2dest(src)));
}
return count;
}
private String src2dest(File src) {
String srcname = src.getName();
return src.isFile() ? srcname + this.fileExt : srcname;
}
public int mconvert(File src, File dest) throws IOException {
if (!src.exists()) {
System.err.println("WARNING: No such file or directory: " + src
+ " - skipped.");
return 0;
}
if (src.isFile()) {
try {
convert(src, dest);
} catch (Exception e) {
System.err.println("WARNING: Failed to convert " + src + ":");
e.printStackTrace(System.err);
System.out.print('F');
return 0;
}
System.out.print('.');
return 1;
}
File[] files = src.listFiles();
if (files.length > 0 && !dest.exists()) {
dest.mkdirs();
}
int count = 0;
for (int i = 0; i < files.length; ++i) {
count += mconvert(files[i], new File(dest, src2dest(files[i])));
}
return count;
}
#SuppressWarnings("unchecked")
public static void main(String args[]) throws Exception {
CommandLine cl = parse(args);
Dcm2Jpeg dcm2jpg = new Dcm2Jpeg();
if (cl.hasOption("f")) {
dcm2jpg.setFrameNumber(
parseInt(cl.getOptionValue("f"),
"illegal argument of option -f",
1, Integer.MAX_VALUE));
}
if (cl.hasOption("p")) {
dcm2jpg.setPresentationState(loadDicomObject(
new File(cl.getOptionValue("p"))));
}
if (cl.hasOption("pv2gray")) {
dcm2jpg.setPValue2Gray(loadPVal2Gray(
new File(cl.getOptionValue("pv2gray"))));
}
if (cl.hasOption("c")) {
dcm2jpg.setWindowCenter(
parseFloat(cl.getOptionValue("c"),
"illegal argument of option -c"));
}
if (cl.hasOption("w")) {
dcm2jpg.setWindowWidth(
parseFloat(cl.getOptionValue("w"),
"illegal argument of option -w"));
}
if (cl.hasOption("sigmoid")) {
dcm2jpg.setVoiLutFunction(DicomImageReadParam.SIGMOID);
}
dcm2jpg.setAutoWindowing(!cl.hasOption("noauto"));
if (cl.hasOption("jpgext")) {
dcm2jpg.setFileExt(cl.getOptionValue("jpgext"));
}
final List<String> argList = cl.getArgList();
int argc = argList.size();
File dest = new File(argList.get(argc-1));
long t1 = System.currentTimeMillis();
int count = 1;
if (dest.isDirectory()) {
count = dcm2jpg.mconvert(argList, 0, dest);
} else {
File src = new File(argList.get(0));
if (argc > 2 || src.isDirectory()) {
exit("dcm2jpg: when converting several files, "
+ "last argument must be a directory\n");
}
dcm2jpg.convert(src, dest);
}
long t2 = System.currentTimeMillis();
System.out.println("\nconverted " + count + " files in " + (t2 - t1)
/ 1000f + " s.");
}
private static DicomObject loadDicomObject(File file) {
DicomInputStream in = null;
try {
in = new DicomInputStream(file);
return in.readDicomObject();
} catch (IOException e) {
exit(e.getMessage());
throw new RuntimeException();
} finally {
CloseUtils.safeClose(in);
}
}
private static short[] loadPVal2Gray(File file) {
BufferedReader r = null;
try {
r = new BufferedReader(new InputStreamReader(new FileInputStream(
file)));
short[] pval2gray = new short[256];
int n = 0;
String line;
while ((line = r.readLine()) != null) {
try {
int val = Integer.parseInt(line.trim());
if (n == pval2gray.length) {
if (n == 0x10000) {
exit("Number of entries in " + file + " > 2^16");
}
short[] tmp = pval2gray;
pval2gray = new short[n << 1];
System.arraycopy(tmp, 0, pval2gray, 0, n);
}
pval2gray[n++] = (short) val;
} catch (NumberFormatException nfe) {
// ignore lines where Integer.parseInt fails
}
}
if (n != pval2gray.length) {
exit("Number of entries in " + file + ": " + n
+ " != 2^[8..16]");
}
return pval2gray;
} catch (IOException e) {
exit(e.getMessage());
throw new RuntimeException();
} finally {
CloseUtils.safeClose(r);
}
}
private static CommandLine parse(String[] args) {
Options opts = new Options();
OptionBuilder.withArgName("frame");
OptionBuilder.hasArg();
OptionBuilder.withDescription(
"frame to convert, 1 (= first frame) by default");
opts.addOption(OptionBuilder.create("f"));
OptionBuilder.withArgName("prfile");
OptionBuilder.hasArg();
OptionBuilder.withDescription(
"file path of presentation state to apply");
opts.addOption(OptionBuilder.create("p"));
OptionBuilder.withArgName("center");
OptionBuilder.hasArg();
OptionBuilder.withDescription("Window Center");
opts.addOption(OptionBuilder.create("c"));
OptionBuilder.withArgName("width");
OptionBuilder.hasArg();
OptionBuilder.withDescription("Window Width");
opts.addOption(OptionBuilder.create("w"));
opts.addOption("sigmoid", false,
"apply sigmoid VOI LUT function with given Window Center/Width");
opts.addOption("noauto", false,
"disable auto-windowing for images w/o VOI attributes");
OptionBuilder.withArgName("file");
OptionBuilder.hasArg();
OptionBuilder.withDescription(
"file path of P-Value to gray value map");
opts.addOption(OptionBuilder.create("pv2gray"));
OptionBuilder.withArgName(".xxx");
OptionBuilder.hasArg();
OptionBuilder.withDescription(
"jpeg file extension used with destination directory argument,"
+ " default: '.jpg'.");
opts.addOption(OptionBuilder.create("jpgext"));
opts.addOption("h", "help", false, "print this message");
opts.addOption("V", "version", false,
"print the version information and exit");
CommandLine cl = null;
try {
cl = new GnuParser().parse(opts, args);
} catch (ParseException e) {
exit("dcm2jpg: " + e.getMessage());
throw new RuntimeException("unreachable");
}
if (cl.hasOption('V')) {
Package p = Dcm2Jpeg.class.getPackage();
System.out.println("dcm2jpg v" + p.getImplementationVersion());
System.exit(0);
}
if (cl.hasOption('h') || cl.getArgList().size() < 2) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp(USAGE, DESCRIPTION, opts, EXAMPLE);
System.exit(0);
}
return cl;
}
private static int parseInt(String s, String errPrompt, int min, int max) {
try {
int i = Integer.parseInt(s);
if (i >= min && i <= max)
return i;
} catch (NumberFormatException e) {
// parameter is not a valid integer; fall through to exit
}
exit(errPrompt);
throw new RuntimeException();
}
private static float parseFloat(String s, String errPrompt) {
try {
return Float.parseFloat(s);
} catch (NumberFormatException e) {
exit(errPrompt);
throw new RuntimeException();
}
}
private static void exit(String msg) {
System.err.println(msg);
System.err.println("Try 'dcm2jpg -h' for more information.");
System.exit(1);
}
}
Jars Files Used to Run this code
dcm4che-core-2.0.23.jar
dcm4che-image-2.0.23.jar
dcm4che-imageio-2.0.23.jar
dcm4che-imageio-rle-2.0.23.jar
slf4j-log4j12-1.5.0.jar
slf4j-api-1.5.0.jar
log4j-1.2.13.jar
commons-cli-1.2.jar
If you don't want to use direct Dcm2Jpg.java file then you can include below jar file.
dcm4che-tool-dcm2jpg-2.0.23.jar
In this jar you can import org.dcm4che2.tool.dcm2jpg.Dcm2Jpg this java file
I have one html file where I have kept all the URLs(Download link for CSV files).I want a tool/program that has to go through each url one by one and download the file, Then keep the file in the specified folder which will be written in the same html file itself.
html file is a table with 3 columns
File name,File location and download URL
Url will download the CSV file after opening a new window (target=_blank).Also after download it will close the child window automatically if there is no error.
I have tried the automation(Selenium using java)
But there are some challenges as follows.
It should wait until the download completes
Sometimes the url may show error,in that case it should close the child window and return to parent window
I have resolved the 1st case by keeping a watcher which will check whether the file is downloaded or not each second(by counting the number of csv files in the folder)
I can switch to child window and check whether there is any error but if there is no error my driver is got stuck over there.
How to resolve this
Code to check whether error is there in child window
public boolean foundError(FirefoxDriver driver) {
System.out.println(browser.getWindowHandle() + "Parent" + parentHandle);
String child = "";
int numberOfWindows = 0;
//return true;
if (driver.getWindowHandles().size() > 1) {
for (String winHandle : driver.getWindowHandles()) {
numberOfWindows++;
if (!parentHandle.equals(winHandle)) {
child = winHandle;
System.out.println("Child" + winHandle);
}
}
}
if (numberOfWindows > 1) {
System.out.println("tostring1" + driver.toString());
if (!parentHandle.equals(child)) {
driver.switchTo().window(child);
}
System.out.println("Switched to child");
Set set = driver.getWindowHandles();
System.out.println("Number of windows=" + set.size());
// System.out.println("Number of windows="+set.size()+"driver url"+driver.getCurrentUrl());
// System.out.println("tostring2"+driver.toString());
try {
// WebDriverWait wait1 = new WebDriverWait(driver, 5);
System.out.println("Body text" + driver.findElementByTagName("body").getText());/////////////////////////////Here driver will get stuck
//System.out.println("text"+driver.findElementByClassName("body").toString());
// List<WebElement> elements=driver.findElementsByClassName("ErrorBody");elements.size()>0
if (!driver.findElementByTagName("body").getText().equals("")) {
driver.close();
driver.switchTo().window(parentHandle);
return true;
}
System.out.println("No error");
driver.switchTo().window(parentHandle);
System.out.println("Switched to parent");
} catch (Exception e) {
System.out.println("Error Catch block page time out:" + e);
driver.switchTo().window(parentHandle);
return false;
// driver.switchTo().window(parentHandle);
}
}
return false;
}
I used different method
using Jsoup to parse the html file and downloading
import java.io.File;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
*
* #author nudanesh
*/
public class URLDownload {
private Document doc;
String url = "", folder, file;
private final File sourceFile;
int i = 1;
int r = 1, c = 1;
int anchorCol = 3;
Library lib;
URLDownload() {
lib = new Library();
sourceFile = new File("Download.html");
try {
doc = Jsoup.parse(sourceFile, "UTF-8");
} catch (IOException ex) {
Logger.getLogger(URLDownload.class.getName()).log(Level.SEVERE, null, ex);
}
//Elements links = doc.select("a[href]");
Elements rows = doc.select("tr");
System.out.println("Size=" + rows.size());
for (Element row : rows) {
Elements cols = row.getElementsByTag("td");
c = 1;
for (Element col : cols) {
System.out.println("Row"+r);
if (c == 1) {
file = col.text();//System.out.println("File in main"+file);
} else if (c == 2) {
folder = col.text();//System.out.println("Folder in main"+folder);
} else {
try {
url = col.getElementsByTag("a").attr("href");
} catch (Exception e) {
System.out.print("-");
}
}
c++;
}
if (!url.equals("")) {
lib.setLocation(file,folder);
lib.downloadFile(url);
}
url = "";
i++;
r++;
}
}
public static void main(String arg[]) {
new URLDownload();
}
}
and following is the Library class file
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
*
* #author nudanesh
*/
public class Library {
boolean downloaded = false;
Thread t;
int waitTime = 0;
String baseLoc = "";
int size = 1024, ByteWritten = 0;
URL url;
URLConnection uCon = null;
String folderLoc = "", file = "firstFile.csv";
File loc;
private OutputStream outStream;
private InputStream is=null;
private byte[] buf;
private int ByteRead;
private int FolderInUrl = 4;
private boolean rootFolder = true;
private File resultFile;
private FileOutputStream fileResult;
private XSSFWorkbook workbookResult;
private XSSFSheet sheetResult;
private int updateExcelRowNum = -1;
private int updateExcelColNum = -1;
String date;
private int waitLimit = 900000;
Library() {
/*System.out.print(Calendar.getInstance().toString());
Date d=new Date();
String date=d.toString();
System.out.println(date);*/
//t = new Thread(this);
// t.start();
date = new SimpleDateFormat("yyyy_MM_dd_HH_mm_ss").format(Calendar.getInstance().getTime());
System.out.print(date);
baseLoc = date + "/";
WriteDataToExcel();
baseLoc += "Business Reports/";
createRowExcel(updateExcelRowNum);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Report Name");
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Path");
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Status");
updateExcel();
}
public void setLocation(String a, String b) {
file = a;
file += ".csv";
folderLoc = baseLoc + getFolderPath(b);
// System.out.println("File Name: "+file);
// System.out.println("Folder loc: "+folderLoc);
}
public String getFolderPath(String b) {
String path = "";
try {
System.out.println("path" + b);
path = b;
// path = java.net.URLDecoder.decode(b, "UTF-8");
String p[] = path.split("/");
path = "";
for (int i = FolderInUrl; i < p.length - 1; i++) {
rootFolder = false;
p[i] = removeSpacesAtEnd(p[i]);
path = path + p[i] + "/";
}
} catch (Exception ex) {
Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, ex);
}
return path;
}
public void downloadFile(String urlString) {
// System.out.println("Started");
try {
url = new URL(urlString);
} catch (MalformedURLException ex) {
Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, ex);
}
try {
loc = new File(folderLoc);
if (!loc.exists()) {
loc.mkdirs();
}
outStream = new BufferedOutputStream(new FileOutputStream(folderLoc + file));
uCon = url.openConnection();
uCon.setReadTimeout(waitLimit);
is = uCon.getInputStream();
downloaded=true;
buf = new byte[size];
while ((ByteRead = is.read(buf)) != -1) {
System.out.println("while executing" + ByteRead);
outStream.write(buf, 0, ByteRead);
ByteWritten += ByteRead;
}
//System.out.println("Downloaded" + ByteWritten);
resetCounters();
createRowExcel(updateExcelRowNum);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, file);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, folderLoc);
if (ByteWritten < 1000) {
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Downloaded ");
} else {
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Downloaded ");
}
updateExcel();
} catch (Exception e) {
System.out.println("error catch" + e);
resetCounters();
createRowExcel(updateExcelRowNum);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, file);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, folderLoc);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Rejected the Download after waiting " + (waitLimit / 60000) + " minutes");
updateExcel();
waitTime = 0;
} finally {
try {
System.out.println("Error in streams");
if(downloaded)
is.close();
outStream.close();
downloaded= false;
} catch (IOException e) {
e.printStackTrace();
}
}
}
public void moveToFolder(String reportName, String path) {
try {
File repo = new File(folderLoc + "/" + reportName + ".csv");
path = folderLoc + "/" + path;
File pathFolder = new File(path);
if (!pathFolder.exists()) {
pathFolder.mkdirs();
}
pathFolder = new File(path + reportName + ".csv");
System.out.println("Path=" + pathFolder.getAbsolutePath() + "\nReport path=" + repo.getAbsolutePath());
System.out.println("Source" + repo.getAbsolutePath());
//System.out.println("Status" + repo.renameTo(new File(pathFolder.getAbsolutePath())));
System.out.println("Status" + Files.move(repo.toPath(), new File(pathFolder.getAbsolutePath()).toPath(), REPLACE_EXISTING));
//Files.
} catch (Exception e) {
System.out.println("error while moving" + e);
}
}
public String changeSpecialCharacters(String report) {
report = report.replaceAll(":", "_");
return report;
}
public String removeSpacesAtEnd(String inputPath) {
for (int i = inputPath.length() - 1; i >= 0; i--) {
if (inputPath.charAt(i) != ' ') {
break;
} else {
System.out.println("Before string is" + inputPath);
inputPath = inputPath.substring(0, i);
System.out.println("AFter string is" + inputPath);
}
}
return inputPath;
}
public void WriteDataToExcel() {
try {
// file = new FileInputStream(new File("config.xlsx"));
// File resultFolder = new File("Results");
// if (resultFolder.exists()) {
// deleteDirectory(resultFolder);
// }
// resultFolder.mkdirs();
if (!new File(baseLoc).exists()) {
new File(baseLoc).mkdirs();
}
resultFile = new File(baseLoc + "Reports info " + date + ".xlsx");
System.out.println("Path" + resultFile.getAbsolutePath());
resultFile.createNewFile();
// rFilePath = resultFile.getAbsolutePath();
fileResult = new FileOutputStream(resultFile);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
//Get the workbook instance for XLS file
// System.out.println("file success");
XSSFWorkbook workbook = null;
try {
workbookResult = new XSSFWorkbook();
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println("Opening the browser");
//Get first sheet from the workbook
sheetResult = workbookResult.createSheet();
//sheetResult.set
//Get iterator to all the rows in current sheet
//Get iterator to all cells of current row
//ar.add(folderLocation);
// ar.add(firefoxProfileLocation);
}
public void updateExcel() {
try {
//fileResult.close();
fileResult = new FileOutputStream(resultFile);
workbookResult.write(fileResult);
fileResult.close();
} catch (Exception e) {
System.out.println(e);
}
}
public void createRowExcel(int num) {
updateExcelRowNum++;
num = updateExcelRowNum;
sheetResult.createRow(num);
}
public void updateRowColExcel(int rnum, int cnum, String value) {
updateExcelColNum++;
cnum = updateExcelColNum;
sheetResult.getRow(rnum).createCell(cnum);
XSSFCell cell = sheetResult.getRow(rnum).getCell(cnum);
cell.setCellValue(value);
}
public void updateColumn(int rnum, int cnum, String value) {
XSSFCell cell = sheetResult.getRow(rnum).getCell(cnum);
cell.setCellValue(value);
}
public void resetCounters() {
updateExcelColNum = -1;
}
/* #Override
public void run() {
while (true) {
if (true) {
waitTime += 1000;
System.out.println(waitTime);
if (waitTime > waitLimit) {
try {
is.close();
outStream.close();
//downloaded=false;
// cancelDownload=true;
} catch (Exception ex) {
Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
try {
Thread.sleep(1000);
} catch (Exception e) {
}
}
}*/
}
I would like to build an application which converts PDF Screenplays in HTML. Screenplays are very simple texts with no image nor other kind of objects, but formatting is very important.
Fortunately there aren't much formatting conventions either.
That said, I found in the internet the PDFbox java library and I would like to use it, but I can't find examples on how retreiving information about formatting (or about coordinates of the text).
What I need is to know the margin box coordinates and the ones of the text so I can compare them to check whether the text is indented or not.
I hope I've been clear enough.
Thank you in advance!
https://pdfbox.apache.org/2.0/commandline.html#extracttext
"-html boolean false Output in HTML format instead of raw text."
This looks like what you need.
Please have a look at this link. I think it will help you. Anyway I am copying the code from there in case if the link is broken...
package printtextlocations;
import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.pdfbox.util.TextPosition;
public class PrintTextLocations extends PDFTextStripper {
public static StringBuilder tWord = new StringBuilder();
public static String seek;
public static String[] seekA;
public static List wordList = new ArrayList();
public static boolean is1stChar = true;
public static boolean lineMatch;
public static int pageNo = 1;
public static double lastYVal;
public PrintTextLocations()
throws IOException {
super.setSortByPosition(true);
}
public static void main(String[] args)
throws Exception {
PDDocument document = null;
seekA = args[1].split(",");
seek = args[1];
try {
File input = new File(args[0]);
document = PDDocument.load(input);
if (document.isEncrypted()) {
try {
document.decrypt("");
} catch (InvalidPasswordException e) {
System.err.println("Error: Document is encrypted with a password.");
System.exit(1);
}
}
PrintTextLocations printer = new PrintTextLocations();
List allPages = document.getDocumentCatalog().getAllPages();
for (int i = 0; i < allPages.size(); i++) {
PDPage page = (PDPage) allPages.get(i);
PDStream contents = page.getContents();
if (contents != null) {
printer.processStream(page, page.findResources(), page.getContents().getStream());
}
pageNo += 1;
}
} finally {
if (document != null) {
System.out.println(wordList);
document.close();
}
}
}
#Override
protected void processTextPosition(TextPosition text) {
String tChar = text.getCharacter();
System.out.println("String[" + text.getXDirAdj() + ","
+ text.getYDirAdj() + " fs=" + text.getFontSize() + " xscale="
+ text.getXScale() + " height=" + text.getHeightDir() + " space="
+ text.getWidthOfSpace() + " width="
+ text.getWidthDirAdj() + "]" + text.getCharacter());
String REGEX = "[,.\\[\\](:;!?)/]";
char c = tChar.charAt(0);
lineMatch = matchCharLine(text);
if ((!tChar.matches(REGEX)) && (!Character.isWhitespace(c))) {
if ((!is1stChar) && (lineMatch == true)) {
appendChar(tChar);
} else if (is1stChar == true) {
setWordCoord(text, tChar);
}
} else {
endWord();
}
}
protected void appendChar(String tChar) {
tWord.append(tChar);
is1stChar = false;
}
protected void setWordCoord(TextPosition text, String tChar) {
tWord.append("(").append(pageNo).append(")[").append(roundVal(Float.valueOf(text.getXDirAdj()))).append(" : ").append(roundVal(Float.valueOf(text.getYDirAdj()))).append("] ").append(tChar);
is1stChar = false;
}
protected void endWord() {
String newWord = tWord.toString().replaceAll("[^\\x00-\\x7F]", "");
String sWord = newWord.substring(newWord.lastIndexOf(' ') + 1);
if (!"".equals(sWord)) {
if (Arrays.asList(seekA).contains(sWord)) {
wordList.add(newWord);
} else if ("SHOWMETHEMONEY".equals(seek)) {
wordList.add(newWord);
}
}
tWord.delete(0, tWord.length());
is1stChar = true;
}
protected boolean matchCharLine(TextPosition text) {
Double yVal = roundVal(Float.valueOf(text.getYDirAdj()));
if (yVal.doubleValue() == lastYVal) {
return true;
}
lastYVal = yVal.doubleValue();
endWord();
return false;
}
protected Double roundVal(Float yVal) {
DecimalFormat rounded = new DecimalFormat("0.0'0'");
Double yValDub = new Double(rounded.format(yVal));
return yValDub;
}
}