Multithreading, Read multiple files and send them to server in parallel

Multithreading, Read multiple files and send them to server in parallel - java

public class SOAPClient implements Runnable {
/*
* endpoint url, the address where soap xml will be sent. It is hard coded
* now, later on to be made configurable
*/
private String endpointUrl = "";
/*
* This is for debugging purposes Message and response are written to the
* fileName
*/
static String fileName = "";
/*
* serverResponse This is a string representation of the response received
* from server
*/
private String serverResponse = null;
public String tempTestStringForDirectory = "";
/*
* A single file or a folder maybe provided
*/
private File fileOrFolder;
public SOAPClient(String endpointURL, File fileOrFolder) {
this.endpointUrl = endpointURL;
this.fileOrFolder = fileOrFolder;
serverResponse = null;
}
/*
* Creats a SOAPMessage out of a file that is passed
*
* #param fileAddress - Contents of this file are read and a SOAPMessage is
* created that will get sent to the server. This is a helper method. Is
* this step (method, conversion) necessary? set tempSoapText = XML String,
* currently getting from file, but it can be a simple string
*/
private SOAPMessage xmlStringToSOAPMessage(String fileAddress) {
System.out.println("xmlStringToSoap()");
// Picking up this string from file right now
// This can come from anywhere
String tempSoapText = readFileToString(fileAddress);
SOAPMessage soapMessage = null;
try {
// Create SoapMessage
MessageFactory msgFactory = MessageFactory.newInstance();
SOAPMessage message = msgFactory.createMessage();
SOAPPart soapPart = message.getSOAPPart();
// Load the SOAP text into a stream source
byte[] buffer = tempSoapText.getBytes();
ByteArrayInputStream stream = new ByteArrayInputStream(buffer);
StreamSource source = new StreamSource(stream);
ByteArrayOutputStream out = new ByteArrayOutputStream();
// Set contents of message
soapPart.setContent(source);
message.writeTo(out);
soapMessage = message;
} catch (SOAPException e) {
System.out.println("soapException xmlStringToSoap()");
System.out.println("SOAPException : " + e);
} catch (IOException e) {
System.out.println("IOException xmlStringToSoap()");
System.out.println("IOException : " + e);
}
return soapMessage;
}
/*
* Reads the file passed and creates a string. fileAddress - Contents of
* this file are read into a String
*/
private String readFileToString(String fileAddress) {
FileInputStream stream = null;
MappedByteBuffer bb = null;
String stringFromFile = "";
try {
stream = new FileInputStream(new File(fileAddress));
FileChannel fc = stream.getChannel();
bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
stringFromFile = Charset.defaultCharset().decode(bb).toString();
} catch (IOException e) {
System.out.println("readFileToString IOException");
e.printStackTrace();
} finally {
try {
stream.close();
} catch (IOException e) {
System.out.println("readFileToString IOException");
e.printStackTrace();
}
}
return stringFromFile;
}
/*
* soapXMLtoEndpoint sends the soapXMLFileLocation to the endpointURL
*/
public void soapXMLtoEndpoint(String endpointURL, String soapXMLFileLocation) throws SOAPException {
SOAPConnection connection = SOAPConnectionFactory.newInstance().createConnection();
SOAPMessage response = connection.call(xmlStringToSOAPMessage(soapXMLFileLocation), endpointURL);
connection.close();
SOAPBody responseBody = response.getSOAPBody();
SOAPBodyElement responseElement = (SOAPBodyElement) responseBody.getChildElements().next();
SOAPElement returnElement = (SOAPElement) responseElement.getChildElements().next();
if (responseBody.getFault() != null) {
System.out.println("fault != null");
System.out.println(returnElement.getValue() + " " + responseBody.getFault().getFaultString());
} else {
serverResponse = returnElement.getValue();
System.out.println(serverResponse);
System.out.println("\nfault == null, got the response properly.\n");
}
}
/*
* This is for debugging purposes. Writes string to a file.
*
* #param message Contents to be written to file
*
* #param fileName the name of the
*/
private static void toFile(String message, String fileName) {
try {
FileWriter fstream = new FileWriter(fileName);
System.out.println("printing to file: ".concat(fileName));
BufferedWriter out = new BufferedWriter(fstream);
out.write(message);
out.close();
} catch (Exception e) {
System.out.println("toFile() Exception");
System.err.println("Error: " + e.getMessage());
}
}
/*
* Using dom to parse the xml. Getting both orderID and the description.
*
* #param xmlToParse XML in String format to parse. Gets the orderID and
* description Is the error handling required? What if orderID or
* description isn't found in the xmlToParse? Use setters and getters?
*
* #param fileName only for debuggining, it can be safely removed any time.
*/
private void domParsing(String xmlToParse, String fileName) {
if (serverResponse == null) {
return;
} else {
try {
System.out.println("in domParsing()");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
System.out.println("serverResponse contains fault");
Document doc = dBuilder.parse(new InputSource(new StringReader(serverResponse)));
doc.getDocumentElement().normalize();
NodeList orderNodeList = doc.getElementsByTagName("Order");
if (orderNodeList.getLength() > 0) {
tempTestStringForDirectory = tempTestStringForDirectory + "\n Got order\n" + "\n" + fileName + "\n" + "got order\n";
for (int x = 0; x < orderNodeList.getLength(); x++) {
System.out.println(orderNodeList.item(x).getAttributes().getNamedItem("orderId").getNodeValue());
}
}
NodeList descriptionNodeList = doc.getElementsByTagName("Description");
if (descriptionNodeList.getLength() > 0) {
System.out.println("getting description");
String tempDescriptionString = descriptionNodeList.item(0).getTextContent();
System.out.println(tempDescriptionString);
tempTestStringForDirectory = tempTestStringForDirectory + "\n Got description" + "\n" + fileName + "\n" + tempDescriptionString + "\n";
}
} catch (Exception e) {
System.out.println("domParsing() Exception");
e.printStackTrace();
}
}
}
/*
* Reads a single file or a whole directory structure
*/
private void listFilesForFolder(final File fileOrFolder) {
String temp = "";
if (fileOrFolder.isDirectory()) {
for (final File fileEntry : fileOrFolder.listFiles()) {
if (fileEntry.isDirectory()) {
listFilesForFolder(fileEntry);
} else {
if (fileEntry.isFile()) {
temp = fileEntry.getName();
try {
soapXMLtoEndpoint(endpointUrl, fileOrFolder.getAbsolutePath() + "\\" + fileEntry.getName());
domParsing(serverResponse, fileEntry.getName());
} catch (SOAPException e) {
e.printStackTrace();
}
}
}
}
}
if (fileOrFolder.isFile()) {
temp = fileOrFolder.getName();
System.out.println("this is a file");
System.out.println(temp);
try {
soapXMLtoEndpoint(endpointUrl, fileOrFolder.getAbsolutePath());
} catch (SOAPException e) {
e.printStackTrace();
}
domParsing(serverResponse, temp);
}
}
#Override
public void run() {
listFilesForFolder(fileOrFolder);
toFile(tempTestStringForDirectory, "test.txt");
}
public static void main(String[] args) {
String tempURLString = ".../OrderingService";
String tempFileLocation = "C:/Workspace2/Test5/";
SOAPClient soapClient = new SOAPClient(tempURLString, new File(tempFileLocation));
Thread thread = new Thread(soapClient);
thread.start();
System.out.println("program ended");
}
}
I think n threads for n files would be bad? Wouldn't that crash the system, or give too many threads error?
I'm trying to make my program multi threaded. I don't know what I am missing. My program has a logic to know if a single file is passed or a directory is passed. One thread is fine if a single file is passed. But what should I do if a directory is passed? Do I need to create threads in my listFilesForFolder method? Are the threads always started from the main method, or can they be started from other methods? Also, this program is going to be used by other people, so it should be my job to handle the threads properly. All they should have to do is be using my program. So I feel that the thread logic should not belong in the main method but rather listFilesForFolder which is the starting point of my program. Thank you for your help.

From what I have seen, most download managers will try to download at most around 3 files at a time, plus or minus two. I suggest you do the same. Essentially, you could do something like this (pseudocode):
//Set up a list of objects
fileList={"a","b","c"}
//Index of the next file that has not been handed to a thread yet
nextFileIndex=0;
Mutex mutex
//Start_X_threads
//Hands out the next unprocessed file name, or "" when none is left
//(or when the calling thread was interrupted while waiting for the mutex)
String nextObject(void){
    String nextFile;
    try{
        mutex.acquire();
        try {
            if (nextFileIndex<fileList.length)
            {
                nextFile=fileList[nextFileIndex];
                nextFileIndex++;
            }
            else
                nextFile="";
        }
        finally
        {
            //Always release, even if the block above fails
            mutex.release();
        }
    } catch(InterruptedException ie) {
        nextFile="";
    }
    return nextFile;
}
Each thread :
String nextFile;
do
{
    nextFile=nextObject();
    //Get nextFile
} while (!nextFile.equals(""))

Related

Discontinuous FTP download throws "Read timed out" or "Connection reset"

I used FTP and FTPClient in package 'org.apache.commons.net.ftp' to download files from FTP server.
Here is my total example code
/**
 * Reads one remote file from an FTP server in chunks.
 *
 * <p>Usage: set {@code ip}, {@code port}, {@code user}, {@code passwd},
 * {@code path} and {@code fileName}, call {@link #connect()}, call
 * {@link #readBytes()} until it returns null, then {@link #close()}.
 */
public class FtpInput {
    private static final Logger LOG = Logger.getLogger(FtpInput.class);
    /** Timeout in milliseconds used for connect, socket reads and data transfers. */
    private static final int TIMEOUT = 120000;
    /** Prefix of a successful reply to the SIZE command. */
    private static final String SIZE_COMMAND_REPLY_CODE = "213 ";
    /**
     * FTPClient
     */
    private FTPClient ftpClient;
    /**
     * Size of the remote file as reported by the server
     */
    private long completeFileSize = 0;
    protected String ip = "";
    protected int port = 21;
    protected String user = "";
    protected String passwd = "";
    protected String path = "";
    protected String fileName = "";
    /**
     * count input bytes
     */
    private CountingInputStream is;
    /**
     * the bytes already processed
     */
    private long processedBytesNum;
    private byte[] inputBuffer = new byte[1024];

    /**
     * Connects to the FTP server, logs in, changes into {@code path}
     * (creating it if needed), asks for the size of {@code fileName} and opens
     * the download stream.
     *
     * @throws RuntimeException when any step fails; the cause is attached
     */
    public void connect() {
        this.ftpClient = new FTPClient();
        ftpClient.setRemoteVerificationEnabled(false);
        // The connect timeout is only used by connect(), so set it first.
        ftpClient.setConnectTimeout(TIMEOUT);
        try {
            ftpClient.connect(ip, port);
            if (!ftpClient.login(user, passwd)) {
                throw new IOException("ftp login failed!");
            }
            if (StringUtils.isNotBlank(path)) {
                if (!ftpClient.changeWorkingDirectory(path)) {
                    ftpClient.mkd(path);
                    if (!ftpClient.changeWorkingDirectory(path)) {
                        throw new IOException("ftp change working dir failed! path:" + path);
                    }
                }
            }
            ftpClient.setFileType(FTP.BINARY_FILE_TYPE);
            ftpClient.setSoTimeout(TIMEOUT);
            ftpClient.setDataTimeout(TIMEOUT);
            ftpClient.enterLocalPassiveMode();
            // keep control channel keep-alive when download large file
            ftpClient.setControlKeepAliveTimeout(120);
        } catch (Exception e) {
            throw new RuntimeException("ftp login failed!", e);
        }
        // get complete ftp size
        completeFileSize = getFtpFileSize();
        LOG.info(String.format("ftp file size: %d", completeFileSize));
        try {
            InputStream ftpis = this.ftpClient.retrieveFileStream(this.fileName);
            if (ftpis == null) {
                // Fail here instead of wrapping null and failing on first read.
                LOG.error("cannot fetch source file.");
                throw new IOException("cannot fetch source file " + fileName + ": " + ftpClient.getReplyString());
            }
            this.is = new CountingInputStream(ftpis);
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Reads the next chunk of the remote file.
     *
     * @return the bytes read (at most the internal buffer size), or null at
     *         the end of the file
     */
    public byte[] readBytes() {
        byte[] bytes = readBytesFromStream(is, inputBuffer);
        // the bytes processed
        processedBytesNum = is.getCount();
        return bytes;
    }

    /**
     * Performs one read into inputBuffer and returns exactly the bytes read.
     *
     * @param stream      stream to read from, must not be null
     * @param inputBuffer scratch buffer, must not be null or empty
     * @return a fresh array holding the bytes read, or null at end of stream
     * @throws RuntimeException wrapping the IOException when the read fails
     */
    protected byte[] readBytesFromStream(InputStream stream, byte[] inputBuffer) {
        // Check the reference itself; checking the boolean "stream != null"
        // for null can never fail.
        Preconditions.checkNotNull(stream, "InputStream has not been inited yet.");
        Preconditions.checkArgument(inputBuffer != null && inputBuffer.length > 0);
        int readBytes;
        try {
            readBytes = stream.read(inputBuffer);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        if (readBytes == -1) {
            // Read end.
            return null;
        }
        if (readBytes > 0 && readBytes <= inputBuffer.length) {
            // Always copy, so the caller never holds the shared buffer that
            // the next read overwrites.
            byte[] tmpBytes = new byte[readBytes];
            System.arraycopy(inputBuffer, 0, tmpBytes, 0, readBytes);
            return tmpBytes;
        }
        // may other situation happens?
        throw new RuntimeException(String.format("readBytesFromStream: readBytes=%s inputBuffer.length=%s",
                readBytes, inputBuffer.length));
    }

    /**
     * Fetches the byte size of the remote file with the SIZE command.
     *
     * @return the size reported by the server
     * @throws RuntimeException when the command fails or the reply is not a
     *         "213" reply carrying a number; the cause is attached
     */
    private long getFtpFileSize() {
        try {
            ftpClient.sendCommand("SIZE", this.fileName);
            String reply = ftpClient.getReplyString().trim();
            LOG.info(String.format("ftp file %s size reply : %s", fileName, reply));
            Preconditions.checkArgument(reply.startsWith(SIZE_COMMAND_REPLY_CODE),
                    "ftp file size reply: %s is not success", reply);
            String sizeSubStr = reply.substring(SIZE_COMMAND_REPLY_CODE.length());
            return Long.parseLong(sizeSubStr);
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Closes the download stream, finishes the pending transfer command and
     * disconnects. Never throws; failures are logged.
     */
    public void close() {
        try {
            if (is != null) {
                LOG.info(String.format("already read %d bytes from ftp file %s", is.getCount(), fileName));
                is.close();
                // completePendingCommand() belongs to a transfer started by
                // retrieveFileStream(), so it is only called when a stream exists.
                if (ftpClient != null && !ftpClient.completePendingCommand()) {
                    LOG.error("error happened when complete transfer of ftp");
                }
            }
        } catch (Exception e) {
            LOG.error(String.format("Close ftp input failed:%s,%s", e.getMessage(), e.getCause()));
        } finally {
            is = null;
            disconnectQuietly();
        }
    }

    /** Logs out and disconnects if still connected; failures are logged, never thrown. */
    private void disconnectQuietly() {
        if (ftpClient == null) {
            return;
        }
        try {
            if (ftpClient.isConnected()) {
                try {
                    ftpClient.logout();
                } finally {
                    ftpClient.disconnect();
                }
            }
        } catch (Exception e) {
            LOG.error(String.format("Close ftp input failed:%s,%s", e.getMessage(), e.getCause()));
        } finally {
            ftpClient = null;
        }
    }

    /**
     * Verifies that as many bytes were read as the server reported.
     *
     * @throws IllegalArgumentException when the counts differ
     */
    public void validInputComplete() {
        Preconditions.checkArgument(processedBytesNum == completeFileSize,
                "ftp file transfer is not complete: read %s of %s bytes", processedBytesNum, completeFileSize);
    }

    /**
     * main
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String ip = "***.***.***.****";
        int port = 21;
        String user = "***";
        String passwd = "***";
        String path = "/home/work";
        String fileName = "b.txt";
        FtpInput input = new FtpInput();
        try {
            input.fileName = fileName;
            input.path = path;
            input.ip = ip;
            input.port = port;
            input.user = user;
            input.passwd = passwd;
            // connect to FTP server
            input.connect();
            while (true) {
                // read bytes
                byte[] bytes = input.readBytes();
                if (bytes == null) {
                    break;
                }
                LOG.info("read " + bytes.length + " bytes at :" + new Date(System.currentTimeMillis()));
                // Attention: this is used for simulating the process of writing data into hive table
                // it maybe consume more than 1 minute;
                Thread.sleep(3000);
            }
            input.validInputComplete();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            input.close();
        }
    }
}
here is the exception message:
java.net.SocketTimeoutException: Read timed out
or
java.net.SocketException: Connection reset
at stream.readBytes in method readBytesFromStream
At first I thought it was caused by writing into the Hive table too slowly, so that the FTP server closed the connection.
But actually, writing into the Hive table is fast enough.
How can I fix this problem?

From your comments, it looks like it can take hours before you finish downloading the file.
You cannot reasonably expect an FTP server to wait for you for hours to finish the transfer. Particularly if you are not transferring anything most of the time. You waste server resources and most servers will protect themselves against such abuse.
Your design is flawed.
You should redesign your application to first fully download the file; and import the file only after the download finishes.

How to serve static content using suns simple httpserver

I'm using jersey's HttpServerFactory to create a simple embedded HttpServer that hosts a couple of rest services. We just needed something small quick and lightweight. I need to host a small static html page inside the same server instance. Is there a simple way to add a static handler to the server? Is there a pre-defined handler I can use? It seems like a pretty common task, I'd hate to re-write code for it if it already exists.
// Embedded server that already hosts the REST services.
server = HttpServerFactory.create(url);
server.setExecutor(Executors.newCachedThreadPool());
// Extra context that is meant to serve the static page.
server.createContext("/staticcontent", new HttpHandler() {
    @Override
    public void handle(HttpExchange arg0) throws IOException {
        //What goes here?
    }
});
server.start();

Here is a safe version. You may want to add a couple of MIME types, depending on which ones are common (or use another method if your platform has that).
package de.phihag.miniticker;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
import com.sun.net.httpserver.HttpServer;
/**
 * {@link HttpHandler} that answers GET and HEAD requests with files from one
 * directory tree and refuses every path that resolves outside that tree.
 */
public class StaticFileHandler implements HttpHandler {
    /** File extension (lower case, without dot) to Content-Type. */
    private static final Map<String,String> MIME_MAP = new HashMap<>();
    static {
        MIME_MAP.put("appcache", "text/cache-manifest");
        MIME_MAP.put("css", "text/css");
        MIME_MAP.put("gif", "image/gif");
        MIME_MAP.put("html", "text/html");
        MIME_MAP.put("js", "application/javascript");
        MIME_MAP.put("json", "application/json");
        MIME_MAP.put("jpg", "image/jpeg");
        MIME_MAP.put("jpeg", "image/jpeg");
        MIME_MAP.put("mp4", "video/mp4");
        MIME_MAP.put("pdf", "application/pdf");
        MIME_MAP.put("png", "image/png");
        MIME_MAP.put("svg", "image/svg+xml");
        MIME_MAP.put("xlsm", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
        MIME_MAP.put("xml", "application/xml");
        MIME_MAP.put("zip", "application/zip");
        MIME_MAP.put("md", "text/plain");
        MIME_MAP.put("txt", "text/plain");
        MIME_MAP.put("php", "text/plain");
    }

    /** Canonical path of the served directory. */
    private final String filesystemRoot;
    /** {@code filesystemRoot} with a trailing separator, used for the containment check. */
    private final String filesystemRootPrefix;
    private final String urlPrefix;
    private final String directoryIndex;

    /**
     * @param urlPrefix The prefix of all URLs.
     *                   This is the first argument to createContext. Must start and end in a slash.
     * @param filesystemRoot The root directory in the filesystem.
     *                       Only files under this directory will be served to the client.
     *                       For instance "./staticfiles".
     * @param directoryIndex File to show when a directory is requested, e.g. "index.html".
     * @throws RuntimeException if urlPrefix does not start and end with a slash,
     *                          or the root cannot be canonicalised
     */
    public StaticFileHandler(String urlPrefix, String filesystemRoot, String directoryIndex) {
        if (!urlPrefix.startsWith("/")) {
            throw new RuntimeException("pathPrefix does not start with a slash");
        }
        if (!urlPrefix.endsWith("/")) {
            throw new RuntimeException("pathPrefix does not end with a slash");
        }
        this.urlPrefix = urlPrefix;
        // No trailing slash is required: the root is canonicalised, so
        // "./staticfiles" and "./staticfiles/" are equivalent.
        String canonicalRoot;
        try {
            canonicalRoot = new File(filesystemRoot).getCanonicalPath();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        this.filesystemRoot = canonicalRoot;
        this.filesystemRootPrefix = canonicalRoot.endsWith(File.separator)
                ? canonicalRoot
                : canonicalRoot + File.separator;
        this.directoryIndex = directoryIndex;
    }

    /**
     * Create and register a new static file handler.
     * @param hs The HTTP server where the file handler will be registered.
     * @param path The path in the URL prefixed to all requests, such as "/static/"
     * @param filesystemRoot The filesystem location.
     *                       For instance "/var/www/mystaticfiles/".
     *                       A request to "/static/x/y.html" will be served from the filesystem file "/var/www/mystaticfiles/x/y.html"
     * @param directoryIndex File to show when a directory is requested, e.g. "index.html".
     */
    public static void create(HttpServer hs, String path, String filesystemRoot, String directoryIndex) {
        StaticFileHandler sfh = new StaticFileHandler(path, filesystemRoot, directoryIndex);
        hs.createContext(path, sfh);
    }

    /**
     * Serves the requested file: 501 for methods other than GET/HEAD, 400 for
     * paths that leave the root, 404 when the file cannot be opened, else 200.
     *
     * @throws RuntimeException if the request path does not start with the URL prefix
     */
    @Override
    public void handle(HttpExchange he) throws IOException {
        String method = he.getRequestMethod();
        boolean isGet = "GET".equals(method);
        if (!isGet && !"HEAD".equals(method)) {
            sendError(he, 501, "Unsupported HTTP method");
            return;
        }

        String wholeUrlPath = he.getRequestURI().getPath();
        if (wholeUrlPath.endsWith("/")) {
            wholeUrlPath += directoryIndex;
        }
        if (!wholeUrlPath.startsWith(urlPrefix)) {
            throw new RuntimeException("Path is not in prefix - incorrect routing?");
        }
        String urlPath = wholeUrlPath.substring(urlPrefix.length());

        File canonicalFile;
        try {
            canonicalFile = new File(filesystemRoot, urlPath).getCanonicalFile();
        } catch (IOException e) {
            // This may be more benign (i.e. not an attack, just a 403),
            // but we don't want the attacker to be able to discern the difference.
            reportPathTraversal(he);
            return;
        }
        if (!isInsideRoot(canonicalFile.getPath())) {
            reportPathTraversal(he);
            return;
        }

        FileInputStream fis;
        try {
            fis = new FileInputStream(canonicalFile);
        } catch (FileNotFoundException e) {
            // The file may also be forbidden to us instead of missing, but we're leaking less information this way
            sendError(he, 404, "File not found");
            return;
        }
        try {
            he.getResponseHeaders().set("Content-Type", lookupMime(urlPath));
            if (isGet) {
                he.sendResponseHeaders(200, canonicalFile.length());
                try (OutputStream os = he.getResponseBody()) {
                    copyStream(fis, os);
                }
            } else {
                // HEAD: headers only, -1 means "no response body".
                he.sendResponseHeaders(200, -1);
            }
        } finally {
            // Close the file even when writing to the client fails.
            fis.close();
        }
    }

    /**
     * Tells whether a canonical path is the root itself or lies below it.
     * Comparing against the root plus separator keeps sibling directories
     * that merely share the prefix (root "/srv/www" vs "/srv/www-secret") out.
     */
    private boolean isInsideRoot(String canonicalPath) {
        return canonicalPath.equals(filesystemRoot) || canonicalPath.startsWith(filesystemRootPrefix);
    }

    /** Copies everything from is to os; closes neither stream. */
    private void copyStream(InputStream is, OutputStream os) throws IOException {
        byte[] buf = new byte[4096];
        int n;
        while ((n = is.read(buf)) >= 0) {
            os.write(buf, 0, n);
        }
    }

    /** Sends a plain-text error response with the given status code and closes the body. */
    private void sendError(HttpExchange he, int rCode, String description) throws IOException {
        String message = "HTTP error " + rCode + ": " + description;
        byte[] messageBytes = message.getBytes("UTF-8");

        he.getResponseHeaders().set("Content-Type", "text/plain; charset=utf-8");
        he.sendResponseHeaders(rCode, messageBytes.length);
        try (OutputStream os = he.getResponseBody()) {
            os.write(messageBytes);
        }
    }

    // This is one function to avoid giving away where we failed
    private void reportPathTraversal(HttpExchange he) throws IOException {
        sendError(he, 400, "Path traversal attempt detected");
    }

    /** Returns the text after the last dot of the last path segment, or "" if there is no dot. */
    private static String getExt(String path) {
        int slashIndex = path.lastIndexOf('/');
        String basename = (slashIndex < 0) ? path : path.substring(slashIndex + 1);

        int dotIndex = basename.lastIndexOf('.');
        if (dotIndex >= 0) {
            return basename.substring(dotIndex + 1);
        } else {
            return "";
        }
    }

    /** Returns the Content-Type for the path's extension, "application/octet-stream" if unknown. */
    private static String lookupMime(String path) {
        // Locale.ROOT: the default locale must not change the lookup key
        // (e.g. Turkish lower-casing of "I").
        String ext = getExt(path).toLowerCase(java.util.Locale.ROOT);
        return MIME_MAP.getOrDefault(ext, "application/octet-stream");
    }
}

This will do the trick, though it does allow anyone to walk the tree by requesting ../../../
You can change ./wwwroot to any valid java filepath.
/**
 * Serves files below ./wwwroot. Answers 404 for anything that is not a
 * regular file or that resolves outside ./wwwroot.
 */
static class MyHandler implements HttpHandler {
    @Override
    public void handle(HttpExchange t) throws IOException {
        String root = "./wwwroot";
        URI uri = t.getRequestURI();
        System.out.println("looking for: "+ root + uri.getPath());
        String path = uri.getPath();
        File rootDir = new File(root).getCanonicalFile();
        File file = new File(root + path).getCanonicalFile();
        // The canonical path must stay below the root; otherwise a request
        // such as /../../etc/passwd would walk out of it.
        boolean insideRoot = file.getPath().startsWith(rootDir.getPath() + File.separator);
        if (!insideRoot || !file.isFile()) {
            // Object does not exist or is not a file: reject with 404 error.
            byte[] response = "404 (Not Found)\n".getBytes();
            // Content length is the byte count, not the character count.
            t.sendResponseHeaders(404, response.length);
            try (OutputStream os = t.getResponseBody()) {
                os.write(response);
            }
            return;
        }
        // Object exists and is a file: accept with response code 200.
        String mime = "text/html";
        // endsWith() is safe for paths shorter than the extension.
        if (path.endsWith(".js")) {
            mime = "application/javascript";
        } else if (path.endsWith(".css")) {
            mime = "text/css";
        }
        // Open the file before committing the 200 so an unreadable file
        // does not leave a half-sent response.
        try (FileInputStream fs = new FileInputStream(file)) {
            Headers h = t.getResponseHeaders();
            h.set("Content-Type", mime);
            t.sendResponseHeaders(200, 0);
            try (OutputStream os = t.getResponseBody()) {
                final byte[] buffer = new byte[0x10000];
                int count;
                while ((count = fs.read(buffer)) >= 0) {
                    os.write(buffer, 0, count);
                }
            }
        }
    }
}

using dbpedia spotlight in java or scala

Does anyone know where to find a little how to on using dbpedia spotlight in java or scala? Or could anyone explain how it's done? I can't find any information on this...

The DBpedia Spotlight wiki pages would be a good place to start.
And I believe the installation page has listed the most popular ways (using a jar, or set up a web service) to use the application.
It includes instructions on using the Java/Scala API with your own installation, or calling the Web Service.
There are some additional data needed to be downloaded to run your own server for full service, good time to make a coffee for yourself.

You need to download DBpedia Spotlight (the jar file); after that you can use the following two classes (original author: pablomendes), to which I only made a few changes.
public class db extends AnnotationClient {
//private final static String API_URL = "http://jodaiber.dyndns.org:2222/";
private static String API_URL = "http://spotlight.dbpedia.org:80/";
private static double CONFIDENCE = 0.0;
private static int SUPPORT = 0;
private static String powered_by ="non";
private static String spotter ="CoOccurrenceBasedSelector";//"LingPipeSpotter"=Annotate all spots
//AtLeastOneNounSelector"=No verbs and adjs.
//"CoOccurrenceBasedSelector" =No 'common words'
//"NESpotter"=Only Per.,Org.,Loc.
private static String disambiguator ="Default";//Default ;Occurrences=Occurrence-centric;Document=Document-centric
private static String showScores ="yes";
#SuppressWarnings("static-access")
public void configiration(double CONFIDENCE,int SUPPORT,
String powered_by,String spotter,String disambiguator,String showScores){
this.CONFIDENCE=CONFIDENCE;
this.SUPPORT=SUPPORT;
this.powered_by=powered_by;
this.spotter=spotter;
this.disambiguator=disambiguator;
this.showScores=showScores;
}
public List<DBpediaResource> extract(Text text) throws AnnotationException {
LOG.info("Querying API.");
String spotlightResponse;
try {
String Query=API_URL + "rest/annotate/?" +
"confidence=" + CONFIDENCE
+ "&support=" + SUPPORT
+ "&spotter=" + spotter
+ "&disambiguator=" + disambiguator
+ "&showScores=" + showScores
+ "&powered_by=" + powered_by
+ "&text=" + URLEncoder.encode(text.text(), "utf-8");
LOG.info(Query);
GetMethod getMethod = new GetMethod(Query);
getMethod.addRequestHeader(new Header("Accept", "application/json"));
spotlightResponse = request(getMethod);
} catch (UnsupportedEncodingException e) {
throw new AnnotationException("Could not encode text.", e);
}
assert spotlightResponse != null;
JSONObject resultJSON = null;
JSONArray entities = null;
try {
resultJSON = new JSONObject(spotlightResponse);
entities = resultJSON.getJSONArray("Resources");
} catch (JSONException e) {
//throw new AnnotationException("Received invalid response from DBpedia Spotlight API.");
}
LinkedList<DBpediaResource> resources = new LinkedList<DBpediaResource>();
if(entities!=null)
for(int i = 0; i < entities.length(); i++) {
try {
JSONObject entity = entities.getJSONObject(i);
resources.add(
new DBpediaResource(entity.getString("#URI"),
Integer.parseInt(entity.getString("#support"))));
} catch (JSONException e) {
LOG.error("JSON exception "+e);
}
}
return resources;
}
}
second class
/**
* #author pablomendes
*/
public abstract class AnnotationClient {
public Logger LOG = Logger.getLogger(this.getClass());
private List<String> RES = new ArrayList<String>();
// Create an instance of HttpClient.
private static HttpClient client = new HttpClient();
public List<String> getResu(){
return RES;
}
public String request(HttpMethod method) throws AnnotationException {
String response = null;
// Provide custom retry handler is necessary
method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
new DefaultHttpMethodRetryHandler(3, false));
try {
// Execute the method.
int statusCode = client.executeMethod(method);
if (statusCode != HttpStatus.SC_OK) {
LOG.error("Method failed: " + method.getStatusLine());
}
// Read the response body.
byte[] responseBody = method.getResponseBody(); //TODO Going to buffer response body of large or unknown size. Using getResponseBodyAsStream instead is recommended.
// Deal with the response.
// Use caution: ensure correct character encoding and is not binary data
response = new String(responseBody);
} catch (HttpException e) {
LOG.error("Fatal protocol violation: " + e.getMessage());
throw new AnnotationException("Protocol error executing HTTP request.",e);
} catch (IOException e) {
LOG.error("Fatal transport error: " + e.getMessage());
LOG.error(method.getQueryString());
throw new AnnotationException("Transport error executing HTTP request.",e);
} finally {
// Release the connection.
method.releaseConnection();
}
return response;
}
protected static String readFileAsString(String filePath) throws java.io.IOException{
return readFileAsString(new File(filePath));
}
protected static String readFileAsString(File file) throws IOException {
byte[] buffer = new byte[(int) file.length()];
#SuppressWarnings("resource")
BufferedInputStream f = new BufferedInputStream(new FileInputStream(file));
f.read(buffer);
return new String(buffer);
}
static abstract class LineParser {
public abstract String parse(String s) throws ParseException;
static class ManualDatasetLineParser extends LineParser {
public String parse(String s) throws ParseException {
return s.trim();
}
}
static class OccTSVLineParser extends LineParser {
public String parse(String s) throws ParseException {
String result = s;
try {
result = s.trim().split("\t")[3];
} catch (ArrayIndexOutOfBoundsException e) {
throw new ParseException(e.getMessage(), 3);
}
return result;
}
}
}
public void saveExtractedEntitiesSet(String Question, LineParser parser, int restartFrom) throws Exception {
String text = Question;
int i=0;
//int correct =0 ; int error = 0;int sum = 0;
for (String snippet: text.split("\n")) {
String s = parser.parse(snippet);
if (s!= null && !s.equals("")) {
i++;
if (i<restartFrom) continue;
List<DBpediaResource> entities = new ArrayList<DBpediaResource>();
try {
entities = extract(new Text(snippet.replaceAll("\\s+"," ")));
System.out.println(entities.get(0).getFullUri());
} catch (AnnotationException e) {
// error++;
LOG.error(e);
e.printStackTrace();
}
for (DBpediaResource e: entities) {
RES.add(e.uri());
}
}
}
}
public abstract List<DBpediaResource> extract(Text text) throws AnnotationException;
public void evaluate(String Question) throws Exception {
evaluateManual(Question,0);
}
public void evaluateManual(String Question, int restartFrom) throws Exception {
saveExtractedEntitiesSet(Question,new LineParser.ManualDatasetLineParser(), restartFrom);
}
}
main()
// Demo entry point: configures the client and prints the collected URIs.
// NOTE(review): nothing here calls evaluate(), and getResu() is only filled
// by saveExtractedEntitiesSet(), so the printed list is empty as written.
public static void main(String[] args) throws Exception {
String Question ="Is the Amazon river longer than the Nile River?";
db c = new db ();
c.configiration(0.0, 0, "non", "CoOccurrenceBasedSelector", "Default", "yes");
System.out.println("resource : "+c.getResu());
}

I just add one little fix for your answer.
Your code is running, if you add the evaluate method call:
// Demo entry point: configure the client, annotate one question, print the URIs found.
public static void main(String[] args) throws Exception {
    final String text = "Is the Amazon river longer than the Nile River?";
    final db client = new db();
    client.configiration(0.0, 0, "non", "CoOccurrenceBasedSelector", "Default", "yes");
    // evaluate() is what fills the list that getResu() returns.
    client.evaluate(text);
    System.out.println("resource : " + client.getResu());
}
Lamine

In the request method of the second class (AnnotationClient) in Adel's answer, the author Pablo Mendes hasn't finished
TODO Going to buffer response body of large or unknown size. Using getResponseBodyAsStream instead is recommended.
which is an annoying warning that needs to be removed by replacing
byte[] responseBody = method.getResponseBody(); //TODO Going to buffer response body of large or unknown size. Using getResponseBodyAsStream instead is recommended.
// Deal with the response.
// Use caution: ensure correct character encoding and is not binary data
response = new String(responseBody);
with
// Stream the body through a UTF-8 reader instead of buffering it as a byte array.
Reader in = new InputStreamReader(method.getResponseBodyAsStream(), "UTF-8");
StringWriter writer = new StringWriter();
// Copies all characters from the reader into the in-memory writer.
org.apache.commons.io.IOUtils.copy(in, writer);
response = writer.toString();

How do I post a picture/image using the IO Codenameone

Because Codename One cannot use external libraries (such as HttpConnection), I have to use the internal API that Codename One provides. I have managed to post text/string data using ConnectionRequest, and I want to know whether there is any way to post image data using ConnectionRequest. Thank you for your help.
Snippet ConnectionRequest i'm using:
ConnectionRequest myrequest = new ConnectionRequest();
myrequest.setUrl("http://www.xxxx.com/mobile/login/");
myrequest.setPost(true);
myrequest.addArgument("email", "info@xxx.net");
myrequest.addArgument("password", "xxx");
myrequest.setPriority(ConnectionRequest.PRIORITY_CRITICAL);
// Register the listener before queueing, so the response cannot arrive
// while no listener is attached yet.
myrequest.addResponseListener(new ActionListener() {
    @Override
    public void actionPerformed(ActionEvent evt) {
        NetworkEvent n = (NetworkEvent)evt;
        // gets the data from the server as a byte array...
        byte[] data = (byte[])n.getMetaData();
        String response = new String(data);
    }
});
NetworkManager.getInstance().addToQueue(myrequest);

Thank you for your answer, Shai. I see in the repository that a MultipartRequest class has been added to Codename One (good job, Codename One team).
However, if I call writer.flush(), I always get a Stream Closed exception; if I comment that call out, the program works normally again. Below is the Codename One code, slightly edited to suit my needs:
/**
* A multipart post request allows a developer to submit large binary data
* files to the server in a post request
*
* #author Shai Almog
*/
public class MultipartRequest extends ConnectionRequest {
private String boundary;
private Hashtable args = new Hashtable();
private Hashtable mimeTypes = new Hashtable();
private static final String CRLF = "\r\n";
protected void readResponse(InputStream input) throws IOException {
// TODO Auto-generated method stub
StringBuffer stringBuffer = new StringBuffer();
int ch;
while ((ch = input.read()) != -1) {
stringBuffer.append((char) ch);
}
fireResponseListener(new NetworkEvent(this, stringBuffer.toString()));
}
/**
* Initialize variables
*/
public MultipartRequest() {
setPost(true);
setWriteRequest(true);
// Just generate some unique random value.
boundary = Long.toString(System.currentTimeMillis(), 16);
// Line separator required by multipart/form-data.
setContentType("multipart/form-data; boundary=" + boundary);
}
/**
* Adds a binary argument to the arguments
* #param name the name of the data
* #param data the data as bytes
* #param mimeType the mime type for the content
*/
public void addData(String name, byte[] data, String mimeType) {
args.put(name, data);
mimeTypes.put(name, mimeType);
}
/**
* Adds a binary argument to the arguments, notice the input stream will be read only during submission
* #param name the name of the data
* #param data the data stream
* #param mimeType the mime type for the content
*/
public void addData(String name, InputStream data, String mimeType) {
args.put(name, data);
mimeTypes.put(name, mimeType);
}
/**
* #inheritDoc
*/
public void addArgument(String name, String value) {
args.put(name, value);
}
/**
* #inheritDoc
*/
protected void buildRequestBody(OutputStream os) throws IOException {
Writer writer = null;
writer = new OutputStreamWriter(os, "UTF-8");
Enumeration e = args.keys();
while(e.hasMoreElements()) {
String key = (String)e.nextElement();
Object value = args.get(key);
writer.write("--" + boundary);
writer.write(CRLF);
if(value instanceof String) {
writer.write("Content-Disposition: form-data; name=\"" + key + "\"");
writer.write(CRLF);
writer.write("Content-Type: text/plain; charset=UTF-8");
writer.write(CRLF);
writer.write(CRLF);
// writer.flush(); // always error if I use this??
writer.write(Util.encodeBody((String)value));
writer.write(CRLF); // always error if I use this??
// writer.flush();
} else {
writer.write("Content-Disposition: form-data; name=\"" + key + "\"; filename=\"" + key +"\"");
writer.write(CRLF);
writer.write("Content-Type: ");
writer.write((String)mimeTypes.get(key));
writer.write(CRLF);
writer.write("Content-Transfer-Encoding: binary");
writer.write(CRLF);
writer.write(CRLF);
if(value instanceof InputStream) {
InputStream i = (InputStream)value;
byte[] buffer = new byte[8192];
int s = i.read(buffer);
while(s > -1) {
os.write(buffer, 0, s);
s = i.read(buffer);
}
} else {
os.write((byte[])value);
}
writer.write(CRLF);
// writer.flush();
}
writer.write(CRLF);
//writer.flush();
}
writer.write("--" + boundary + "--");
writer.write(CRLF);
writer.close();
}
Examples of how to use :
/**
 * Sample form with a Browse and an Upload button that posts an image through
 * {@code MultipartRequest} and shows the server response in a dialog.
 */
public class FormTest extends Form implements ActionListener {
    private Button btnUpload;
    private Button btnBrowse;
    private MultipartRequest request;

    /** Starts the network manager, lays out both buttons and listens to them. */
    public FormTest() {
        NetworkManager.getInstance().start();
        setLayout(new BoxLayout(BoxLayout.Y_AXIS));
        btnBrowse = new Button("Browse");
        btnUpload = new Button("Upload");
        addComponent(btnBrowse);
        addComponent(btnUpload);
        btnBrowse.addActionListener(this);
        btnUpload.addActionListener(this);
    }

    /**
     * Handles button clicks and the network response of the upload request.
     *
     * @param evt the button event or the {@code NetworkEvent} of the response
     */
    public void actionPerformed(ActionEvent evt) {
        if (evt.getSource().equals(btnBrowse)) {
            // Browse immediately. The previous code registered one more nested
            // listener on every click, so browsing only began on the second
            // click and the handlers piled up.
            browseForFile();
        }
        if (evt.getSource().equals(btnUpload)) {
            request = new MultipartRequest();
            request.setUrl("http://10.151.xx.xx/testuploadinfo.php");
            request.addArgument("Parameter1", "Value1");
            // add the data image
            request.addData("file", getTheImageByte("Your Url to Image here"), "image/png");
            request.setPriority(ConnectionRequest.PRIORITY_CRITICAL);
            request.addResponseListener(FormTest.this);
            NetworkManager.getInstance().addToQueue(request);
        }
        if (evt instanceof NetworkEvent) {
            NetworkEvent ne = (NetworkEvent) evt;
            Dialog.show("Result:", ne.getMetaData().toString(), "", "");
        }
    }

    /** Clears the previously chosen path and opens the file chooser. */
    private void browseForFile() {
        Utility.pathfile = "";
        Utility.main.getFile();
        new Thread(new Runnable() {
            public void run() {
                // NOTE(review): busy-waits until a path is set and then does
                // nothing; presumably a placeholder for follow-up work.
                while (Utility.pathfile.equals("")) {
                }
            }
        }).start();
    }

    /**
     * Reads the file at the given connector URL into memory.
     *
     * @param url the file connection URL of the image
     * @return the file content, or {@code null} if the file does not exist or
     *         could not be read
     */
    private byte[] getTheImageByte(String url) {
        byte[] data = null;
        InputStream inputStream = null;
        FileConnection fileConnection = null;
        try {
            fileConnection = (FileConnection) Connector.open(url);
            if (fileConnection.exists()) {
                inputStream = fileConnection.openInputStream();
                data = IOUtilities.streamToBytes(inputStream);
            }
        } catch (Exception e) {
            // Best effort: a failed read is reported to the caller as null.
            data = null;
        } finally {
            // Release both resources on success as well as on failure.
            try {
                if (inputStream != null) {
                    inputStream.close();
                }
            } catch (Exception ignored) {
                // nothing useful can be done if closing fails
            }
            try {
                if (fileConnection != null) {
                    fileConnection.close();
                }
            } catch (Exception ignored) {
                // nothing useful can be done if closing fails
            }
        }
        return data;
    }
}
And simple PHP code :
<?php
// Echo the uploaded-file metadata into the response.
print_r($_FILES);
// Fixed target file name used for every upload.
$new_image_name = "image.jpg";
// Move the temporary upload of form field "file" into the sia/ directory.
move_uploaded_file($_FILES["file"]["tmp_name"], "sia/".$new_image_name);
?>

Sure you can just add the image data as an argument to the request but you will need to encode it. Alternatively you can override the method:
protected void buildRequestBody(OutputStream os) throws IOException
And write into the post output stream any arbitrary data you need.

Do Not Crawl certain page in a particular link(exclude certain url from crawling)

Below is the code in my MyCrawler.java. It crawls all the links that match the prefixes given in href.startsWith, but suppose I do not want to crawl one particular page, http://inv.somehost.com/people/index.html. How can I do this in my code?
/** Creates the crawler; no initialisation is performed here. */
public MyCrawler() {
}
/**
 * Tells the crawler whether a discovered URL should be fetched. A URL is
 * accepted when its lower-cased form starts with one of the three host
 * prefixes below.
 *
 * @param url the candidate URL
 * @return {@code true} if the URL starts with an allowed prefix
 */
public boolean shouldVisit(WebURL url) {
    final String address = url.getURL().toLowerCase();
    final String[] allowedPrefixes = {
        "http://www.somehost.com/",
        "http://inv.somehost.com/",
        "http://jo.somehost.com/"
    };
    for (String prefix : allowedPrefixes) {
        if (address.startsWith(prefix)) {
            // Open question: how to skip http://inv.somehost.com/data/index.html
            // even though it matches an allowed prefix.
            return true;
        }
    }
    return false;
}
/**
 * Called for every fetched page. Opens a connection to the page URL, scans
 * the response headers and, for each header entry mentioning text/html or
 * text/xhtml, appends the Tika-extracted text of the page to a file under
 * crawl_html/.
 *
 * @param page the page handed over by the crawler
 */
public void visit(Page page) {
    String url = page.getWebURL().getURL();
    try {
        URL url1 = new URL(url);
        System.out.println("URL:- " + url1);
        URLConnection connection = url1.openConnection();
        Map<String, List<String>> responseMap = connection.getHeaderFields();
        for (Map.Entry<String, List<String>> entry : responseMap.entrySet()) {
            // Entry text looks like: Content-Type=[text/html; charset=ISO-8859-1]
            String key = entry.toString();
            if (key.contains("text/html") || key.contains("text/xhtml")) {
                System.out.println(key);
                // NOTE(review): matcher(...) never returns null, so this check
                // always passes; the intended test (matches()/find()?) is not
                // clear from here, so it is kept unchanged.
                if (filters.matcher(key) != null) {
                    System.out.println(url1);
                    appendPageText(url1, key);
                }
            }
        }
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    System.out.println("=============");
}

/**
 * Appends the extracted text of one page to crawl_html/&lt;md5 of url&gt;.txt.
 * The writer is closed in a finally block so the file handle is released even
 * when fetching or parsing the page fails.
 */
private void appendPageText(URL pageUrl, String header) {
    PrintWriter out = null;
    try {
        final File parentDir = new File("crawl_html");
        parentDir.mkdir();
        final String hash = MD5Util.md5Hex(pageUrl.toString());
        final File file = new File(parentDir, hash + ".txt");
        // Create the file if it does not exist yet.
        file.createNewFile();
        System.out.println("hash:-" + hash);
        System.out.println(file);
        // Open in append mode so earlier content is kept.
        out = new PrintWriter(new FileOutputStream(file, true));
        Tika t = new Tika();
        String content = t.parseToString(pageUrl);
        out.println("===============================================================");
        out.println(pageUrl);
        out.println(header);
        out.println(content);
        out.println("===============================================================");
    } catch (IOException e) {
        // Also covers FileNotFoundException.
        e.printStackTrace();
    } catch (TikaException e) {
        e.printStackTrace();
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
And this is my Controller.java code from where MyCrawler is getting called..
/**
 * Entry point that seeds the crawl controller and starts {@code MyCrawler}.
 */
public class Controller {
    /**
     * Configures and runs the crawl.
     *
     * @param args command-line arguments (unused)
     * @throws Exception propagated from the crawl controller
     */
    public static void main(String[] args) throws Exception {
        CrawlController controller = new CrawlController("/data/crawl/root");
        controller.addSeed("http://www.somehost.com/");
        controller.addSeed("http://inv.somehost.com/");
        controller.addSeed("http://jo.somehost.com/");
        // Apply the settings before starting: start() runs the crawl, so
        // values set afterwards come too late to affect it.
        controller.setPolitenessDelay(200);
        controller.setMaximumCrawlDepth(2);
        controller.start(MyCrawler.class, 20);
    }
}
Any suggestions will be appreciated..

How about adding a property to tell which urls you want to exclude.
Add to your exclusions list all the pages that you don't want them to get crawled.
Here is an example:
/**
 * Crawler that refuses every URL matching one of its exclusion patterns and
 * otherwise accepts only URLs under a fixed prefix.
 */
public class MyCrawler extends WebCrawler {
    List<Pattern> exclusionsPatterns;

    /** Builds the exclusion list; add one regular expression per excluded URL family. */
    public MyCrawler() {
        final Pattern investorPages = Pattern.compile("http://investor\\.somehost\\.com.*");
        exclusionsPatterns = new ArrayList<Pattern>();
        exclusionsPatterns.add(investorPages);
    }

    /**
     * Decides whether the given URL should be visited.
     *
     * @param url the candidate URL
     * @return {@code false} if the lower-cased URL matches an exclusion
     *         pattern; otherwise whether it starts with the allowed prefix
     */
    public boolean shouldVisit(WebURL url) {
        final String address = url.getURL().toLowerCase();
        if (isExcluded(address)) {
            return false;
        }
        return address.startsWith("http://www.ics.uci.edu/");
    }

    /** Returns true when the whole address matches any exclusion pattern. */
    private boolean isExcluded(String address) {
        for (Pattern candidate : exclusionsPatterns) {
            if (candidate.matcher(address).matches()) {
                return true;
            }
        }
        return false;
    }
}
In this example we specify that all URLs that start with http://investor.somehost.com should not be crawled.
So these won't be crawled:
http://investor.somehost.com/index.html
http://investor.somehost.com/something/else
I recommend reading up on regular expressions.

Develop Reference

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

Multithreading, Read multiple files and send them to server in parallel - java

Related

Discontinuous FTP download throws "Read timed out" or "Connection reset"

How to serve static content using suns simple httpserver

using dbpedia spotlight in java or scala

How do I post a picture/image using the IO Codenameone

Do Not Crawl certain page in a particular link(exclude certain url from crawling)

Categories

Resources