ElasticSearch: indexing documents using the Java ExecutorService

I am trying to index more than 100k documents using the Java ExecutorService in order to index faster.
I am reading 100k+ file paths from the index documents_qa using the Scroll API. The actual files are on my local d:\ drive. Using each file path, I read the actual file, convert it to Base64, and reindex the Base64 content into another index, document_attachment_qa.
Please find my code below
public class DocumentIndex {
private final static String INDEX = "documents_qa";
private final static String TYPE = "doc";
private static final Logger logger = Logger.getLogger(DocumentIndex.class.getName());
public static void main(String[] args) throws IOException {
ExecutorService executor = Executors.newFixedThreadPool(5);
List<String> filePathList = new ArrayList<String>();
Document doc=new Document();
logger.info("Started Indexing the Document.....");
//Fetching Id, FilePath & FileName from Document Index.
SearchRequest searchRequest = new SearchRequest(INDEX);
searchRequest.types(TYPE);
final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(60L)); //part of Scroll API
searchRequest.scroll(scroll); //part of Scroll API
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
QueryBuilder qb = QueryBuilders.matchAllQuery();
searchSourceBuilder.query(qb);
searchRequest.source(searchSourceBuilder);
SearchResponse searchResponse = SearchEngineClient.getInstance3().search(searchRequest);
String scrollId = searchResponse.getScrollId(); //part of Scroll API
SearchHit[] searchHits = searchResponse.getHits().getHits();
long totalHits=searchResponse.getHits().totalHits;
//part of Scroll API -- Starts
while (searchHits != null && searchHits.length > 0) {
SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
scrollRequest.scroll(scroll);
searchResponse = SearchEngineClient.getInstance3().searchScroll(scrollRequest);
scrollId = searchResponse.getScrollId();
searchHits = searchResponse.getHits().getHits();
Map<String, Object> jsonMap ;
for (SearchHit hit : searchHits) {
Map<String, Object> sourceAsMap = hit.getSourceAsMap();
if(sourceAsMap != null) {
doc.setId((int) sourceAsMap.get("id"));
doc.setApp_language(String.valueOf(sourceAsMap.get("app_language")));
doc.setFilename(String.valueOf(sourceAsMap.get("filename")));
doc.setPath(String.valueOf(sourceAsMap.get("path")));
}
if(doc.getPath()!= null && doc.getFilename() != null) {
filePathList.add(doc.getPath().concat(doc.getFilename()));
}
}
for (int i = 0; i < filePathList.size(); i++) {
Runnable worker = new WorkerThread(doc);
executor.execute(worker);
}
}
executor.shutdown();
while (!executor.isTerminated()) {
}
System.out.println("Finished all threads");
}
}
Please find the worker thread below:
public class WorkerThread implements Runnable {
private String command;
private Document doc;
private final static String ATTACHMENT = "document_attachment_qa";
private static final Logger logger = Logger.getLogger(Thread.currentThread().getStackTrace()[0].getClassName());
Map<String, Object> jsonMap ;
List<String> filePathList = new ArrayList<String>();
public WorkerThread(Document doc){
this.doc=doc;
}
@Override
public void run() {
File all_files_path = new File("d:\\All_Files_Path.txt");
File available_files = new File("d:\\Available_Files.txt");
int totalFilePath=1;
int totalAvailableFile=1;
String encodedfile = null;
File file=null;
if(doc.getPath()!= null && doc.getFilename() != null) {
filePathList.add(doc.getPath().concat(doc.getFilename()));
}
PrintWriter out=null;
try{
out = new PrintWriter(new FileOutputStream(all_files_path, true));
for(int i=0;i<filePathList.size();i++) {
out.println("FilePath Count ---"+totalFilePath+":::::::ID---> "+doc.getId()+"File Path --->"+filePathList.get(i));
}
} catch (FileNotFoundException e) {
e.printStackTrace();
}
finally {
out.close();
}
for(int i=0;i<filePathList.size();i++) {
file = new File(filePathList.get(i));
if(file.exists() && !file.isDirectory()) {
try {
try(PrintWriter out1 = new PrintWriter(new FileOutputStream(available_files, true)) ){
out1.println("Available File Count --->"+totalAvailableFile+":::::::ID---> "+doc.getId()+"File Path --->"+filePathList.get(i));
totalAvailableFile++;
}
FileInputStream fileInputStreamReader = new FileInputStream(file);
byte[] bytes = new byte[(int) file.length()];
fileInputStreamReader.read(bytes);
encodedfile = new String(Base64.getEncoder().encodeToString(bytes));
fileInputStreamReader.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
jsonMap = new HashMap<String, Object>();
jsonMap.put("id", doc.getId());
jsonMap.put("app_language", doc.getApp_language());
jsonMap.put("fileContent", encodedfile);
System.out.println(Thread.currentThread().getName()+" End.");
String id=Long.toString(doc.getId());
IndexRequest request = new IndexRequest(ATTACHMENT, "doc", id )
.source(jsonMap)
.setPipeline(ATTACHMENT);
try {
IndexResponse response = SearchEngineClient.getInstance3().index(request);
} catch(ElasticsearchException e) {
if (e.status() == RestStatus.CONFLICT) {
}
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
processCommand();
}
}
private void processCommand() {
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
@Override
public String toString(){
return this.command;
}
}
The issue is that after indexing the first document it takes a very long time to process the next one, and eventually the code terminates without indexing any further.
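For reference, a minimal sketch (not the original code) of how the submission loop could hand each file path to its own task and then wait with awaitTermination instead of spinning in an empty loop. It assumes a hypothetical WorkerThread constructor that takes the path to process, rather than the shared Document instance:
ExecutorService executor = Executors.newFixedThreadPool(5);
for (String filePath : filePathList) {
    // each task gets its own input instead of sharing one mutable Document
    executor.execute(new WorkerThread(filePath)); // hypothetical constructor taking the path
}
executor.shutdown(); // stop accepting new tasks
try {
    // block until all submitted tasks finish, instead of busy-waiting on isTerminated()
    if (!executor.awaitTermination(2, TimeUnit.HOURS)) {
        executor.shutdownNow(); // interrupt whatever is still running after the timeout
    }
} catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    executor.shutdownNow();
}
System.out.println("Finished all threads");
Removing the Thread.sleep(5000) in processCommand() and submitting tasks only once per scroll page (instead of resubmitting the whole growing filePathList on every iteration) would also help here.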

Related

Read from Splunk source and write to topic - same record written repeatedly, not pulling latest records

The same record is being written to the topic, and the latest records are not being pulled from Splunk. Time parameters are set in the start method to pull the last one minute of data. Any inputs?
Currently I don't set an offset for the source. When poll runs each time, does it look up the source offset and then poll? In the logs, can we use time as the offset?
@Override
public List<SourceRecord> poll() throws InterruptedException {
List<SourceRecord> results = new ArrayList<>();
Map<String, String> recordProperties = new HashMap<String, String>();
while (true) {
try {
String line = null;
InputStream stream = job.getResults(previewArgs);
String earlierKey = null;
String value = null;
ResultsReaderCsv csv = new ResultsReaderCsv(stream);
HashMap<String, String> event;
while ((event = csv.getNextEvent()) != null) {
for (String key: event.keySet()) {
if(key.equals("rawlogs")){
recordProperties.put("rawlogs", event.get(key)); results.add(extractRecord(Splunklog.SplunkLogSchema(), line, recordProperties));
return results;}}}
csv.close();
stream.close();
Thread.sleep(500);
} catch(Exception ex) {
System.out.println("Exception occurred : " + ex);
}
}
}
private SourceRecord extractRecord(Schema schema, String line, Map<String, String> recordProperties) {
Map<String, String> sourcePartition = Collections.singletonMap(FILENAME_FIELD, FILENAME);
Map<String, String> sourceOffset = Collections.singletonMap(POSITION_FIELD, recordProperties.get(OFFSET_KEY));
return new SourceRecord(sourcePartition, sourceOffset, TOPIC_NAME, schema, recordProperties);
}
@Override
public void start(Map<String, String> properties) {
try {
config = new SplunkSourceTaskConfig(properties);
} catch (ConfigException e) {
throw new ConnectException("Couldn't start SplunkSourceTask due to configuration error", e);
}
HttpService.setSslSecurityProtocol(SSLSecurityProtocol.TLSv1_2);
Service service = new Service("splnkip", port);
String credentials = "user:pwd";
String basicAuthHeader = Base64.encode(credentials.getBytes());
service.setToken("Basic " + basicAuthHeader);
String startOffset = readOffset();
JobArgs jobArgs = new JobArgs();
if (startOffset != null) {
log.info("-------------------------------task OFFSET!NULL ");
jobArgs.setExecutionMode(JobArgs.ExecutionMode.BLOCKING);
jobArgs.setSearchMode(JobArgs.SearchMode.NORMAL);
jobArgs.setEarliestTime(startOffset);
jobArgs.setLatestTime("now");
jobArgs.setStatusBuckets(300);
} else {
log.info("-------------------------------task OFFSET=NULL ");
jobArgs.setExecutionMode(JobArgs.ExecutionMode.BLOCKING);
jobArgs.setSearchMode(JobArgs.SearchMode.NORMAL);
jobArgs.setEarliestTime("+419m");
jobArgs.setLatestTime("+420m");
jobArgs.setStatusBuckets(300);
}
String mySearch = "search host=search query";
job = service.search(mySearch, jobArgs);
while (!job.isReady()) {
try {
Thread.sleep(500);
} catch (InterruptedException ex) {
log.error("Exception occurred while waiting for job to start: " + ex);
}
}
previewArgs = new JobResultsPreviewArgs();
previewArgs.put("output_mode", "csv");
stop = new AtomicBoolean(false);
}
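On the offset question: a Kafka Connect source task can read the last committed offset back through the task context in start() and use it as the earliest time of the next search; the offset is only persisted if poll() actually puts a value into the SourceRecord's sourceOffset map. A rough sketch, reusing the FILENAME_FIELD / POSITION_FIELD / OFFSET_KEY keys already used in extractRecord() (the time value itself, latestEventTime, is hypothetical):
// in start(), before building jobArgs
Map<String, Object> lastOffset = context.offsetStorageReader()
        .offset(Collections.singletonMap(FILENAME_FIELD, FILENAME));
String startOffset = null;
if (lastOffset != null && lastOffset.get(POSITION_FIELD) != null) {
    // value previously stored via the sourceOffset map of an emitted SourceRecord
    startOffset = (String) lastOffset.get(POSITION_FIELD);
}
// in poll(), record the newest event time so it becomes the next stored offset
recordProperties.put(OFFSET_KEY, latestEventTime);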

ElasticSearch - Java API indexing 100K+ PDFs using producer & consumer

I am indexing PDFs using the Java API. I have installed the ingest-attachment processor plugin, and from my Java code I convert each PDF into Base64 and index the encoded form of the PDF.
The PDFs are on my machine's d:\ drive. The file paths are stored in an ElasticSearch index named documents_local, so I fetch all the records from documents_local to get each file path, read the PDF file, encode it into Base64, and then index it.
For this process I use the scroll API to fetch the file paths from the index, because I have more than 100k documents. Indexing 20000 PDFs takes 8 hours with the Java code below.
So I tried to split this indexing process.
I have created 3 classes:
Controller.java
Producer.java
Consumer.java
In Controller.java I read all the file paths from my index, store them in an ArrayList, and pass them to the Producer class.
In Producer.java I read each PDF using its file path, convert it into Base64, and push it onto the queue.
In Consumer.java I read all the messages from the queue that are published by Producer.java.
My idea is to index the encoded files in Consumer.java (which is not implemented, and I am not sure how to do that).
Please find my Java code below.
Controller.java
public class Controller {
private static final int QUEUE_SIZE = 2;
private static BlockingQueue<String> queue;
private static Collection<Thread> producerThreadCollection, allThreadCollection;
private final static String INDEX = "documents_local";
private final static String ATTACHMENT = "document_suggestion";
private final static String TYPE = "doc";
private static final Logger logger = Logger.getLogger(Thread.currentThread().getStackTrace()[0].getClassName());
public static void main(String[] args) throws IOException {
RestHighLevelClient restHighLevelClient = null;
Document doc=new Document();
List<String> filePathList = new ArrayList<String>();
producerThreadCollection = new ArrayList<Thread>();
allThreadCollection = new ArrayList<Thread>();
queue = new LinkedBlockingDeque<String>(QUEUE_SIZE);
SearchRequest searchRequest = new SearchRequest(INDEX);
searchRequest.types(TYPE);
final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(60L)); //part of Scroll API
searchRequest.scroll(scroll); //part of Scroll API
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
QueryBuilder qb = QueryBuilders.matchAllQuery();
searchSourceBuilder.query(qb);
searchRequest.source(searchSourceBuilder);
SearchResponse searchResponse = SearchEngineClient.getInstance3().search(searchRequest);
String scrollId = searchResponse.getScrollId(); //part of Scroll API
SearchHit[] searchHits = searchResponse.getHits().getHits();
long totalHits=searchResponse.getHits().totalHits;
logger.info("Total Hits --->"+totalHits);
//part of Scroll API -- Starts
while (searchHits != null && searchHits.length > 0) {
SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
scrollRequest.scroll(scroll);
searchResponse = SearchEngineClient.getInstance3().searchScroll(scrollRequest);
scrollId = searchResponse.getScrollId();
searchHits = searchResponse.getHits().getHits();
for (SearchHit hit : searchHits) {
Map<String, Object> sourceAsMap = hit.getSourceAsMap();
if(sourceAsMap != null) {
doc.setId((int) sourceAsMap.get("id"));
doc.setApp_language(String.valueOf(sourceAsMap.get("app_language")));
}
filePathList.add(doc.getPath().concat(doc.getFilename()));
}
}
createAndStartProducers(filePathList);
createAndStartConsumers(filePathList);
for(Thread t: allThreadCollection){
try {
t.join();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
System.out.println("Controller finished");
}
private static void createAndStartProducers(List<String> filePathList){
for(int i = 1; i <= filePathList.size(); i++){
Producer producer = new Producer(Paths.get(filePathList.get(i)), queue);
Thread producerThread = new Thread(producer,"producer-"+i);
producerThreadCollection.add(producerThread);
producerThread.start();
}
allThreadCollection.addAll(producerThreadCollection);
}
private static void createAndStartConsumers(List<String> filePathList){
for(int i = 0; i < filePathList.size(); i++){
Thread consumerThread = new Thread(new Consumer(queue), "consumer-"+i);
allThreadCollection.add(consumerThread);
consumerThread.start();
}
}
public static boolean isProducerAlive(){
for(Thread t: producerThreadCollection){
if(t.isAlive())
return true;
}
return false;
}
}
Producer.java
public class Producer implements Runnable {
private Path fileToRead;
private BlockingQueue<String> queue;
File file=null;
public Producer(Path filePath, BlockingQueue<String> q){
fileToRead = filePath;
queue = q;
}
public void run() {
String encodedfile = null;
BufferedReader reader = null;
try {
reader = Files.newBufferedReader(fileToRead);
} catch (IOException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
File file=new File(reader.toString());
if(file.exists() && !file.isDirectory()) {
try {
FileInputStream fileInputStreamReader = new FileInputStream(file);
byte[] bytes = new byte[(int) file.length()];
fileInputStreamReader.read(bytes);
encodedfile = new String(Base64.getEncoder().encodeToString(bytes));
fileInputStreamReader.close();
System.out.println(Thread.currentThread().getName()+" finished");
} catch (IOException e) {
e.printStackTrace();
}
}
else
{
System.out.println("File not exists");
}
}
}
Consumer.java (incomplete class; I am not sure how I can index from the consumer class, so I am just showing its skeleton.)
public class Consumer implements Runnable {
private BlockingQueue<String> queue;
File file=null;
public Consumer(BlockingQueue<String> q){
queue = q;
}
public void run(){
while(true){
String line = queue.poll();
if(line == null && !Controller.isProducerAlive())
return;
if(line != null){
System.out.println(Thread.currentThread().getName()+" processing line: "+line);
//Do something with the line here like see if it contains a string
}
}
}
}
With the piece of code below I have indexed the encoded file, but it takes too long because I have 100k documents. That is why I am trying the producer & consumer approach.
jsonMap = new HashMap<>();
jsonMap.put("id", doc.getId());
jsonMap.put("app_language", doc.getApp_language());
jsonMap.put("fileContent", result);
String id=Long.toString(doc.getId());
IndexRequest request = new IndexRequest(ATTACHMENT, "doc", id )
.source(jsonMap)
.setPipeline(ATTACHMENT);
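Just as a sketch of one way the Consumer could do the indexing (not a drop-in implementation): let the producer enqueue a small object per document instead of a plain String, carrying the id, app_language and the Base64 content (here the question's Document class with a hypothetical getFileContent() getter), and let the consumer build the same IndexRequest as above:
public class Consumer implements Runnable {
    private static final String ATTACHMENT = "document_suggestion";
    private final BlockingQueue<Document> queue; // assumes the producer enqueues Document objects

    public Consumer(BlockingQueue<Document> q) {
        this.queue = q;
    }

    @Override
    public void run() {
        while (true) {
            try {
                // poll with a timeout so the thread can exit once producers are done
                Document doc = queue.poll(5, TimeUnit.SECONDS);
                if (doc == null) {
                    if (!Controller.isProducerAlive()) {
                        return; // queue drained and no producer left
                    }
                    continue;
                }
                Map<String, Object> jsonMap = new HashMap<>();
                jsonMap.put("id", doc.getId());
                jsonMap.put("app_language", doc.getApp_language());
                jsonMap.put("fileContent", doc.getFileContent()); // hypothetical getter for the Base64 string
                IndexRequest request = new IndexRequest(ATTACHMENT, "doc", Long.toString(doc.getId()))
                        .source(jsonMap)
                        .setPipeline(ATTACHMENT);
                SearchEngineClient.getInstance3().index(request); // same client as in the question
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
Grouping several of these documents into one BulkRequest before sending would reduce the per-request overhead further, which usually matters more for throughput than the number of threads.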

NiFi: how to update config data from a custom processor

In my custom processor I need to update a config file (it is placed in one of the NiFi folders; it is not the original config) based on data I get from the upstream connection, but I can't get the desired result. What should I do?
Is there any way I can control the upstream connection's FlowFile destination? I mean, when I debug, I use the function fileQueue.drainTo(file, batchSize), and at this point "file" was null.
One more thing I am interested in: on the line session.commit() I always get the error "transfer relationship isn't specified", and because of it the whole operation fails. Why?
It also seems that when I call session.exportTo() it doesn't write anything into dotCopyFile. How should I fix that?
Should I clear fileQueue and then call session.commit()?
FlowFile flowfile;
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();
final ArrayList value = new ArrayList<>();
flowfile = session.get();
if (flowfile == null) {
return;
}
value.add(flowfile.getAttribute("filename"));
session.remove(flowfile);
final File directory = new File(context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue());
final boolean keepingSourceFile = context.getProperty(KEEP_SOURCE_FILE).asBoolean();
final ComponentLog logger = getLogger();
if (fileQueue.size() < 100) {
final long pollingMillis = context.getProperty(POLLING_INTERVAL).asTimePeriod(TimeUnit.MILLISECONDS);
if ((queueLastUpdated.get() < System.currentTimeMillis() - pollingMillis) && listingLock.tryLock()) {
try {
final Set<File> listing = performListing(directory, fileFilterRef.get(), context.getProperty(RECURSE).asBoolean().booleanValue());
queueLock.lock();
try {
listing.removeAll(inProcess);
if (!keepingSourceFile) {
listing.removeAll(recentlyProcessed);
}
fileQueue.clear();
fileQueue.addAll(listing);
queueLastUpdated.set(System.currentTimeMillis());
recentlyProcessed.clear();
if (listing.isEmpty()) {
context.yield();
}
} finally {
queueLock.unlock();
}
} finally {
listingLock.unlock();
}
}
}
final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
final List<File> file = new ArrayList<>(batchSize);
queueLock.lock();
try {
fileQueue.drainTo(file, batchSize);
if (file.isEmpty()) {
return;
} else {
inProcess.addAll(file);
}
} finally {
queueLock.unlock();
}
//make xml parsing
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
try {
dBuilder = dbFactory.newDocumentBuilder();
} catch (ParserConfigurationException e) {
e.printStackTrace();
}
try {
File f = file.get(0);
doc = dBuilder.parse(f);
} catch (IOException e) {
e.printStackTrace();
} catch (org.xml.sax.SAXException e) {
e.printStackTrace();
}
NodeList nList = doc.getElementsByTagName("localAttributes");
for (int temp = 0; temp < nList.getLength(); temp++) {
Node nNode = nList.item(temp);
if (nNode.getNodeType() == Node.ELEMENT_NODE) {
Element eElement = (Element) nNode;
start = eElement.getElementsByTagName("start").item(0).getTextContent();
startDate = eElement.getElementsByTagName("startDate").item(0).getTextContent();
endDate = eElement.getElementsByTagName("endDate").item(0).getTextContent();
patch = eElement.getElementsByTagName("patch").item(0).getTextContent();
runAs = eElement.getElementsByTagName("runAs").item(0).getTextContent();
makeVersion = eElement.getElementsByTagName("makeVersion").item(0).getTextContent();
///parameter = eElement.getElementsByTagName("parameter").item(0).getTextContent();
}
}
final ListIterator<File> itr = file.listIterator();
FlowFile flowFile1 = null;
try {
final Path directoryPath = directory.toPath();
while (itr.hasNext()) {
final File files = itr.next();
final Path filePath = files.toPath();
final Path relativePath = directoryPath.relativize(filePath.getParent());
String relativePathString = relativePath.toString() + "/";
if (relativePathString.isEmpty()) {
relativePathString = "./";
}
final Path absPath = filePath.toAbsolutePath();
final String absPathString = absPath.getParent().toString() + "/";
flowFile1 = session.create();
final long importStart = System.nanoTime();
flowFile1 = session.importFrom(filePath, keepingSourceFile, flowFile1);
final long importNanos = System.nanoTime() - importStart;
final long importMillis = TimeUnit.MILLISECONDS.convert(importNanos, TimeUnit.NANOSECONDS);
flowFile1 = session.putAttribute(flowFile1, CoreAttributes.FILENAME.key(), files.getName());
flowFile1 = session.putAttribute(flowFile1, CoreAttributes.PATH.key(), relativePathString);
flowFile1 = session.putAttribute(flowFile1, CoreAttributes.ABSOLUTE_PATH.key(), absPathString);
Map<String, String> attributes = getAttributesFromFile(filePath);
if (attributes.size() > 0) {
flowFile1 = session.putAllAttributes(flowFile1, attributes);
}
InputStream ffStream = session.read(flowFile1);
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = builderFactory.newDocumentBuilder();
Document xmlDocument = builder.parse(ffStream);
XPath xPath = XPathFactory.newInstance().newXPath();
XPathExpression myNodeList = (XPathExpression) xPath.compile("/localAttributes");
Node nodeGettingChanged = (Node) myNodeList.evaluate(xmlDocument, XPathConstants.NODE);
NodeList childNodes = nodeGettingChanged.getChildNodes();
boolean make=false;
for (int i = 0; i != childNodes.getLength(); ++i) {
Node child = childNodes.item(i);
if (!(child instanceof Element))
continue;
if(child.getNodeName().equals("start")){
String date;
for(int j=0;j<value.size();j++) {
if(value.get(j).length()>10){
date=value.get(j).substring(0,10);
}
else{
date=value.get(j);
}
if (date == child.getFirstChild().getTextContent()){
child.getFirstChild().setNodeValue(addOneDay(child.getFirstChild().getTextContent()));
make=true;
}
}
}
if(make){
if(child.getNodeName().equals("runAs")){
child.getFirstChild().setNodeValue("true");
}
}
}
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = null;
transformer = transformerFactory.newTransformer();
DOMSource source = new DOMSource(xmlDocument);
String path = "C:/Users/user/Desktop/nifi-1.3.0/nifi-assembly/target/nifi-1.3.0-bin/nifi-1.3.0/1/conf.xml";
File f = new File(path);
StreamResult file1 = new StreamResult(f);
try {
transformer.transform(source, file1);
} catch (TransformerException e) {
e.printStackTrace();
}
session.write(flowFile1, new StreamCallback() {
@Override
public void process(InputStream inputStream, OutputStream outputStream) throws IOException {
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = null;
try {
transformer = transformerFactory.newTransformer();
} catch (TransformerConfigurationException e) {
e.printStackTrace();
}
DOMSource source = new DOMSource(xmlDocument);
ffStream.close();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
StreamResult result = new StreamResult(bos);
try {
transformer.transform(source, result);
} catch (TransformerException e) {
e.printStackTrace();
}
byte[] array = bos.toByteArray();
outputStream.write(array);
}
});
Path tempDotCopyFile = null;
try {
final Path rootDirPath = Paths.get("C://Users//user//Desktop//try2//nifi-1.3.0//1");
final Path tempCopyFile = rootDirPath.resolve("." + flowFile1.getAttribute(CoreAttributes.FILENAME.key()));
final Path copyFile = rootDirPath.resolve(flowFile1.getAttribute(CoreAttributes.FILENAME.key()));
if (!Files.exists(rootDirPath)) {
if (context.getProperty(CREATE_DIRS).asBoolean()) {
Files.createDirectories(rootDirPath);
} else {
flowFile1 = session.penalize(flowFile1);
session.transfer(flowFile1,REL_FAILURE);
logger.error("Penalizing {} and routing to 'failure' because the output directory {} does not exist and Processor is "
+ "configured not to create missing directories", new Object[]{flowFile1, rootDirPath});
return;
}
}
final Path dotCopyFile = tempCopyFile;
tempDotCopyFile = dotCopyFile;
Path finalCopyFile = copyFile;
final Path finalCopyFileDir = finalCopyFile.getParent();
if (Files.exists(finalCopyFileDir)) { // check if too many files already
final int numFiles = finalCopyFileDir.toFile().list().length;
if (numFiles >= 34) {
flowFile1 = session.penalize(flowFile1);
logger.warn("Penalizing {} and routing to 'failure' because the output directory {} has {} files, which exceeds the "
+ "configured maximum number of files", new Object[]{flowFile1, finalCopyFileDir, numFiles});
session.transfer(flowFile1,REL_FAILURE);
return;
}
}
if (Files.exists(finalCopyFile)) {
switch (conflictResponse) {
case REPLACE_RESOLUTION:
Files.delete(finalCopyFile);
logger.info("Deleted {} as configured in order to replace with the contents of {}", new Object[]{finalCopyFile, flowFile1});
break;
case IGNORE_RESOLUTION:
session.transfer(flowFile1, REL_SUCCESS);
logger.info("Transferring {} to success because file with same name already exists", new Object[]{flowFile1});
return;
case FAIL_RESOLUTION:
flowFile1 = session.penalize(flowFile1);
logger.warn("Penalizing {} and routing to failure as configured because file with the same name already exists", new Object[]{flowFile1});
session.transfer(flowFile1,REL_FAILURE);
return;
default:
break;
}
}
session.exportTo(flowFile1, dotCopyFile, false);
final String permissions = "-rwxrwx---";
if (permissions != null && !permissions.trim().isEmpty()) {
try {
String perms = stringPermissions(permissions);
if (!perms.isEmpty()) {
Files.setPosixFilePermissions(dotCopyFile, PosixFilePermissions.fromString(perms));
}
} catch (Exception e) {
logger.warn("Could not set file permissions to {} because {}", new Object[]{permissions, e});
}
}
boolean renamed = false;
for (int i = 0; i < 10; i++) { // try rename up to 10 times.
if (dotCopyFile.toFile().renameTo(finalCopyFile.toFile())) {
renamed = true;
break;// rename was successful
}
Thread.sleep(100L);// try waiting a few ms to let whatever might cause rename failure to resolve
}
if (!renamed) {
if (Files.exists(dotCopyFile) && dotCopyFile.toFile().delete()) {
logger.debug("Deleted dot copy file {}", new Object[]{dotCopyFile});
}
throw new ProcessException("Could not rename: " + dotCopyFile);
} else {
logger.info("Produced copy of {} at location {}", new Object[]{flowFile1, finalCopyFile});
}
/*session.getProvenanceReporter().send(flowFile, finalCopyFile.toFile().toURI().toString(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
session.transfer(flowFile, REL_SUCCESS);*/
session.getProvenanceReporter().receive(flowFile1, files.toURI().toString(), importMillis);
//session.transfer(flowFile1, REL_SUCCESS);
session.remove(flowFile1);
} catch (final Throwable t) {
if (tempDotCopyFile != null) {
try {
Files.deleteIfExists(tempDotCopyFile);
} catch (final Exception e) {
logger.error("Unable to remove temporary file {} due to {}", new Object[]{tempDotCopyFile, e});
}
}
flowFile1 = session.penalize(flowFile1);
logger.error("Penalizing {} and transferring to failure due to {}", new Object[]{flowFile1, t});
session.transfer(flowFile1,REL_FAILURE);
}
}
if (!isScheduled()) { // if processor stopped, put the rest of the files back on the queue.
queueLock.lock();
try {
while (itr.hasNext()) {
final File nextFile = itr.next();
fileQueue.add(nextFile);
inProcess.remove(nextFile);
}
} finally {
queueLock.unlock();
}
}
} catch (IOException e1) {
e1.printStackTrace();
} catch (TransformerConfigurationException e1) {
e1.printStackTrace();
} catch (ParserConfigurationException e1) {
e1.printStackTrace();
} catch (XPathExpressionException e1) {
e1.printStackTrace();
} catch (org.xml.sax.SAXException e) {
e.printStackTrace();
}
session.commit();
}
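Regarding the "transfer relationship isn't specified" error on session.commit(): every FlowFile that is obtained from or created in the session must be either transferred to a relationship or removed before commit, otherwise the commit fails. A stripped-down sketch of the pattern (REL_SUCCESS and REL_FAILURE stand for the processor's own relationships; this is illustrative, not the full processor above):
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return; // nothing to do in this invocation
    }
    try {
        // ... rewrite the config file / FlowFile content here ...
        session.transfer(flowFile, REL_SUCCESS); // account for the FlowFile before committing
    } catch (Exception e) {
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE); // or session.remove(flowFile) if it should not continue
    }
    session.commit(); // succeeds only once every FlowFile has been transferred or removed
}
In the code above, any path that creates flowFile1 and then returns or throws before it is transferred or removed will trigger exactly that error on commit.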

XStream - change ArrayList element names

I'm trying to create an XML report that can be opened as an XLS table.
I have the following output:
<Report>
<test>
<string>4.419</string>
<string>4.256</string>
</test>
</Report>
from this code:
/**
* declare arrays
*/
// ArrayList<String> test = new ArrayList<String>();
ArrayList<String> stats = new ArrayList<String>();
// ArrayList<String> count = new ArrayList<String>();
/**
*return array list with loading times
*/
public ArrayList launch() {
for (int i = 0; i < 2; i++) {
// ui.off();
// ui.on();
device.pressHome();
ui.openProgramInMenu("ON");
long TStart = System.currentTimeMillis();
ui.detectContactList();
long TStop = System.currentTimeMillis();
float res = TStop - TStart;
res /= 1000;
ui.log("[loading time]: " + res);
// ui.off();
test.add(i, "Loading time");
stats.add(i, Float.toString(res));
count.add(i, Integer.toString(i));
}
System.out.println(stats);
}
where the ReportSettings class has this code:
public class ReportSettings {
public List<String> test = new ArrayList<String>();
public List<String> count = new ArrayList<String>();
public List<String> stats = new ArrayList<String>();
/**
* Test method
*/
public static void main(String[] args) {
ReportSettings rep = new ReportSettings();
rep.saveXML("report/data.xml");
// System.out.println(rep.test);
// rep = rep.loadXML("report/data.xml");
// System.out.println(rep.home);
System.out.println(rep.getXML());
}
public void createReport() {
ReportSettings rep = new ReportSettings();
rep.saveXML("report/data.xml");
}
public String getXML() {
XStream xstream = new XStream();
xstream.alias("Report", ReportSettings.class);
xstream.autodetectAnnotations(true);
return xstream.toXML(this);
}
public void saveXML(String filename) {
if (!filename.contains(".xml")) {
System.out.println("Error in saveReport syntax");
return;
}
String xml = this.getXML();
File f = new File(filename);
try {
FileOutputStream fo = new FileOutputStream(f);
fo.write(xml.getBytes());
fo.close();
}
catch (FileNotFoundException e) {
e.printStackTrace();
}
catch (IOException e) {
e.printStackTrace();
}
}
public ReportSettings loadXML(String filename) {
if (!filename.endsWith(".xml")) {
System.out.println("Error in loadReport syntax!");
throw new RuntimeException("Error in loadReport syntax!");
}
File f = new File(filename);
XStream xstream = new XStream(new DomDriver());
xstream.alias("Report", ReportSettings.class);
xstream.autodetectAnnotations(true);
ReportSettings ort = (ReportSettings)xstream.fromXML(f);
return ort;
}
}
Finally, I want to create a table from the 3 ArrayLists, with one row per index i holding {stats, count, test}.
How can I use XStream aliases to change <string> to <somethingAnother> in the XML file? I need to change them to stringOne and stringTwo, for example.
You can use the ClassAliasingMapper in XStream to give the items in your collection a different tag when serializing to XML.
You add a block like this (for each collection: stats, count, test):
ClassAliasingMapper statsMapper = new ClassAliasingMapper(xstream.getMapper());
statsMapper.addClassAlias("somethingAnother", String.class);
xstream.registerLocalConverter(
ReportSettings.class,
"stats",
new CollectionConverter(statsMapper)
);
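Putting it together for the ReportSettings class from the question (the alias names here are arbitrary examples), something along these lines should serialize each list with its own item tag:
XStream xstream = new XStream(new DomDriver());
xstream.alias("Report", ReportSettings.class);

// one ClassAliasingMapper per collection field, each with its own element name
ClassAliasingMapper statsMapper = new ClassAliasingMapper(xstream.getMapper());
statsMapper.addClassAlias("stat", String.class);
xstream.registerLocalConverter(ReportSettings.class, "stats", new CollectionConverter(statsMapper));

ClassAliasingMapper testMapper = new ClassAliasingMapper(xstream.getMapper());
testMapper.addClassAlias("testValue", String.class);
xstream.registerLocalConverter(ReportSettings.class, "test", new CollectionConverter(testMapper));

// expected shape: <Report><test><testValue>4.419</testValue>...</test><stats><stat>...</stat>...</stats></Report>
System.out.println(xstream.toXML(rep)); // rep is the ReportSettings instance from the question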

Searching Twitter with OAuth

Okay so I get an access token for Twitter each time I run my program...(It's tedious at the moment but I will find out how to make it a persistent store)... How do I go about using this access token so that when I search, I won't get a
"Rate limit exceeded. Clients may not make more than 150 requests per hour."
error?
It happens when I'm searching for the following: "https://api.twitter.com/1/users/show.json?screen_name=[screenName]"
which is coded as:
BufferedReader ff = new BufferedReader( new InputStreamReader(ffUser.openConnection().getInputStream()));
In my code below:
public class UDC {
private static String term1;
private static String term2;
public static String PIN;
private static final String twitterSearch = "http://search.twitter.com/search.json?q=%23";
private static String rppPage = "&rpp=500&page=1";
private static final String ffGet = "https://api.twitter.com/1/users/show.json?screen_name=";
private static final String CONSUMER_KEY = "K7el7Fqu7UtcJv3A3ssOQ";
private static final String CONSUMER_SECRET = "w7ZX27ys58mafLYeivuA2POVe0gjhTIIUH26f2IM";
private static String entities = "&include_entities=true";
static Object[][] tableData = new Object[500][6];
static SearchResultC s = new SearchResultC();
static StringBuffer buff = new StringBuffer();
static StringBuffer buff1 = new StringBuffer();
public static void main (String args[]) throws Exception{
verifyURL v = new verifyURL();
Twitter twitter = new TwitterFactory().getInstance();
twitter.setOAuthConsumer(CONSUMER_KEY, CONSUMER_SECRET);
RequestToken requestToken = twitter.getOAuthRequestToken();
AccessToken accessToken = null; // = loadAccessToken(Integer.parseInt(args[0]));
//Twitter twitter = factory.getInstance);
//twitter.setOAuthConsumerKey(COMSUMER_KEY, COMSUMER_SECRET);
//twitter.setOAuthAccessToken(accessToken);
v.valURLText.setText(requestToken.getAuthorizationURL());
v.vFrame.setVisible(true);
int p = 0;
do {
//nothing
} while (v.vFrame.isVisible());
try {
if (PIN.length() > 0) {
accessToken = twitter.getOAuthAccessToken(requestToken, PIN);
} else {
accessToken = twitter.getOAuthAccessToken();
}
} catch (TwitterException te) {
if(401 == te.getStatusCode()) {
showErrorPane("Unable to get access code", "Error");
p = 1;
} else {
te.printStackTrace();
}
}
//storeAccessToken(twitter.verifyCredentials().getId(), accessToken);
if (p == 0) {
initComponents();
UDTFrame.setVisible(true);
} else {
System.exit(0);
}
}
@SuppressWarnings({ "static-access"})
private static void searchButtonMouseClicked(String t1, String t2) throws IOException {
if(t1.equals("") || t2.equals("") || t1.equals(t2))
{
showErrorPane("Invalid Search Terms", "Search Error");
}
else
{
s.getInitComponents();
clicked(t1, 0);
clicked(t2, 3);
s.SRTFrame.setVisible(true);
s.sTerm1Field.setText(t1);
s.sTerm2Field.setText(t2);
}
}
@SuppressWarnings("static-access")
public static void clicked(String term, int t){
UDTFrame.setVisible(false);
float follower;
float friends;
float ffRatio;
float DUA;
int statuses;
int day;
int year;
String month;
try {
URL searchURL1 = new URL (twitterSearch + term + rppPage);
//String searchURL = new String (twitterSearch + term + rppPage);
BufferedReader br = new BufferedReader( new InputStreamReader(searchURL1.openConnection().getInputStream()));
//OAuthRequest request = new OAuthRequest(Verb.POST, searchURL);
int c;
while ((c=br.read()) != -1) {
buff.append((char)c);
}
br.close();
/*******************************************************************************************/
/*******************************************************************************************/
/******************************** follower/friend ratio ************************************/
/*******************************************************************************************/
/*******************************************************************************************/
JSONObject js = new JSONObject(buff.toString());
JSONArray tweets = js.getJSONArray("results");
JSONObject tweet = new JSONObject();
for(int i=0; i < tweets.length(); i++) {
tweet = tweets.getJSONObject(i);
//System.out.println(tweet);
//user[i] = tweet.getString("from_user_name");
//System.out.println(tweet.getString("from_user_name"));
//System.out.println(user[i]);
String userName = tweet.getString("from_user");
//URL ffUser = new URL(ffGet + user[i] + entities);
URL ffUser = new URL(ffGet + userName + entities);
String ffUser1 = new String(ffGet + userName + entities);
BufferedReader ff = new BufferedReader( new InputStreamReader(ffUser.openConnection().getInputStream()));
OAuthRequest request = new OAuthRequest(Verb.POST, ffUser1);
int d, e = 0;
while((d = ff.read()) != -1) {
buff1.append((char)d);
e++;
}
ff.close();
JSONObject js1 = new JSONObject(buff1.toString());
//System.out.println(js1);
//JSONArray userData = new JSONArray(buff1.toString());
//JSONObject userData1;
//for(int j = 0; j < js1.length(); i++){
//userData1 = userData.getJSONObject(j);
follower = js1.getInt("followers_count");
friends = js1.getInt("friends_count");
ffRatio = friends/follower;
String createdDate = js1.getString("created_at");
statuses = js1.getInt("statuses_count");
String nameData = js1.getString("name");
String gen = gender(nameData);
//}
}
} catch (MalformedURLException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
} catch (JSONException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
I'm completely new to this OAuth and Access Tokens and all so any help will be much appreciated.
With
OAuthRequest request = new OAuthRequest(Verb.POST, ffUser1);
you are doing an unauthenticated request using the Scribe library (you never instantiated an OAuthService object which would have to be used to sign this request). So when you do this too often Twitter denies these requests.
So your problem here does indeed come from mixing Twitter4J and Scribe.
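If you want to stay with Scribe for the users/show call, the request has to be signed by an OAuthService built from your consumer key/secret and the access token; roughly (Scribe 1.x style, reusing the constants from the question, with the token copied out of Twitter4J's AccessToken):
OAuthService service = new ServiceBuilder()
        .provider(TwitterApi.class)
        .apiKey(CONSUMER_KEY)
        .apiSecret(CONSUMER_SECRET)
        .build();
Token token = new Token(accessToken.getToken(), accessToken.getTokenSecret());
OAuthRequest request = new OAuthRequest(Verb.GET, ffGet + userName + entities);
service.signRequest(token, request); // adds the OAuth Authorization header
Response response = request.send();
String body = response.getBody(); // parse this JSON instead of the unauthenticated stream
Alternatively, drop Scribe for this call entirely and go through Twitter4J (for example twitter.showUser(userName)), which signs its requests with the access token you already obtained.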
