XML parser for android don't read the last child - java

I have a problem with my parser in my android phone!
here is the code for the XML
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<file>
<Staff>
<connection>wifi</connection>
<timestamp>20</timestamp>
<sport>0</sport>
</Staff>
<Staff>
<connection>3g</connection>
<timestamp>40</timestamp>
<sport>0</sport>
</Staff>
<Staff>
<connection>wifi</connection>
<timestamp>60</timestamp>
<sport>0</sport>
</Staff>
<Staff>
<connection>3g</connection>
<timestamp>80</timestamp>
<sport>0</sport>
</Staff>
</file>
and here is the parser code that i have
try {
InputStream filename = null;
Document obj_doc = null;
DocumentBuilderFactory doc_build_fact = null;
DocumentBuilder doc_builder = null;
filename = new FileInputStream("/sdcard/data.xml");
doc_build_fact = DocumentBuilderFactory.newInstance();
doc_builder = doc_build_fact.newDocumentBuilder();
System.out.println("readed data.xml");
obj_doc = doc_builder.parse(filename);
NodeList obj_nod_list = null;
if (null != obj_doc) {
org.w3c.dom.Element feed = obj_doc.getDocumentElement();
obj_nod_list = feed.getElementsByTagName("file");
}
Element root = obj_doc.getDocumentElement();
NodeList items = root.getElementsByTagName("Staff");
System.out.println("items "+items.getLength());
for (int i = 0; i < items.getLength(); i++) {
Node item = items.item(i);
NodeList properties = item.getChildNodes();
System.out.println("properties length "+item.getChildNodes().getLength());
// System.out.println("properties "+properties.getLength());
for (int j = 0; j < items.getLength(); j++) {
Node property = properties.item(j);
// System.out.println("properties "+property.getNodeName());
String name = property.getNodeName();
if (name.equalsIgnoreCase("connection")) {
// Store it where you want
connection.add(property.getFirstChild().getNodeValue());
System.out.println("connection "+connection);
// System.out.println("connection "+connection.get(i));
}
if (name.equalsIgnoreCase("timestamp")) {
int inttimestamp = Integer.parseInt(property.getFirstChild().getNodeValue());
timestamp.add(inttimestamp);
System.out.println("timestamp "+timestamp);
}
if (name.equalsIgnoreCase("sport")) {
int inttimestamp = Integer.parseInt(property.getFirstChild().getNodeValue());
capacity.add(inttimestamp);
System.out.println("capacity "+capacity);
}
}
}
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
when the parsing is done! it can't read the last child! which is sport! can you provide me a solution for it! Also when i change the xml and put the "sport" first, after it can't read the last node! thanks

Use jsoup html parser. Here is the official site. You can use it parsing html, xml etc. It's a tag based parser.

here is an example using the SAX parser it can help you
the parser class:
public class XMLParser extends DefaultHandler
{
// xml Tags name
private final String TENDER_TYPE = "TenderType";
private final String TRX_TYPE = "TransactionType";
private final String DATA_ELEM = "DataElement";
//xml Values
private final String NAME = "Name";
private final String VALUE = "Value";
private final String AMOUNT = "Amount";
private final String T_TYPE = "TenderType";
private final String CLRK_ID = "ClerkId";
private final String INV_NUM = "InvoiceNum";
private final String AUTH = "Authorization";
private final String ORIG_SEQ = "OriginalSequence";
private final String ORIG_REF = "OriginalReference";
private final String TAG = "Tag";
private final String DESCRIPTION = "Description";
//list for imported Config data
private ArrayList<TenderType> theTenderTypeList = null;
private TenderType currentTenderType = null;
private ArrayList<TransactionType> theTrxTypeList = null;
private TransactionType currentTrxType = null;
private ArrayList<DataElement> theDataElementList = null;
private DataElement currentDataElement = null;
#Override
public void startDocument() throws SAXException
{
super.startDocument();
if(theTenderTypeList == null)
{
theTenderTypeList = new ArrayList<TenderType>();
}
if(theTrxTypeList == null)
{
theTrxTypeList = new ArrayList<TransactionType>();
}
if(theDataElementList == null)
{
theDataElementList = new ArrayList<DataElement>();
}
}
#Override
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException
{
if (localName.equalsIgnoreCase(TRX_TYPE))
{
this.currentTrxType = new TransactionType(attributes.getValue(NAME),
attributes.getValue(VALUE),attributes.getValue(AMOUNT),
attributes.getValue(T_TYPE),attributes.getValue(CLRK_ID),
attributes.getValue(INV_NUM), attributes.getValue(AUTH),
attributes.getValue(ORIG_SEQ), attributes.getValue(ORIG_REF));
this.theTrxTypeList.add(currentTrxType);
}
else if (localName.equalsIgnoreCase(TENDER_TYPE))
{
this.currentTenderType = new TenderType(attributes.getValue(NAME),
attributes.getValue(VALUE));
theTenderTypeList.add(currentTenderType);
}
else if (localName.equalsIgnoreCase(DATA_ELEM))
{
this.currentDataElement = new DataElement(attributes.getValue(TAG),
attributes.getValue(DESCRIPTION));
theDataElementList.add(currentDataElement);
}
}
public ArrayList<TenderType> getTenderTypeList()
{
return theTenderTypeList;
}
public ArrayList<TransactionType> getTrxTypeList()
{
return theTrxTypeList;
}
public ArrayList<DataElement> getDataElementList()
{
return theDataElementList;
}
}
and then u can call it:
SAXParserFactory fabrique = SAXParserFactory.newInstance();
SAXParser parseur = null;
try
{
parseur = fabrique.newSAXParser();
}
catch (ParserConfigurationException e)
{
System.out.println("Parse Config Exception");
}
catch (SAXException e)
{
System.out.println("SAX Parse Exception");
}
handler = new XMLParser();
try
{
/* Parse Config File */
myFileCfg = new File(FolderPath + CFG_FILE_NAME);
parseur.parse(myFileCfg, handler);
}
catch (SAXException e)

I solved it! it was bad the XML file structure! After i changed it! it worked. thanks for your awnsers

Related

NIFI: how to update config data from custom processor

in my custom processor i need to update config file(it is placed in one of nifi folder, it is not original config ) based on data which i get from upstream connection but i can't get desired result what should i do?
is there any way i can controll upstream connection flowfile
destination i mean when i make debugging i use this function fileQueue.drainTo(file, batchSize) in this part "file " was null
here is one more thing i am interested in, on this line
session.commit() i always get errors that :"trnasfer operation isn't specified" and tanks to it whole operation is failed ?
it seems that when i try to make session.exportTo() operations it doesnt write anything in dotCopyFile, how should i fix it?
should i clean fileQueue and then make session.commit()?
FlowFile flowfile;
#Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();
final ArrayList value = new ArrayList<>();
flowfile = session.get();
if (flowfile == null) {
return;
}
value.add(flowfile.getAttribute("filename"));
session.remove(flowfile);
final File directory = new File(context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue());
final boolean keepingSourceFile = context.getProperty(KEEP_SOURCE_FILE).asBoolean();
final ComponentLog logger = getLogger();
if (fileQueue.size() < 100) {
final long pollingMillis = context.getProperty(POLLING_INTERVAL).asTimePeriod(TimeUnit.MILLISECONDS);
if ((queueLastUpdated.get() < System.currentTimeMillis() - pollingMillis) && listingLock.tryLock()) {
try {
final Set<File> listing = performListing(directory, fileFilterRef.get(), context.getProperty(RECURSE).asBoolean().booleanValue());
queueLock.lock();
try {
listing.removeAll(inProcess);
if (!keepingSourceFile) {
listing.removeAll(recentlyProcessed);
}
fileQueue.clear();
fileQueue.addAll(listing);
queueLastUpdated.set(System.currentTimeMillis());
recentlyProcessed.clear();
if (listing.isEmpty()) {
context.yield();
}
} finally {
queueLock.unlock();
}
} finally {
listingLock.unlock();
}
}
}
final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
final List<File> file = new ArrayList<>(batchSize);
queueLock.lock();
try {
fileQueue.drainTo(file, batchSize);
if (file.isEmpty()) {
return;
} else {
inProcess.addAll(file);
}
} finally {
queueLock.unlock();
}
//make xml parsing
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
try {
dBuilder = dbFactory.newDocumentBuilder();
} catch (ParserConfigurationException e) {
e.printStackTrace();
}
try {
File f = file.get(0);
doc = dBuilder.parse(f);
} catch (IOException e) {
e.printStackTrace();
} catch (org.xml.sax.SAXException e) {
e.printStackTrace();
}
NodeList nList = doc.getElementsByTagName("localAttributes");
for (int temp = 0; temp < nList.getLength(); temp++) {
Node nNode = nList.item(temp);
if (nNode.getNodeType() == Node.ELEMENT_NODE) {
Element eElement = (Element) nNode;
start = eElement.getElementsByTagName("start").item(0).getTextContent();
startDate = eElement.getElementsByTagName("startDate").item(0).getTextContent();
endDate = eElement.getElementsByTagName("endDate").item(0).getTextContent();
patch = eElement.getElementsByTagName("patch").item(0).getTextContent();
runAs = eElement.getElementsByTagName("runAs").item(0).getTextContent();
makeVersion = eElement.getElementsByTagName("makeVersion").item(0).getTextContent();
///parameter = eElement.getElementsByTagName("parameter").item(0).getTextContent();
}
}
final ListIterator<File> itr = file.listIterator();
FlowFile flowFile1 = null;
try {
final Path directoryPath = directory.toPath();
while (itr.hasNext()) {
final File files = itr.next();
final Path filePath = files.toPath();
final Path relativePath = directoryPath.relativize(filePath.getParent());
String relativePathString = relativePath.toString() + "/";
if (relativePathString.isEmpty()) {
relativePathString = "./";
}
final Path absPath = filePath.toAbsolutePath();
final String absPathString = absPath.getParent().toString() + "/";
flowFile1 = session.create();
final long importStart = System.nanoTime();
flowFile1 = session.importFrom(filePath, keepingSourceFile, flowFile1);
final long importNanos = System.nanoTime() - importStart;
final long importMillis = TimeUnit.MILLISECONDS.convert(importNanos, TimeUnit.NANOSECONDS);
flowFile1 = session.putAttribute(flowFile1, CoreAttributes.FILENAME.key(), files.getName());
flowFile1 = session.putAttribute(flowFile1, CoreAttributes.PATH.key(), relativePathString);
flowFile1 = session.putAttribute(flowFile1, CoreAttributes.ABSOLUTE_PATH.key(), absPathString);
Map<String, String> attributes = getAttributesFromFile(filePath);
if (attributes.size() > 0) {
flowFile1 = session.putAllAttributes(flowFile1, attributes);
}
InputStream ffStream = session.read(flowFile1);
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = builderFactory.newDocumentBuilder();
Document xmlDocument = builder.parse(ffStream);
XPath xPath = XPathFactory.newInstance().newXPath();
XPathExpression myNodeList = (XPathExpression) xPath.compile("/localAttributes");
Node nodeGettingChanged = (Node) myNodeList.evaluate(xmlDocument, XPathConstants.NODE);
NodeList childNodes = nodeGettingChanged.getChildNodes();
boolean make=false;
for (int i = 0; i != childNodes.getLength(); ++i) {
Node child = childNodes.item(i);
if (!(child instanceof Element))
continue;
if(child.getNodeName().equals("start")){
String date;
for(int j=0;j<value.size();j++) {
if(value.get(j).length()>10){
date=value.get(j).substring(0,10);
}
else{
date=value.get(j);
}
if (date == child.getFirstChild().getTextContent()){
child.getFirstChild().setNodeValue(addOneDay(child.getFirstChild().getTextContent()));
make=true;
}
}
}
if(make){
if(child.getNodeName().equals("runAs")){
child.getFirstChild().setNodeValue("true");
}
}
}
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = null;
transformer = transformerFactory.newTransformer();
DOMSource source = new DOMSource(xmlDocument);
String path = "C:/Users/user/Desktop/nifi-1.3.0/nifi-assembly/target/nifi-1.3.0-bin/nifi-1.3.0/1/conf.xml";
File f = new File(path);
StreamResult file1 = new StreamResult(f);
try {
transformer.transform(source, file1);
} catch (TransformerException e) {
e.printStackTrace();
}
session.write(flowFile1, new StreamCallback() {
#Override
public void process(InputStream inputStream, OutputStream outputStream) throws IOException {
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = null;
try {
transformer = transformerFactory.newTransformer();
} catch (TransformerConfigurationException e) {
e.printStackTrace();
}
DOMSource source = new DOMSource(xmlDocument);
ffStream.close();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
StreamResult result = new StreamResult(bos);
try {
transformer.transform(source, result);
} catch (TransformerException e) {
e.printStackTrace();
}
byte[] array = bos.toByteArray();
outputStream.write(array);
}
});
Path tempDotCopyFile = null;
try {
final Path rootDirPath = Paths.get("C://Users//user//Desktop//try2//nifi-1.3.0//1");
final Path tempCopyFile = rootDirPath.resolve("." + flowFile1.getAttribute(CoreAttributes.FILENAME.key()));
final Path copyFile = rootDirPath.resolve(flowFile1.getAttribute(CoreAttributes.FILENAME.key()));
if (!Files.exists(rootDirPath)) {
if (context.getProperty(CREATE_DIRS).asBoolean()) {
Files.createDirectories(rootDirPath);
} else {
flowFile1 = session.penalize(flowFile1);
session.transfer(flowFile1,REL_FAILURE);
logger.error("Penalizing {} and routing to 'failure' because the output directory {} does not exist and Processor is "
+ "configured not to create missing directories", new Object[]{flowFile1, rootDirPath});
return;
}
}
final Path dotCopyFile = tempCopyFile;
tempDotCopyFile = dotCopyFile;
Path finalCopyFile = copyFile;
final Path finalCopyFileDir = finalCopyFile.getParent();
if (Files.exists(finalCopyFileDir)) { // check if too many files already
final int numFiles = finalCopyFileDir.toFile().list().length;
if (numFiles >= 34) {
flowFile1 = session.penalize(flowFile1);
logger.warn("Penalizing {} and routing to 'failure' because the output directory {} has {} files, which exceeds the "
+ "configured maximum number of files", new Object[]{flowFile1, finalCopyFileDir, numFiles});
session.transfer(flowFile1,REL_FAILURE);
return;
}
}
if (Files.exists(finalCopyFile)) {
switch (conflictResponse) {
case REPLACE_RESOLUTION:
Files.delete(finalCopyFile);
logger.info("Deleted {} as configured in order to replace with the contents of {}", new Object[]{finalCopyFile, flowFile1});
break;
case IGNORE_RESOLUTION:
session.transfer(flowFile1, REL_SUCCESS);
logger.info("Transferring {} to success because file with same name already exists", new Object[]{flowFile1});
return;
case FAIL_RESOLUTION:
flowFile1 = session.penalize(flowFile1);
logger.warn("Penalizing {} and routing to failure as configured because file with the same name already exists", new Object[]{flowFile1});
session.transfer(flowFile1,REL_FAILURE);
return;
default:
break;
}
}
session.exportTo(flowFile1, dotCopyFile, false);
final String permissions = "-rwxrwx---";
if (permissions != null && !permissions.trim().isEmpty()) {
try {
String perms = stringPermissions(permissions);
if (!perms.isEmpty()) {
Files.setPosixFilePermissions(dotCopyFile, PosixFilePermissions.fromString(perms));
}
} catch (Exception e) {
logger.warn("Could not set file permissions to {} because {}", new Object[]{permissions, e});
}
}
boolean renamed = false;
for (int i = 0; i < 10; i++) { // try rename up to 10 times.
if (dotCopyFile.toFile().renameTo(finalCopyFile.toFile())) {
renamed = true;
break;// rename was successful
}
Thread.sleep(100L);// try waiting a few ms to let whatever might cause rename failure to resolve
}
if (!renamed) {
if (Files.exists(dotCopyFile) && dotCopyFile.toFile().delete()) {
logger.debug("Deleted dot copy file {}", new Object[]{dotCopyFile});
}
throw new ProcessException("Could not rename: " + dotCopyFile);
} else {
logger.info("Produced copy of {} at location {}", new Object[]{flowFile1, finalCopyFile});
}
/*session.getProvenanceReporter().send(flowFile, finalCopyFile.toFile().toURI().toString(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
session.transfer(flowFile, REL_SUCCESS);*/
session.getProvenanceReporter().receive(flowFile1, files.toURI().toString(), importMillis);
//session.transfer(flowFile1, REL_SUCCESS);
session.remove(flowFile1);
} catch (final Throwable t) {
if (tempDotCopyFile != null) {
try {
Files.deleteIfExists(tempDotCopyFile);
} catch (final Exception e) {
logger.error("Unable to remove temporary file {} due to {}", new Object[]{tempDotCopyFile, e});
}
}
flowFile1 = session.penalize(flowFile1);
logger.error("Penalizing {} and transferring to failure due to {}", new Object[]{flowFile1, t});
session.transfer(flowFile1,REL_FAILURE);
}
}
if (!isScheduled()) { // if processor stopped, put the rest of the files back on the queue.
queueLock.lock();
try {
while (itr.hasNext()) {
final File nextFile = itr.next();
fileQueue.add(nextFile);
inProcess.remove(nextFile);
}
} finally {
queueLock.unlock();
}
}
} catch (IOException e1) {
e1.printStackTrace();
} catch (TransformerConfigurationException e1) {
e1.printStackTrace();
} catch (ParserConfigurationException e1) {
e1.printStackTrace();
} catch (XPathExpressionException e1) {
e1.printStackTrace();
} catch (org.xml.sax.SAXException e) {
e.printStackTrace();
}
session.commit();
}

How to locate or work with different locator in selemium webdrier frameowrk?

I am beginner in selenium webdriver, I am trying to work with different locator by reading excel sheet but in these it take only one data put it in one field by finding locator "id", and when it come to the second text field for this we are using locator by "Xpath" but its not taking. So, my question is how can I work with different locator also by don't using switch case if possible.
Below are my code:
public class MainClass {
private static final String BROWSER_PATH = "D:\\firefox.exe";
private static final String TEST_SUITE_PATH = "D:\\GmailTestSuite.xls";
private static final String OBJECT_REPOSITORY_PATH = "D:\\objectrepository.xls";
private static final String ADDRESS_TO_TEST = "https://www.gmail.com";
// other constants
private WebDriver driver;
private Properties properties;
/*private WebElement we;*/
public MainClass() {
File file = new File(BROWSER_PATH);
FirefoxBinary fb = new FirefoxBinary(file);
driver = new FirefoxDriver(fb, new FirefoxProfile());
driver.get(ADDRESS_TO_TEST);
}
public static void main(String[] args) throws IOException, BiffException {
MainClass main = new MainClass();
main.handleTestSuite();
}
private void handleTestSuite() throws BiffException, IOException {
ReadPropertyFile readConfigFile = new ReadPropertyFile();
properties = readConfigFile.loadPropertiess();
ExcelHandler testSuite = new ExcelHandler(TEST_SUITE_PATH, "Suite");
testSuite.columnData();
int rowCount = testSuite.rowCount();
System.out.println("Total Rows=" + rowCount);
for (int i = 1; i < rowCount; i++) {
String executable = testSuite.readCell(testSuite.getCell("Executable"), i);
System.out.println("Executable=" + executable);
if (executable.equalsIgnoreCase("y")) {
// exe. the process
String scenarioName = testSuite.readCell(testSuite.getCell("TestScenario"), i);
System.out.println("Scenario Name=" + scenarioName);
handleScenario(scenarioName);
}
}
}
private void handleScenario(String scenarioName) throws BiffException, IOException {
ExcelHandler testScenarios = new ExcelHandler(TEST_SUITE_PATH);
testScenarios.setSheetName("Login");
testScenarios.columnData();
int rowWorkBook1 = testScenarios.rowCount();
for (int j = 1; j < rowWorkBook1; j++) {
String framWork = testScenarios.readCell(testScenarios.getCell("FrameworkName"), j);
String operation = testScenarios.readCell(testScenarios.getCell("Operation"), j); // SendKey
String value = testScenarios.readCell(testScenarios.getCell("Value"), j);
System.out.println("FRMNameKK=" + framWork + ",Operation=" + operation +
",Value=" + value);
handleObjects(operation,value,framWork);
}
}
private void handleObjects(String operation,String value,String framWork) throws BiffException, IOException
{
System.out.println("HandleObject--> "+framWork);
ExcelHandler objectRepository = new ExcelHandler(OBJECT_REPOSITORY_PATH, "OR");
objectRepository.columnData();
int rowCount = objectRepository.rowCount();
System.out.println("Total Rows in hadleObject=" + rowCount);
for (int k = 1; k < rowCount; k++) {
String frameWorkName = objectRepository.readCell(objectRepository.getCell("FrameworkName"), k);
String ObjectName = objectRepository.readCell(objectRepository.getCell("ObjectName"), k);
String Locator = objectRepository.readCell(objectRepository.getCell("Locator"), k); // SendKey
System.out.println("FrameWorkNameV=" + frameWorkName +
",ObjectName=" + ObjectName + ",Locator=" + Locator);
if(framWork.equalsIgnoreCase(frameWorkName))
{
operateWebDriver(operation,Locator,value,ObjectName);
}
}
}
private void operateWebDriver(String operation,String Locator,String value, String objectName)
{
System.out.println("Operation execution in progress");
WebElement temp=getElement(Locator,objectName);
if (operation.equalsIgnoreCase("SendKey"))
{
temp.sendKeys(value);
}
if (operation.equalsIgnoreCase("Click"))
{
temp.click();
}
}
public WebElement getElement(String locator,String objectName)
{
WebElement temp = null;
if(locator.equalsIgnoreCase("id"))
{
temp = driver.findElement(By.id(objectName));
}else if(locator.equalsIgnoreCase("xpath")) {
temp = driver.findElement(By.xpath(objectName));
}
if(locator.equalsIgnoreCase("link"))
{
}
return temp;
}
}
You need the Reflection. See example in 2 methods below:
import java.lang.reflect.Method;
...
#Test
public void TestAmazon() {
driver = new ChromeDriver();
driver.navigate().to("http://www.amazon.com");
String locatorType = "id";
String locatorExpression = "twotabsearchtextbox";
By locator = createLocator(locatorType, locatorExpression);
WebElement textbox = driver.findElement(locator);
textbox.sendKeys("Star Wars: The Digital Movie Collection");
textbox.sendKeys(Keys.ENTER);
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
locatorType = "className";
locatorExpression = "nav-logo-link";
locator = createLocator(locatorType, locatorExpression);
driver.findElement(locator).click();
}
private By createLocator(String locatorType, final String locatorExpression) {
By locator = null;
Class<By> byClass = By.class;
Class[] argTypes = new Class[] { String.class };
try {
Method m = byClass.getDeclaredMethod(locatorType, argTypes);
try {
locator = (By)m.invoke(null,locatorExpression);
} catch (IllegalAccessException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IllegalArgumentException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (InvocationTargetException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
} catch (NoSuchMethodException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SecurityException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return locator;
}

How to write a unit test for an XML parser I wrote in Java

The context is as follows:
I've got objects that represent Tweets (from Twitter). Each object has an id, a date and the id of the original tweet (if there was one).
I receive a file of tweets (where each tweet is in the format of 05/04/2014 12:00:00, tweetID, originalID and is in its' own line) and I want to save them as an XML file where each field has its' own tag.
I want to then be able to read the file and return a list of Tweet objects corresponding to the Tweets from the XML file.
After writing the XML parser that does this I want to test that it works correctly. I've got no idea how to test this.
The XML Parser:
public class TweetToXMLConverter implements TweetImporterExporter {
//there is a single file used for the tweets database
static final String xmlPath = "src/main/resources/tweetsDataBase.xml";
//some "defines", as we like to call them ;)
static final String DB_HEADER = "tweetDataBase";
static final String TWEET_HEADER = "tweet";
static final String TWEET_ID_FIELD = "id";
static final String TWEET_ORIGIN_ID_FIELD = "original tweet";
static final String TWEET_DATE_FIELD = "tweet date";
static File xmlFile;
static boolean initialized = false;
#Override
public void createDB() {
try {
Element tweetDB = new Element(DB_HEADER);
Document doc = new Document(tweetDB);
doc.setRootElement(tweetDB);
XMLOutputter xmlOutput = new XMLOutputter();
// display nice nice? WTF does that chinese whacko want?
xmlOutput.setFormat(Format.getPrettyFormat());
xmlOutput.output(doc, new FileWriter(xmlPath));
xmlFile = new File(xmlPath);
initialized = true;
} catch (IOException io) {
System.out.println(io.getMessage());
}
}
#Override
public void addTweet(Tweet tweet) {
if (!initialized) {
//TODO throw an exception? should not come to pass!
return;
}
SAXBuilder builder = new SAXBuilder();
try {
Document document = (Document) builder.build(xmlFile);
Element newTweet = new Element(TWEET_HEADER);
newTweet.setAttribute(new Attribute(TWEET_ID_FIELD, tweet.getTweetID()));
newTweet.setAttribute(new Attribute(TWEET_DATE_FIELD, tweet.getDate().toString()));
if (tweet.isRetweet())
newTweet.addContent(new Element(TWEET_ORIGIN_ID_FIELD).setText(tweet.getOriginalTweet()));
document.getRootElement().addContent(newTweet);
} catch (IOException io) {
System.out.println(io.getMessage());
} catch (JDOMException jdomex) {
System.out.println(jdomex.getMessage());
}
}
//break glass in case of emergency
#Override
public void addListOfTweets(List<Tweet> list) {
for (Tweet t : list) {
addTweet(t);
}
}
#Override
public List<Tweet> getListOfTweets() {
if (!initialized) {
//TODO throw an exception? should not come to pass!
return null;
}
try {
SAXBuilder builder = new SAXBuilder();
Document document;
document = (Document) builder.build(xmlFile);
List<Tweet> $ = new ArrayList<Tweet>();
for (Object o : document.getRootElement().getChildren(TWEET_HEADER)) {
Element rawTweet = (Element) o;
String id = rawTweet.getAttributeValue(TWEET_ID_FIELD);
String original = rawTweet.getChildText(TWEET_ORIGIN_ID_FIELD);
Date date = new Date(rawTweet.getAttributeValue(TWEET_DATE_FIELD));
$.add(new Tweet(id, original, date));
}
return $;
} catch (JDOMException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return null;
}
}
Some usage:
private TweetImporterExporter converter;
List<Tweet> tweetList = converter.getListOfTweets();
for (String tweetString : lines)
converter.addTweet(new Tweet(tweetString));
How can I make sure the the XML file I read (that contains tweets) corresponds to the file I receive (in the form stated above)?
How can I make sure the tweets I add to the file correspond to the ones I tried to add?
Assuming that you have the following model:
public class Tweet {
private Long id;
private Date date;
private Long originalTweetid;
//getters and seters
}
The process would be the following:
create an isntance of TweetToXMLConverter
create a list of Tweet instances that you expect to receive after parsing the file
feed the converter the list you generated
compare the list received by parsing the list and the list you initiated at the start of the test
public class MainTest {
private TweetToXMLConverter converter;
private List<Tweet> tweets;
#Before
public void setup() {
Tweet tweet = new Tweet(1, "05/04/2014 12:00:00", 2);
Tweet tweet2 = new Tweet(2, "06/04/2014 12:00:00", 1);
Tweet tweet3 = new Tweet(3, "07/04/2014 12:00:00", 2);
tweets.add(tweet);
tweets.add(tweet2);
tweets.add(tweet3);
converter = new TweetToXMLConverter();
converter.addListOfTweets(tweets);
}
#Test
public void testParse() {
List<Tweet> parsedTweets = converter.getListOfTweets();
Assert.assertEquals(parsedTweets.size(), tweets.size());
for (int i=0; i<parsedTweets.size(); i++) {
//assuming that both lists are sorted
Assert.assertEquals(parsedTweets.get(i), tweets.get(i));
};
}
}
I am using JUnit for the actual testing.

Parsing xml with & < > " ' by java DOM parser

Good day ppl.
I have class:
public class XmlModifier {
private DocumentBuilderFactory docFactory = null;
private DocumentBuilder docBuilder = null;
private Document document = null;
private TransformerFactory transformFactory = null;
private Transformer transform = null;
private DOMSource source = null;
private StreamResult streamRes = null;
private boolean exepDocBuilderAlarm = true;
public XmlModifier() {
this.docFactory = DocumentBuilderFactory.newInstance();
try {
this.docBuilder = docFactory.newDocumentBuilder();
} catch (ParserConfigurationException e) {
this.exepDocBuilderAlarm = false;
e.printStackTrace();
}
}
public void prepareXMLMessage(File file, String userName, String userPassword, String reqId, Integer NUMBER_OF_MSG_SENDS) {
if (exepDocBuilderAlarm != false) {
try {
document = docBuilder.parse(file);
setElementValues("si:sourceInfo", addElementsToArray("si:reqId,si:reqTag"), addElementsToArray(reqId + "," + reqId), NUMBER_OF_MSG_SENDS);
setElementValues("usr:user", addElementsToArray("usr:id,usr:password"), addElementsToArray(userName + "," + userPassword), NUMBER_OF_MSG_SENDS);
setElementValues("meth:method", addElementsToArray("meth:id,meth:tag"), addElementsToArray(reqId + "," + reqId), NUMBER_OF_MSG_SENDS);
setElementValues("tb:tradeField", addElementsToArray("tb:value"), addElementsToArray(reqId), NUMBER_OF_MSG_SENDS);
this.transformFactory = TransformerFactory.newInstance();
this.transform = transformFactory.newTransformer();
this.source = new DOMSource(document);
this.streamRes = new StreamResult(file);
this.transform.transform(source, streamRes);
System.out.println("Done to execute XmlModifier");
} catch (Exception e) {
e.printStackTrace();
} finally {
this.docFactory = null;
this.docBuilder = null;
this.transformFactory = null;
this.transform = null;
this.source = null;
this.streamRes = null;
}
}
}
private void setElementValues(String rootElement, String[] childElements, String[] childElementsValues, Integer msgIDIncrement) {
Node nodeRootElement = document.getElementsByTagName(rootElement).item(0);
NodeList childElementlist = nodeRootElement.getChildNodes();
for (int i = 0; i < childElements.length; i++) {
for (int z = 0; z < childElementlist.getLength(); z++) {
Node node = childElementlist.item(z);
if (childElements[i].equals(node.getNodeName())) {
node.setTextContent(childElementsValues[i]);
}
}
}
}
private String[] addElementsToArray(String elements) {
String[] theArray = null;
theArray = elements.split(",");
return theArray;
}
}
On input I have xml which is not well formed sometimes. The problem of DOM parser is that in my point of view DOM parser parsing whole xml from the beggining which my have special symbols like < > & " ' is not good for me.
How can I on input to prepareXMLMessage(..) provide xml with < > & " ', then in prepareXMLMessage(..) parse it with no problems(change some values inside elements) and than as output provide xml with < > & " ' back???
Thank you for help.

Do Not Crawl certain page in a particular link(exclude certain url from crawling)

This is the below code in my MyCrawler.java and it is crawling all those links that I have provided in href.startsWith but suppose If I do not want to crawl this particular page http://inv.somehost.com/people/index.html then how can I do this in my code..
public MyCrawler() {
}
public boolean shouldVisit(WebURL url) {
String href = url.getURL().toLowerCase();
if (href.startsWith("http://www.somehost.com/") || href.startsWith("http://inv.somehost.com/") || href.startsWith("http://jo.somehost.com/")) {
//And If I do not want to crawl this page http://inv.somehost.com/data/index.html then how it can be done..
return true;
}
return false;
}
public void visit(Page page) {
int docid = page.getWebURL().getDocid();
String url = page.getWebURL().getURL();
String text = page.getText();
List<WebURL> links = page.getURLs();
int parentDocid = page.getWebURL().getParentDocid();
try {
URL url1 = new URL(url);
System.out.println("URL:- " +url1);
URLConnection connection = url1.openConnection();
Map responseMap = connection.getHeaderFields();
Iterator iterator = responseMap.entrySet().iterator();
while (iterator.hasNext())
{
String key = iterator.next().toString();
if (key.contains("text/html") || key.contains("text/xhtml"))
{
System.out.println(key);
// Content-Type=[text/html; charset=ISO-8859-1]
if (filters.matcher(key) != null){
System.out.println(url1);
try {
final File parentDir = new File("crawl_html");
parentDir.mkdir();
final String hash = MD5Util.md5Hex(url1.toString());
final String fileName = hash + ".txt";
final File file = new File(parentDir, fileName);
boolean success =file.createNewFile(); // Creates file crawl_html/abc.txt
System.out.println("hash:-" + hash);
System.out.println(file);
// Create file if it does not exist
// File did not exist and was created
FileOutputStream fos = new FileOutputStream(file, true);
PrintWriter out = new PrintWriter(fos);
// Also could be written as follows on one line
// Printwriter out = new PrintWriter(new FileWriter(args[0]));
// Write text to file
Tika t = new Tika();
String content= t.parseToString(new URL(url1.toString()));
out.println("===============================================================");
out.println(url1);
out.println(key);
//out.println(success);
out.println(content);
out.println("===============================================================");
out.close();
fos.flush();
fos.close();
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (TikaException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// http://google.com
}
}
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
System.out.println("=============");
}
And this is my Controller.java code from where MyCrawler is getting called..
public class Controller {
public static void main(String[] args) throws Exception {
CrawlController controller = new CrawlController("/data/crawl/root");
controller.addSeed("http://www.somehost.com/");
controller.addSeed("http://inv.somehost.com/");
controller.addSeed("http://jo.somehost.com/");
controller.start(MyCrawler.class, 20);
controller.setPolitenessDelay(200);
controller.setMaximumCrawlDepth(2);
}
}
Any suggestions will be appreciated..
How about adding a property to tell which urls you want to exclude.
Add to your exclusions list all the pages that you don't want them to get crawled.
Here is an example:
public class MyCrawler extends WebCrawler {
List<Pattern> exclusionsPatterns;
public MyCrawler() {
exclusionsPatterns = new ArrayList<Pattern>();
//Add here all your exclusions using Regular Expresssions
exclusionsPatterns.add(Pattern.compile("http://investor\\.somehost\\.com.*"));
}
/*
* You should implement this function to specify
* whether the given URL should be visited or not.
*/
public boolean shouldVisit(WebURL url) {
String href = url.getURL().toLowerCase();
//Iterate the patterns to find if the url is excluded.
for (Pattern exclusionPattern : exclusionsPatterns) {
Matcher matcher = exclusionPattern.matcher(href);
if (matcher.matches()) {
return false;
}
}
if (href.startsWith("http://www.ics.uci.edu/")) {
return true;
}
return false;
}
}
In this example we are telling that all urls that start with http://investor.somehost.com should not be crawled.
So these wont be crawled:
http://investor.somehost.com/index.html
http://investor.somehost.com/something/else
I recommend you reading about regular expresions.

Categories

Resources