Problems finding correct table - java
I'm trying to scrape a site using Jaunt ( https://ravit.is.fi/hevoset/1 ) and I'm having trouble finding the correct table element to parse this table (marked in red: https://i.imgur.com/JWNByHR.png )
From the HTML, I assumed the correct element would be <table border="0" cellpadding="3" cellspacing="1">,
but the table marked in green uses the same element, so how do I "choose" the correct table? I have tried many things to no avail, but as I am pretty new to Java, HTML and coding in general, I'm most likely missing something obvious.
Also, I tried putting the data from the other table into the xls sheet, but everything went into the same cell. What do I need to do so that it looks like this: https://i.imgur.com/2TF4mO4.png ?
Thank you in advance
/**
 * Scrapes horse profile pages from https://ravit.is.fi/hevoset/&lt;page&gt; with Jaunt, prints the
 * basic details of each horse, and writes them to C:/sheets/&lt;page&gt;.xls with Apache POI.
 */
public class JauntTesti{

    /** Base URL of a horse profile; the page number is appended to it. */
    private static final String BASE_URL = "https://ravit.is.fi/hevoset/";

    /** First and last page number to scrape (both inclusive). */
    private static final int FIRST_PAGE = 1;
    private static final int LAST_PAGE = 1;

    /** Query used to look up the statistics table on the page. */
    private static final String TABLE_QUERY = "<table border=\"0\" cellpadding=\"3\" cellspacing=\"1\">";

    /** Exact inner HTML of the label cells that identify the optional rows. */
    private static final String IKA_LABEL = "<strong>IKÄ:</strong> ";
    private static final String VALMENTAJA_LABEL = "<strong>VALMENTAJA:</strong> ";
    private static final String OMISTAJA_LABEL = "<strong>OMISTAJA:</strong> ";

    /** Header texts and column widths of the generated sheet, in column order. */
    private static final String[] HEADERS = {"NIMI", "LAJI", "SUKUPUOLI", "IKÄ", "VALMENTAJA", "OMISTAJA"};
    private static final int[] COLUMN_WIDTHS = {5000, 5000, 3000, 2000, 4000, 8000};

    /**
     * Scrapes every page in [FIRST_PAGE, LAST_PAGE]. A page that fails is reported on stderr and
     * skipped; the loop always advances, so a failing page can no longer be retried forever.
     */
    public static void main(String[] args){
        for (int sivu = FIRST_PAGE; sivu <= LAST_PAGE; sivu++) {
            try {
                scrapePage(sivu);
            } catch (JauntException e) {
                System.err.println(e);
            }
        }
    }

    /**
     * Downloads one horse page, prints its details and stores them in a spreadsheet.
     *
     * @param sivu page number appended to the base URL and used as the sheet file name
     * @throws JauntException if the page cannot be fetched or an expected element is missing
     */
    private static void scrapePage(int sivu) throws JauntException {
        UserAgent userAgent = new UserAgent();
        userAgent.visit(BASE_URL + sivu);
        System.out.println("\n" + sivu);

        // Container whose children are the label/value rows of the horse details.
        Element allCells = userAgent.doc.findEach("<td>");
        Element infoRows = descend(allCells, 0, 0, 1, 0, 0, 0, 0, 0, 0);

        String nimi = valueCell(infoRows, 0).innerText();
        String laji = valueCell(infoRows, 1).innerText();
        String sukupuoli = valueCell(infoRows, 2).innerText();

        // Rows 3-5 hold age, trainer and owner, but a row may be absent and shift the rest up,
        // so each value is located by the label found in the row rather than by position.
        String label3 = labelCell(infoRows, 3).innerHTML();
        String label4 = labelCell(infoRows, 4).innerHTML();
        String label5 = labelCell(infoRows, 5).innerHTML();
        String value3 = valueCell(infoRows, 3).innerText();
        String value4 = valueCell(infoRows, 4).innerText();
        String value5 = valueCell(infoRows, 5).innerText();

        String ika = IKA_LABEL.equals(label3) ? value3 : " ";

        String valmentaja;
        if (VALMENTAJA_LABEL.equals(label3)) {
            valmentaja = value3;
        } else if (VALMENTAJA_LABEL.equals(label4)) {
            valmentaja = value4;
        } else {
            valmentaja = "-1";
        }

        String omistaja;
        if (OMISTAJA_LABEL.equals(label3)) {
            omistaja = value3;
        } else if (OMISTAJA_LABEL.equals(label4)) {
            omistaja = value4;
        } else if (OMISTAJA_LABEL.equals(label5)) {
            omistaja = value5;
        } else {
            omistaja = "-1";
        }

        // NOTE(review): several tables on the page share these attributes, so this lookup may
        // not select the intended one -- confirm against the page markup.
        Table taulukko2 = userAgent.doc.getTable(TABLE_QUERY);
        Elements taul1 = taulukko2.getCol(0);
        for (Element element : taul1) {
            // Print each cell separately (previously the whole column was printed per cell).
            System.out.println(element.innerText());
        }

        ika = ika.replace(" v","");

        System.out.println("Nimi: " + nimi);
        System.out.println("Laji: " + laji);
        System.out.println("Sukupuoli: " + sukupuoli);
        System.out.println("Ikä: " + ika);
        System.out.println("Valmentaja: " + valmentaja);
        System.out.println("Omistaja: " + omistaja);

        writeSheet(sivu, new String[] {nimi, laji, sukupuoli, ika, valmentaja, omistaja});
    }

    /** Follows a chain of child indexes starting at {@code start} and returns the element reached. */
    private static Element descend(Element start, int... path) throws JauntException {
        Element current = start;
        for (int index : path) {
            current = current.getElement(index);
        }
        return current;
    }

    /** Returns the first (label) cell of the given details row. */
    private static Element labelCell(Element infoRows, int row) throws JauntException {
        return infoRows.getElement(row).getElement(0);
    }

    /** Returns the second (value) cell of the given details row. */
    private static Element valueCell(Element infoRows, int row) throws JauntException {
        return infoRows.getElement(row).getElement(1);
    }

    /**
     * Writes a header row and one data row to C:/sheets/&lt;sivu&gt;.xls. Failures are printed and
     * otherwise ignored, so a spreadsheet problem does not abort the scrape.
     *
     * @param sivu page number used as the file name
     * @param values cell values in the same order as {@link #HEADERS}
     */
    private static void writeSheet(int sivu, String[] values) {
        String filename = "C:/sheets/" + sivu + ".xls";
        try (HSSFWorkbook workbook = new HSSFWorkbook()) {
            HSSFSheet sheet = workbook.createSheet("FirstSheet");
            for (int col = 0; col < COLUMN_WIDTHS.length; col++) {
                sheet.setColumnWidth(col, COLUMN_WIDTHS[col]);
            }

            HSSFRow rowhead = sheet.createRow(0);
            for (int col = 0; col < HEADERS.length; col++) {
                rowhead.createCell(col).setCellValue(HEADERS[col]);
            }

            HSSFRow row = sheet.createRow(1);
            for (int col = 0; col < values.length; col++) {
                row.createCell(col).setCellValue(values[col]);
            }

            // The file is opened only once the sheet is built, and is closed even if write fails.
            try (FileOutputStream fileOut = new FileOutputStream(filename)) {
                workbook.write(fileOut);
            }
        } catch ( Exception ex ) {
            System.out.println(ex);
        }
    }
}
With the univocity-html-parser, you can get all details from all tables. Not sure how you need to organize your data, but this should give you some guidance:
// Holds every entity definition the parser should extract from the page.
HtmlEntityList entities = new HtmlEntityList();

// "person": label/value pairs describing the horse itself.
HtmlEntitySettings personEntity = entities.configureEntity("person");
addFields(personEntity, "NIMI", "LAJI", "SUKUPUOLI", "IKÄ", "VALMENTAJA", "OMISTAJA");
// Fields listed under "URAN TILASTOT" (may not be needed).
addFields(personEntity, "STARTIT", "VOITOT", "2. SIJAT", "3. SIJAT", "VOITTOSUMMA");
// Fields listed under "Tilastot, kun kenkiä riisuttu pois" (may not be needed).
addExactFields(personEntity, "ȻȻ, ȻC tai CȻ", "ȻȻ", "ȻC", "CȻ", "ENNÄTYSAJAT", "RYHMÄLÄHTÖ", "TASOITUSAJO");

// The meaning of the two tables is unknown, hence the generic names "table1" and "table2".
HtmlEntitySettings firstTable = entities.configureEntity("table1");
captureColumns(firstTable, "VUOSI", "STARTIT", "VOITOT", "2. SIJAT", "3. SIJAT", "VOITTOSUMMA", "RYHMÄ", "TASOITUS");

HtmlEntitySettings secondTable = entities.configureEntity("table2");
captureColumnsInLastTable(secondTable, "R", "PVM", "L", "R-NRO", "MATKA", "S", "KMA", "HYL", "KERR.", "PALK.", "OHJ.", "VALM.");

// Parse the page once, then print the records of each entity in definition order.
HtmlParser htmlParser = new HtmlParser(entities);
Results<HtmlParserResult> parsed = htmlParser.parse(new UrlReaderProvider("https://ravit.is.fi/hevoset/1"));
for (String entityName : new String[] {"person", "table1", "table2"}) {
    printResult(parsed.get(entityName));
}
Which uses the following methods:
/**
 * Adds one field per label to the entity. Each field matches a "td" containing the label text
 * that does not have the class "heppatilastohead", and takes the text of the next "td".
 *
 * @param entity entity the fields are added to
 * @param labels label texts, also used as the field names
 */
private void addFields(HtmlEntitySettings entity, String... labels) {
    for (int i = 0; i < labels.length; i++) {
        String fieldLabel = labels[i];
        entity.addField(fieldLabel)
                .match("td").withText(fieldLabel).not().classes("heppatilastohead")
                .matchNext("td")
                .getText();
    }
}
/**
 * Adds one field per label to the entity. Each field matches a "td" whose text is exactly the
 * label and takes the text of the next "td".
 *
 * @param entity entity the fields are added to
 * @param labels exact label texts, also used as the field names
 */
private void addExactFields(HtmlEntitySettings entity, String... labels) {
    for (int i = 0; i < labels.length; i++) {
        String exactLabel = labels[i];
        entity.addField(exactLabel)
                .match("td").withExactText(exactLabel)
                .matchNext("td")
                .getText();
    }
}
/**
 * Adds one field per column header to the entity. Each field takes the text of every "td"
 * located under a header "td" whose text is exactly the given header.
 *
 * @param entity entity the fields are added to
 * @param headers exact header texts, also used as the field names
 */
private void captureColumns(HtmlEntitySettings entity, String... headers) {
    for (int i = 0; i < headers.length; i++) {
        String columnHeader = headers[i];
        entity.addField(columnHeader)
                .match("td").underHeader("td").withExactText(columnHeader)
                .getText();
    }
}
/**
 * Adds one field per column header to the entity, restricted to cells inside a "form". Rows
 * matched by {@code at(2)} are excluded; within the remaining rows each field takes the text of
 * the "td" under a header "td" whose text is exactly the given header.
 *
 * @param entity entity the fields are added to
 * @param headers exact header texts, also used as the field names
 */
private void captureColumnsInLastTable(HtmlEntitySettings entity, String... headers) {
    for (int i = 0; i < headers.length; i++) {
        String columnHeader = headers[i];
        entity.addField(columnHeader)
                .match("form")
                .match("tr").not().at(2)
                .match("td").underHeader("td").withExactText(columnHeader)
                .getText();
    }
}
/**
 * Prints the entity name of the result followed by one line per record, each record rendered
 * as an insertion-ordered field-name-to-value map.
 *
 * @param result parsed records of a single entity
 */
private void printResult(HtmlParserResult result) {
    String heading = "\nValues of [" + result.getEntityName() + "]";
    System.out.println(heading);

    for (HtmlRecord entry : result.iterateRecords()) {
        // A fresh LinkedHashMap per record keeps the fields in the order they are filled in.
        LinkedHashMap<String, String> fieldValues = new LinkedHashMap<String, String>();
        System.out.println(entry.fillFieldMap(fieldValues));
    }
}
The output of this code is:
Values of [person]
{NIMI=Bernard Gazeau, LAJI=Lämminverinen, SUKUPUOLI=Ruuna, IKÄ=18 v, VALMENTAJA=Hannele Haapala, OMISTAJA=HaapalaHannele, Mouhijärvi, STARTIT=6, VOITOT=0, 2. SIJAT=0, 3. SIJAT=0, VOITTOSUMMA=680 €, ȻȻ, ȻC tai CȻ=0: 0-0-0, ȻȻ=0: 0-0-0, ȻC=0: 0-0-0, CȻ=0: 0-0-0, ENNÄTYSAJAT=null, RYHMÄLÄHTÖ=null, TASOITUSAJO=20,1 ke}
Values of [table1]
{VUOSI=2009, STARTIT=1, VOITOT=0, 2. SIJAT=0, 3. SIJAT=0, VOITTOSUMMA=140, RYHMÄ=null, TASOITUS=20,1 ke}
{VUOSI=2008, STARTIT=3, VOITOT=0, 2. SIJAT=0, 3. SIJAT=0, VOITTOSUMMA=420, RYHMÄ=null, TASOITUS=21,5 ke}
{VUOSI=2006, STARTIT=2, VOITOT=0, 2. SIJAT=0, 3. SIJAT=0, VOITTOSUMMA=120, RYHMÄ=null, TASOITUS=22,2 ke}
{VUOSI=YHT, STARTIT=6, VOITOT=0, 2. SIJAT=0, 3. SIJAT=0, VOITTOSUMMA=680, RYHMÄ=null, TASOITUS=20,1 ke}
Values of [table2]
{R=T, PVM=12.05.09, L=1, R-NRO=5, MATKA=2120, S=p, KMA=null, HYL=p, KERR.=0,0, PALK.=0, OHJ.=M Forss, VALM.=Haapala}
{R=TK, PVM=10.04.09, L=2, R-NRO=3, MATKA=2120, S=4, KMA=20,1, HYL=null, KERR.=25,6, PALK.=140, OHJ.=M Forss, VALM.=Haapala}
{R=TK, PVM=31.10.08, L=3, R-NRO=6, MATKA=2120, S=4, KMA=22,7, HYL=null, KERR.=104,2, PALK.=240, OHJ.=H Hell, VALM.=Haapala}
{R=T, PVM=04.03.08, L=2, R-NRO=10, MATKA=2100, S=8, KMA=21,5, HYL=null, KERR.=99,6, PALK.=100, OHJ.=H Hell, VALM.=Haapala}
{R=P, PVM=17.02.08, L=10, R-NRO=1, MATKA=2100, S=5, KMA=23,6, HYL=null, KERR.=96,1, PALK.=80, OHJ.=H Hell, VALM.=Haapala}
{R=T, PVM=15.01.08, L=KL1, R-NRO=3, MATKA=2120, S=kl, KMA=22,4, HYL=null, KERR.=0,0, PALK.=0, OHJ.=H Hell, VALM.=Haapala}
{R=T, PVM=18.12.07, L=KL1, R-NRO=1, MATKA=2120, S=kl hlo, KMA=25,2, HYL=hlo, KERR.=0,0, PALK.=0, OHJ.=H Hell, VALM.=Haapala}
{R=F, PVM=15.01.06, L=2, R-NRO=1, MATKA=2140, S=5, KMA=22,2, HYL=null, KERR.=21,4, PALK.=120, OHJ.=H Kamppuri, VALM.=Haapala}
{R=F, PVM=08.01.06, L=1, R-NRO=1, MATKA=2140, S=8, KMA=22,9, HYL=null, KERR.=4,0, PALK.=0, OHJ.=Ha Korpi, VALM.=Haapala}
Hope this can be useful to you.
Disclosure: I'm the author of this library. It's commercial closed source but it can save you a lot of development time.
Related
Rally Java: Duplicate test case getting created
I have built a Rally dependency, which auto creates test case, folder in Test Plan. While creating test case it checks first if there any any existing test case with same name, else it creates new test case. This was working while total test case size was small, while the test case size increased, i am seeing duplicate test cases are created. So I made thread to wait for few seconds (Thread.sleep(8000)) after checking existing scenarios and then creating new scenario. It works by this way. Is there better way to handle & implement this to handle any size of test case. Please advice. String tcName = rallyMethods.getTestScenarios(parentFolder, scenarioName); Thread.sleep(8000); if (tcName == null) { rallyMethods.createTestCase(parentFolder, scenarioName); Thread.sleep(8000); } else { rallyMethods.updateTestCase(parentFolder, scenarioName); Thread.sleep(8000); } public String getTestScenarios(String parentFolderName, String ScenarioName) throws Throwable { String sName = null; String pFolder; QueryRequest testCaseRequest = new QueryRequest("TestCase"); testCaseRequest.setLimit(Integer.MAX_VALUE); testCaseRequest.setPageSize(Integer.MAX_VALUE); testCaseRequest.setFetch(new Fetch("FormattedID", "Name", "Workspace", "Project", "TestFolder")); testCaseRequest.setQueryFilter(new QueryFilter("Name", "=", ScenarioName)); testCaseRequest.setWorkspace(WORKSPACE_ID); testCaseRequest.setProject(PROJECT_ID); QueryResponse testCaseQueryResponse = query(testCaseRequest); int testCaseCount = testCaseQueryResponse.getTotalResultCount(); // System.out.println("TestCaseCount:" + testCaseCount); for (int i = 0; i < testCaseCount; i++) { JsonObject scenarioObj = testCaseQueryResponse.getResults().get(i).getAsJsonObject(); String scenarioName = String.valueOf(scenarioObj.get("Name").getAsString()); JsonElement pFolderObj = scenarioObj.get("TestFolder"); if (!(pFolderObj.isJsonNull())) { JsonObject tFolderObj = scenarioObj.get("TestFolder").getAsJsonObject(); pFolder = 
String.valueOf(tFolderObj.get("Name").getAsString()); if (parentFolderName.equalsIgnoreCase(pFolder)) { sName = scenarioName; logger.info("Test Scenarios identified in Rally: " + sName); } else { logger.info("Scenario, " + ScenarioName + " not found, New Scenario will be created in Rally"); } } } return sName; } public void createTestCase(String parentFolderName, String testCaseName) throws Throwable { String tcName = null; String userID = readUser(); // Query Child Folders: QueryRequest testFolderRequest = new QueryRequest("TestFolder"); testFolderRequest.setFetch(new Fetch("Name", "Workspace", "Project")); testFolderRequest.setQueryFilter(new QueryFilter("Name", "=", parentFolderName)); testFolderRequest.setWorkspace(WORKSPACE_ID); testFolderRequest.setProject(PROJECT_ID); QueryResponse testFolderQueryResponse = query(testFolderRequest); int folderCount = testFolderQueryResponse.getTotalResultCount(); for (int i = 0; i < folderCount; i++) { String testFolderRef = testFolderQueryResponse.getResults().get(i).getAsJsonObject().get("_ref").getAsString(); JsonObject testFolderObj = testFolderQueryResponse.getResults().get(i).getAsJsonObject(); String pFolder = String.valueOf(testFolderObj.get("Name").getAsString()); if (pFolder.equalsIgnoreCase(parentFolderName)) { //System.out.println("Creating a test case..."); JsonObject newTC = new JsonObject(); newTC.addProperty("Name", testCaseName); newTC.addProperty("Workspace", WORKSPACE_ID); newTC.addProperty("Project", PROJECT_ID); newTC.addProperty("Description", "Selenium Automated TestCase"); newTC.addProperty("TestFolder", testFolderRef); newTC.addProperty("Method", "Automated"); newTC.addProperty("Type", "Functional"); if (!(userID == null)) { newTC.addProperty("Owner", userID); } CreateRequest createRequest = new CreateRequest("testcase", newTC); CreateResponse createResponse = create(createRequest); if (createResponse.wasSuccessful()) { JsonObject tcObj = createResponse.getObject(); tcName = 
String.valueOf(tcObj.get("Name").getAsString()); logger.info("Created test scenario name is: " + tcName); } else { String[] createErrors; createErrors = createResponse.getErrors(); logger.info("Error while creating test scenario below parent folder!"); for (int j = 0; j < createErrors.length; j++) { System.out.println(createErrors[j]); logger.info(createErrors[j]); } } } } }
Hmmm... I'm not too familiar with the Java REST toolkit, but I can't think of a reason why a larger set of test cases in the workspace would cause the query to fail like that. Did you try checking testCaseQueryResponse.wasSuccessful()? If it returns false, you can see what the error is by calling testCaseQueryResponse.getErrors(). My first thought is that you should provide reasonable values for the limit and pageSize parameters rather than passing Integer.MAX_VALUE. Second, rather than checking in code whether the returned test cases are in the specified parent folder, you should add a query filter that restricts the results to TestFolder.Name = parentFolderName. Then you should only expect either 1 or 0 results (assuming that all test cases within a test folder have unique names).
I'm getting the values from the table and while comparing I'm getting words split completely
System.setProperty("webdriver.chrome.driver", "C:\\Users\\Testing\\Downloads\\chromedriver_win32\\chromedriver.exe"); driver = new ChromeDriver(); driver.navigate().to("https://jpetstore.cfapps.io/catalog"); driver.findElement(By.xpath("//a[contains(text(),'Sign In')]")).click(); driver.findElement(By.name("username")).sendKeys("Testing6738788"); driver.findElement(By.name("password")).sendKeys("test#123"); driver.findElement(By.id("login")).click(); driver.findElement(By.xpath("//div[#id='SidebarContent']/a[contains(#href,'FISH')]/img")).click(); driver.findElement(By.xpath("//td[contains(text(),'Angelfish')]//preceding-sibling::td//a")).click(); List<WebElement> tablelist = driver.findElements(By.xpath("//div[#id='Catalog']//tr")); for(int i = 0; i < tablelist.size(); i++) { String gotvalues = tablelist.get(i).getText(); System.out.println("Values got from the table " +gotvalues); // Here im using split function but no luck String[] splitword = gotvalues.split(" "); for(String words : splitword) { System.out.println("Got single words from the split " + words); // I want to compare the Large Angelfish value from the output if(words.equalsIgnoreCase("Large Angelfish")) { System.out.println("Element present " + words); } } } Words should be split as "Item ID" -EST-1. I'm facing an issue with the description. The complete word is not getting displayed. How to write code to get item ID, product ID, and description?
Without String Array also you can verify.Try this code see if this help.Take input from keyboard.You have to type both values in console Like EST-1 then Enter and Then Large Angelfish and its compared later.Try Now. Scanner scan= new Scanner(System.in); String textID= scan.nextLine(); //Enter ID Here String textDesc= scan.nextLine();//Enter Desc Here System.setProperty("webdriver.chrome.driver", "C:\\Users\\Testing\\Downloads\\chromedriver_win32\\chromedriver.exe"); driver = new ChromeDriver(); driver.navigate().to("https://jpetstore.cfapps.io/catalog"); driver.findElement(By.xpath("//a[contains(text(),'Sign In')]")).click(); driver.findElement(By.name("username")).sendKeys("Testing6738788"); driver.findElement(By.name("password")).sendKeys("test#123"); driver.findElement(By.id("login")).click(); driver.findElement(By.xpath("//div[#id='SidebarContent']/a[contains(#href,'FISH')]/img")).click(); driver.findElement(By.xpath("//td[contains(text(),'Angelfish')]//preceding-sibling::td//a")).click(); List<WebElement> tablelist = driver.findElements(By.xpath("//div[#id='Catalog']//tr/td")); System.out.println(tablelist.size()); for(int i=0;i<tablelist.size();i++) { String gotvalues = tablelist.get(0).getText(); String gotvaluesdesc = tablelist.get(2).getText(); // System.out.println("Values got from the table " +gotvalues ); if(gotvalues.trim().equalsIgnoreCase(textID) && gotvaluesdesc.trim().equalsIgnoreCase(textDesc)) { System.out.println("Element present ID: " + gotvalues + " Desc :" + gotvaluesdesc); break; }
As you want to compare the table data, you need to modify your xPath little to fetch the data effectively so that you can avoid the splitting part and you can compare the data easily. Try the below code : String xPath = "//div[#id='Catalog']//tr"; List<WebElement> tableList = driver.findElements(By.xpath(xPath)); System.out.println("Item ID\t\tProduct ID\t\tDescription\t\tList Price\t\tOther"); System.out.println("--------------------------------------------------------------------------------------------------"); for(int i=1;i<tableList.size();i++) { // Below line fetches/stores each table data as column wise List<WebElement> listData = driver.findElements(By.xpath(xPath+"["+(i+1)+"]/td")); for(int j=0;j<listData.size();j++) { // Printing the column data System.out.print(listData.get(j).getText()+"\t\t"); } System.out.println(); } // As per the above output, description is in the 3rd(2nd index) column so you can fetch that with the index number 2. for(int i=1;i<tableList.size();i++) { List<WebElement> listData = driver.findElements(By.xpath(xPath+"["+(i+1)+"]/td")); if(listData.get(2).getText().trim().equals("Large Angelfish")) { System.out.println("=> 'Large Angelfish' is Matching..."); } if(listData.get(2).getText().trim().equals("Large Angelfish")) { System.out.println("=> 'Small Angelfish' is Matching..."); } } If you execute the above code, it will print output as below : Item ID Product ID Description List Price Other ---------------------------------------------------------------------------- EST-1 FI-SW-01 Large Angelfish $16.50 Add to Cart EST-2 FI-SW-01 Small Angelfish $16.50 Add to Cart In the above output, Description column number is 3 so you can substitute an index number in the below line for the corresponding column : listData.get(2).getText().trim().equals("Large Angelfish") I hope it helps...
How to get a specific value from a string name via config
The green is the lore and the yellow is the displayname http://puu.sh/k2iI7/62619f9536.jpg I'm trying to seperate them in there rightful places for some odd reason there both appearing in both of the places. items.java public ItemStack applyLore(ItemStack stack, String name, String lore1){ ItemMeta meta = stack.getItemMeta(); meta.setDisplayName(name.replaceAll("&([0-9a-f])", "\u00A7$1")); ArrayList<String> lore = new ArrayList<String>(); lore.add(lore1.replaceAll("&([0-9a-f])", "\u00A7$1")); meta.setLore(lore); stack.setItemMeta(meta); return stack; } // p.getInventory().addItem(new ItemStack(Integer.parseInt(s), 1)); public void giveItemfromConfig(Player p) { String name ="name:"; String lore ="lore:"; for ( String s : plugin.file.getFile().getStringList(plugin.file.path) ) { try { s.split(" "); if ( s.contains(name) || s.contains(lore) ) { String namelength = s.substring(name.length()); String lorelength = s.substring(lore.length()); p.getInventory().addItem(applyLore(new ItemStack(Integer.parseInt(s.split(" ")[0])), namelength.replace("_", " ").replace("ame:", "").replace("e:", "").replace("lor", "").replace("ore", ""), lorelength.replace("_", " ").replace("lor:", "").replace("e:", "").replace("am", "").replace("lor", "").replace("ore", ""))); p.sendMessage("debug"); } else { ///nope.exe p.getInventory().addItem(new ItemStack(Integer.parseInt(s))); } } catch(Exception e) { e.printStackTrace(); Bukkit.getConsoleSender().sendMessage(ChatColor.AQUA + "Error in Config, Your id must be a integer ERROR:" + e); } } } config.yml ChestPopulater: items: - 276 name:cookie
First, the problem is caused by "Integer.parseInt(s)". I don't know why you added it, as we don't have the full source code or know what you are trying to do with the String "s". If you are using it to get "276", I suggest doing the following: String s2 = s.replaceAll("[A-Za-z]","").replace(":","").trim(); Integer i = Integer.parseInt(s2);
How to identify PP-tags/NP-tags/VP-tags in openNLP chunker?
I want to count the numbers of pp/np/vp in the text but I don't know how to identify PP-tags/NP-tags/VP-tags in openNLP chunker? I have tried this code but it's not working. ChunkerModel cModel = new ChunkerModel(modelIn); ChunkerME chunkerME = new ChunkerME(cModel); String result[] = chunkerME.chunk(whitespaceTokenizerLine, tags); HashMap<Integer,String> phraseLablesMap = new HashMap<Integer, String>(); Integer wordCount = 1; Integer phLableCount = 0; for (String phLable : result) { if(phLable.equals("O")) phLable += "-Punctuation"; //The phLable of the last word is OP if(phLable.split("-")[0].equals("B")) phLableCount++; phLable = phLable.split("-")[1] + phLableCount; System.out.println(wordCount + ":" + phLable); phraseLablesMap.put(wordCount, phLable); wordCount++; } Integer noPP=0; Integer TotalPP=0; for (String PPattach: result) { if (PPattach.equals("PP")) { for (int i=0;i<result.length;i++) TotalPP = noPP +1; } } System.out.println(TotalPP); Output: 1:NP1 2:VP2 3:NP3 4:NP3 5:VP4 6:PP5 7:NP6 8:NP6 9:NP6 10:NP6 11:PP7 12:NP8 13:NP8 14:NP8 15:PP9 16:NP10 17:NP10 18:PP11 19:NP12 20:NP12 21:VP13 22:VP13 23:NP14 24:NP14 25:PP15 26:NP16 27:NP16 28:Punctuation16 0
The best way is to use the Span objects: they have a getType() method that returns the chunk type. See the post "grouping all Named entities in a Document".
How to read data from Hbase?
Hi there. I'm used to SQL, but I need to read data from an HBase table. Any help on this would be great: a book, or maybe just some sample code to read from the table. Someone said using a scanner would do the trick, but I do not know how to use it.
From the website: // Sometimes, you won't know the row you're looking for. In this case, you // use a Scanner. This will give you cursor-like interface to the contents // of the table. To set up a Scanner, do like you did above making a Put // and a Get, create a Scan. Adorn it with column names, etc. Scan s = new Scan(); s.addColumn(Bytes.toBytes("myLittleFamily"), Bytes.toBytes("someQualifier")); ResultScanner scanner = table.getScanner(s); try { // Scanners return Result instances. // Now, for the actual iteration. One way is to use a while loop like so: for (Result rr = scanner.next(); rr != null; rr = scanner.next()) { // print out the row we found and the columns we were looking for System.out.println("Found row: " + rr); } // The other approach is to use a foreach loop. Scanners are iterable! // for (Result rr : scanner) { // System.out.println("Found row: " + rr); // } } finally { // Make sure you close your scanners when you are done! // Thats why we have it inside a try/finally clause scanner.close(); }
I would like to offer solution without deprecated methods Configuration conf = HBaseConfiguration.create(); Connection connection = ConnectionFactory.createConnection(conf); Admin admin = connection.getAdmin(); // list the tables Arrays.stream(admin.listTables()).forEach(System.out::println); // let's insert some data in 'mytable' and get the row TableName tableName = TableName.valueOf("test_1"); Table table = connection.getTable(tableName); //Put Put thePut = new Put(Bytes.toBytes("rowkey1")); String columnFamily = "m"; String columnQualifier1 = "col1"; String outValue1 = "value1"; String columnQualifier2 = "col2"; String outValue2 = "value2"; thePut.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier1), Bytes.toBytes(outValue1)); thePut.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier2), Bytes.toBytes(outValue2)); table.put(thePut); //Get Get theGet = new Get(Bytes.toBytes("rowkey1")); Result result = table.get(theGet); //get value first column String inValue1 = Bytes.toString(result.value()); //get value by ColumnFamily and ColumnName byte[] inValueByte = result.getValue(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier1)); String inValue2 = Bytes.toString(inValueByte); //loop for result for (Cell cell : result.listCells()) { String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell)); String value = Bytes.toString(CellUtil.cloneValue(cell)); System.out.printf("Qualifier : %s : Value : %s%n", qualifier, value); } //create Map by result and print it Map<String, String> getResult = result.listCells().stream().collect(Collectors.toMap(e -> Bytes.toString(CellUtil.cloneQualifier(e)), e -> Bytes.toString(CellUtil.cloneValue(e)))); getResult.entrySet().stream().forEach(e -> System.out.printf("Qualifier : %s : Value : %s%n", e.getKey(), e.getValue())); System.out.println("---------Scan---------"); Scan scan = new Scan(); ResultScanner resultScan = table.getScanner(scan); resultScan.forEach(e -> { System.out.printf("Row 
\"%s\"%n", Bytes.toString(e.getRow())); Map<String, String> getResultScan = e.listCells().stream().collect(Collectors.toMap(d -> Bytes.toString(CellUtil.cloneQualifier(d)), d -> Bytes.toString(CellUtil.cloneValue(d)))); getResultScan.entrySet().stream().forEach(d -> System.out.printf("column \"%s\", value \"%s\"%n", d.getKey(), d.getValue())); System.out.println(); });
I used that, but to get the String value you must use the getValue method of Result: byte[] bytes = rr.getValue(Bytes.toBytes("myLittleFamily"), Bytes.toBytes("someQualifier")); System.out.println(new String(bytes));