Similar code in NSoup - Java

I have this code using Jsoup:
import java.io.IOException;
import java.util.logging.*;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.*;
import java.util.*;
public class JavaApplication17 {
    public static void main(String[] args) {
        try {
            String url = /* url */;
            Document doc = Jsoup.connect(url).get();
            Elements paragraphs = doc.select("td");
            List<String> text = new ArrayList<>();
            for (Element p : paragraphs) {
                text.add(p.text());
            }
            String[] name = new String[1000];
            for (int i = 0; i < 100 && i < text.size(); i++) {
                name[i] = text.get(i);
                System.out.println(name[i]);
            }
        } catch (IOException ex) {
            Logger.getLogger(JavaApplication17.class.getName())
                  .log(Level.SEVERE, null, ex);
        }
    }
}
What is the equivalent in NSoup, i.e. the similar .NET code?
If NSoup is not the best option for this kind of use in .NET, then what should be used?

How to call a base class method that has a data provider in every test (Selenium)

I have been trying to integrate BrowserStack with my Selenium scripts. As part of this I have added desired capabilities in my getBrowser method, with a data provider, in the base class, since I want to run my scripts in multiple browsers. The browser list is in the getData method in the base class.
How can I call the getBrowser method in my test cases and have getData (the browser list) defined only in the base class? This is what I have done and it is not right; I have added my base class and the test script below.
Base.getBrowser(Platform platform, String browserName, String browserVersion); - this is the line where I am stuck.
Any help would be appreciated. Thanks.
package com.gale.precision.FundVisualizer.core;
import java.net.MalformedURLException;
import java.net.URL;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import org.openqa.selenium.remote.RemoteWebDriver;
import org.testng.annotations.DataProvider;
import org.openqa.selenium.Platform;
import org.openqa.selenium.StaleElementReferenceException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.edge.EdgeDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.ie.InternetExplorerDriver;
import org.openqa.selenium.remote.DesiredCapabilities;
public class Base {
//public static WebDriver driver = null;
public static WebDriver driver;
public static String DriverPath = System.getProperty("user.dir") + "//" + "Drivers";
public static String DirectoryPath = System.getProperty("user.dir");
public static Properties prop = new Properties();
public static InputStream input = null;
public static final String USERNAME = "antonyprabhu1";
public static final String AUTOMATE_KEY = "xHRMpqxgD8sn3e3sr75s";
public static final String URL = "https://" + USERNAME + ":" + AUTOMATE_KEY + "@hub-cloud.browserstack.com/wd/hub";
@DataProvider(name = "EnvironmentDetails")
public static void getBrowser(Platform platform, String browserName, String browserVersion) throws MalformedURLException
{
DesiredCapabilities capability = new DesiredCapabilities();
capability.setPlatform(platform);
capability.setBrowserName(browserName);
capability.setVersion(browserVersion);
capability.setCapability("browserstack.debug", "true");
driver = new RemoteWebDriver(new URL(URL), capability);
try {
input = new FileInputStream(DirectoryPath + "//" + "config" + "//" + "app.properties");
prop.load(input);
} catch (IOException e) {
e.printStackTrace();
}
}
@DataProvider(name = "EnvironmentDetails", parallel = true)
public Object[][] getData() {
Object[][] testData = new Object[][] {
{
Platform.MAC, "chrome", "62.0"
}, {
Platform.WIN8,
"chrome",
"62.0"
}, {
Platform.WINDOWS,
"firefox",
"57"
}
};
return testData;
}
public static void closeBrowser() {
driver.quit();
}
}
package comparison;
import java.net.MalformedURLException;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.Platform;
import org.openqa.selenium.StaleElementReferenceException;
import org.openqa.selenium.WebElement;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.AfterTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import com.gale.precision.FundVisualizer.core.Base;
import com.gale.precision.FundVisualizer.core.ExtentReport;
import com.gale.precision.FundVisualizer.pageObject.Comparison;
import com.gale.precision.FundVisualizer.pageObject.InvestmentsSearch;
@SuppressWarnings("unused")
public class AddedInvestmentDisplay extends ExtentReport {
/*======================================================================================
Test case: Verify that already selected Investments for comparison do not show up
======================================================================================*/
@Test(testName = "Comparison: Verify an already selected Investment for comparison does not show up in search")
public void verifyAddedInvestmentDisplay() throws InterruptedException, MalformedURLException {
//test = extent.createTest(Thread.currentThread().getStackTrace()[1].getMethodName());
Base.getBrowser(Platform platform, String browserName, String browserVersion);
InvestmentsSearch.login(Base.driver);
InvestmentsSearch.InvestmentsLink(Base.driver).click();
JavascriptExecutor jse = (JavascriptExecutor) Base.driver;
jse.executeScript("window.scrollBy(0,750)", "");
InvestmentsSearch.ViewResults(Base.driver).click();
for (int i = 0; i <= 2; i++)
{
try {
Comparison.firstCheckBox(Base.driver).click();
break;
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
//Base.clickingStaleElements(Comparison.firstCheckBox(Base.driver));
//Comparison.firstCheckBox(Base.driver).click();
for (int i = 0; i <= 2; i++)
{
try {
Comparison.analyzeOrCompare(Base.driver).click();
break;
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
Comparison.addInvestmentField(Base.driver).sendKeys("MFC027");
Comparison.firstSearchResult(Base.driver).click();
Comparison.addInvestmentField(Base.driver).sendKeys("MFC027");
Assert.assertEquals(Comparison.emptySearchResult(Base.driver).isDisplayed(), true);
}
@DataProvider(name = "EnvironmentDetails", parallel = true)
public Object[][] getData() {
Object[][] testData = new Object[][] {
{
Platform.MAC, "chrome", "62.0"
}, {
Platform.WIN8,
"chrome",
"62.0"
}, {
Platform.WINDOWS,
"firefox",
"57"
}
};
return testData;
}
@AfterClass
public void tearDown()
{
Base.closeBrowser();
}
}
I could resolve this issue by passing the parameters in the @Test method, like:
@Test(testName = "Comparison: Verify adding an index in the comparison", dataProvider = "EnvironmentDetails", dataProviderClass = BrowsersDataProvider.class)
public void verifyAddingIndex(Platform platform, String browserName, String browserVersion) throws InterruptedException, MalformedURLException {
//test = extent.createTest(Thread.currentThread().getStackTrace()[1].getMethodName());
Base.getBrowser(platform, browserName, browserVersion);
InvestmentsSearch.login(Base.driver);
// ... remaining test steps
}
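The dataProviderClass referenced above, BrowsersDataProvider, is not shown in the post. A minimal sketch of what such a class might look like, assuming it only hosts the shared provider (the platform/browser combinations are the ones from the getData method above, and the package name is an assumption):

package com.gale.precision.FundVisualizer.core;
import org.openqa.selenium.Platform;
import org.testng.annotations.DataProvider;
// Hypothetical helper class: hosts the shared data provider so each test class
// can reference it via dataProviderClass instead of redefining getData.
public class BrowsersDataProvider {
    // Declared static so TestNG can use it from another class via dataProviderClass.
    @DataProvider(name = "EnvironmentDetails", parallel = true)
    public static Object[][] getData() {
        return new Object[][] {
            { Platform.MAC, "chrome", "62.0" },
            { Platform.WIN8, "chrome", "62.0" },
            { Platform.WINDOWS, "firefox", "57" }
        };
    }
}

With this in place, every test only needs the dataProvider and dataProviderClass attributes on its @Test annotation, and getBrowser stays a plain method in Base.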

I want to get all article content from all links inside a website

I want to extract all article content from a website using any web crawling/scraping method.
The problem is that I can get content from a single page, but not from the pages it links to.
Can anyone please suggest a proper solution? Here is my code:
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.EditorKit;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class Main3 {
public static void main(String[] argv) throws Exception {
HTMLDocument doc = new HTMLDocument() {
public HTMLEditorKit.ParserCallback getReader(int pos) {
return new HTMLEditorKit.ParserCallback() {
public void handleText(char[] data, int pos) {
System.out.println(data);
}
};
}
};
URL url = new URI("http://tamilblog.ishafoundation.org/").toURL();
URLConnection conn = url.openConnection();
Reader rd = new InputStreamReader(conn.getInputStream());
OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream("ram.txt"), "UTF-8");
EditorKit kit = new HTMLEditorKit();
kit.read(rd, doc, 0);
try {
Document docs = Jsoup.connect("http://tamilblog.ishafoundation.org/").get();
Elements links = docs.select("a[href]");
Elements elements = docs.select("*");
System.out.println("Total Links :"+links.size());
for (Element element : elements) {
System.out.println(element.ownText());
}
for (Element link : links) {
System.out.println(" * a: link :"+ link.attr("a:href"));
System.out.println(" * a: text :"+ link.text());
System.out.println(" * a: text :"+ link.text());
System.out.println(" * a: Alt :"+ link.attr("alt"));
System.out.println(link.attr("p"));
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
This is the solution:
package com.github.davidepastore.stackoverflow34014436;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.BadLocationException;
import javax.swing.text.EditorKit;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
* Stackoverflow 34014436 question.
*
*/
public class App {
public static void main(String[] args) throws URISyntaxException,
IOException, BadLocationException {
HTMLDocument doc = new HTMLDocument() {
public HTMLEditorKit.ParserCallback getReader(int pos) {
return new HTMLEditorKit.ParserCallback() {
public void handleText(char[] data, int pos) {
System.out.println(data);
}
};
}
};
URL url = new URI("http://tamilblog.ishafoundation.org/").toURL();
URLConnection conn = url.openConnection();
Reader rd = new InputStreamReader(conn.getInputStream());
OutputStreamWriter writer = new OutputStreamWriter(
new FileOutputStream("ram.txt"), "UTF-8");
EditorKit kit = new HTMLEditorKit();
kit.read(rd, doc, 0);
try {
Document docs = Jsoup.connect(
"http://tamilblog.ishafoundation.org/").get();
Elements links = docs.select("a[href]");
Elements elements = docs.select("*");
System.out.println("Total Links :" + links.size());
for (Element element : elements) {
System.out.println(element.ownText());
}
for (Element link : links) {
String hrefUrl = link.attr("href");
if (!"#".equals(hrefUrl) && !hrefUrl.isEmpty()) {
System.out.println(" * a: link :" + hrefUrl);
System.out.println(" * a: text :" + link.text());
writer.write(link.text() + " => " + hrefUrl + "\n");
}
}
} catch (Exception e) {
e.printStackTrace();
} finally {
writer.close();
}
}
}
Here we are using the writer to write the text of every link in the ram.txt file.
You should use an existing crawler such as Apache Nutch or StormCrawler.
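If a full crawler framework is more than you need, the linked pages can also be followed with Jsoup itself. Here is a minimal sketch of a depth-limited crawl that visits each page reachable via abs:href and prints its text; the depth limit, the same-site filter and the error handling are illustrative assumptions, not part of the answers above.

import java.util.HashSet;
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
public class SimpleJsoupCrawler {
    private final Set<String> visited = new HashSet<>();
    // Visit a page, print its text, then follow its links up to the given depth.
    // Only links on the same site are followed, to keep the crawl bounded.
    public void crawl(String url, int depth) {
        if (depth < 0 || !visited.add(url)) {
            return; // depth exhausted or page already visited
        }
        try {
            Document doc = Jsoup.connect(url).get();
            System.out.println("== " + url);
            System.out.println(doc.body().text());
            for (Element link : doc.select("a[href]")) {
                String next = link.attr("abs:href");
                if (next.startsWith("http://tamilblog.ishafoundation.org")) {
                    crawl(next, depth - 1);
                }
            }
        } catch (Exception e) {
            System.out.println("Skipping " + url + ": " + e.getMessage());
        }
    }
    public static void main(String[] args) {
        new SimpleJsoupCrawler().crawl("http://tamilblog.ishafoundation.org/", 1);
    }
}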

Crawling the Web and Storing Links

I want to create a thread in order to crawl all links of a website and store them in a LinkedHashSet, but when I print the size of this LinkedHashSet, it prints nothing. I've just started learning about crawling! I've referenced The Art of Java. Here is my code:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedHashSet;
import java.util.logging.Level;
import java.util.logging.Logger;
public class TestThread {
public void crawl(URL url) {
try {
BufferedReader reader = new BufferedReader(
new InputStreamReader(url.openConnection().getInputStream()));
String line = reader.readLine();
LinkedHashSet toCrawlList = new LinkedHashSet();
while (line != null) {
toCrawlList.add(line);
System.out.println(toCrawlList.size());
}
} catch (IOException ex) {
Logger.getLogger(TestThread.class.getName()).log(Level.SEVERE, null, ex);
}
}
public static void main(String[] args) {
final TestThread test1 = new TestThread();
Thread thread = new Thread(new Runnable() {
public void run(){
try {
test1.crawl(new URL("http://stackoverflow.com/"));
} catch (MalformedURLException ex) {
Logger.getLogger(TestThread.class.getName()).log(Level.SEVERE, null, ex);
}
}
});
}
}
You should fill your list like this:
while ((line = reader.readLine()) != null) {
toCrawlList.add(line);
}
System.out.println(toCrawlList.size());
If that doesn't work, try to set a break point in your code and find out if your reader even contains anything.
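Note also that in the posted main method the Thread is created but never started, so crawl() never runs at all. A minimal corrected sketch combining both fixes (the reading loop and thread.start()), keeping the structure and imports of the original code:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedHashSet;
import java.util.logging.Level;
import java.util.logging.Logger;
public class TestThread {
    public void crawl(URL url) {
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(url.openConnection().getInputStream()));
            LinkedHashSet<String> toCrawlList = new LinkedHashSet<>();
            String line;
            // Read every line of the response; the original loop read only once
            // and then spun forever on the same value.
            while ((line = reader.readLine()) != null) {
                toCrawlList.add(line);
            }
            System.out.println(toCrawlList.size());
        } catch (IOException ex) {
            Logger.getLogger(TestThread.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    public static void main(String[] args) {
        final TestThread test1 = new TestThread();
        Thread thread = new Thread(new Runnable() {
            public void run() {
                try {
                    test1.crawl(new URL("http://stackoverflow.com/"));
                } catch (MalformedURLException ex) {
                    Logger.getLogger(TestThread.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        });
        thread.start(); // the original code never started the thread
    }
}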

JSONParser: Unexpected token COLON(:)

I am trying to parse a JSON file with JSONParser and am getting this error, which occurs at the beginning of the following JSON:
Unexpected token COLON(:) at position 11.
{"276716878": {
"followers": [
2435018580,
1664252310,
372262434
],
"following": [
16211434,
945440959,
130682467,
264257750,
900526363,
318231688,
40335029,
64044676
]
}}
I also wrote the JSON file with the following:
FileWriter out = new FileWriter(JSON_FILE);
out.write(json.toString(1));
out.flush();
out.close();
There could be a format error in the JSON string you passed. Validate your JSON string. The sample code below works fine.
import java.io.FileWriter;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.json.JSONException;
import org.json.JSONObject;
public class Test {
private static final Logger logger = Logger.getLogger(Test.class.getName());
private static final String JSON_FILE = "/home/visruth/Desktop/Visruth.txt";
public static void main(String[] args) {
String jsonText = "{\"215876567\": { \"followers\": [ 2464054938, 772677937]}}";
try (
FileWriter out = new FileWriter(JSON_FILE);
) {
JSONObject json = new JSONObject(jsonText);
int indentFactor = 1;
String prettyprintedJSON = json.toString(indentFactor);
System.out.println(prettyprintedJSON);
out.write(prettyprintedJSON);
} catch (JSONException e) {
logger.log(Level.SEVERE, e.getMessage());
} catch (IOException e) {
logger.severe(e.getMessage());
}
}
}
Assign your JSON text to the jsonText variable and try.
I think you are mixing classes from different APIs for this purpose. Use only one API.
Here is demo code which works fine with the JSON string given in the question.
import java.io.FileWriter;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.junit.Assert;
import org.junit.Test;
public class TestCase {
private static final String JSON_FILE = "/home/visruth/Desktop/Visruth.txt";
@Test
public void testJSONParser() throws Exception {
JSONParser parser = new JSONParser();
try (
FileWriter out = new FileWriter(JSON_FILE);
) {
String jsonText = "{\"276716878\": { \"followers\": [ 2435018580, 1664252310, 372262434 ], \"following\": [ 16211434, 945440959, 130682467, 264257750, 900526363, 318231688, 40335029, 64044676 ] }}";
Object obj = parser.parse(jsonText);
JSONObject jsonObject = (JSONObject) obj;
JSONObject jsonObject215876567 = (JSONObject)jsonObject.get("276716878");
JSONArray followers = (JSONArray)(jsonObject215876567.get("followers"));
Assert.assertEquals("[2435018580,1664252310,372262434]", followers.toString());
String jsonStringFromJsonObject = jsonObject.toString(); // json.simple's toString() takes no indent argument
out.write(jsonStringFromJsonObject);
} catch (ParseException e) {
Assert.fail(e.getMessage());
}
}
}

Pulling all URLs from any webpage, having trouble with indexOf [homework]

The indexOf always returns negative 7 no matter what I put. I will be using the website http://www.columbusstate.edu. Here is my code:
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Scanner;
public class WebCrawler
{
private static int linkCount = 0;
public static void main(String[] args) throws IOException
{
// instance variables
ArrayList<String> links = new ArrayList<String>();
System.out.println("Enter the website you would like to web crawl");
Scanner input = new Scanner(System.in);
String address=input.next();
// goes to website
URL locator = new URL(address);
Scanner in=new Scanner(locator.openStream());
String str="";
PrintWriter out=new PrintWriter("links.txt");
// searches the webpage and pulls the links, or it should anyway
while(in.hasNextLine())
{
str=in.next();
if(str.contains("href=\"http://"))
{
linkCount++;
int start = str.indexOf("ht");
int end = str.indexOf("/\"");
if(links.contains(str.substring(start, end))){
}
else{
links.add("Line Number "+linkCount+""+str.substring(start, end));
}
}
else if(str.contains("href=\"https://")){
linkCount++;
int start = str.indexOf("ht");
int end = str.indexOf("://")+15;
if(links.contains(str.substring(start, end))){
}
else{
links.add("Line Number "+linkCount+""+str.substring(start, end));
}
}
}
int num = links.size();
System.out.println(num);
out.println("Number of links on this webpage is "+linkCount);
out.println("Links are:");
for(int i = links.size()-1; i>0; i--){
out.println(links.get(i));
}
out.close();
}
}
If you are really looking for a way to extract links from a web page, then it's better to use a proper HTML parser than to try to do it manually. Here is an example with Jsoup:
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class HTMLUtils {
private HTMLUtils() {}
public static List<String>extractLinks(String url) throws IOException {
final ArrayList<String> result = new ArrayList<String>();
Document doc = Jsoup.connect(url).get();
Elements links = doc.select("a[href]");
// href ...
for (Element link : links) {
result.add(link.attr("abs:href"));
// result.add(link.text());
}
return result;
}
public final static void main(String[] args) throws Exception{
String site = "http://www.columbusstate.edu";
List<String> links = HTMLUtils.extractLinks(site);
for (String link : links) {
System.out.println(link);
}
}
}
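Since the assignment writes its results to links.txt with a PrintWriter, the main method above could be adapted along these lines (a sketch; it additionally needs import java.io.PrintWriter, and the output format mirrors the original homework code):

public final static void main(String[] args) throws Exception {
    String site = "http://www.columbusstate.edu";
    List<String> links = HTMLUtils.extractLinks(site);
    // Write the link count and every extracted link to links.txt,
    // as the original hand-rolled parser tried to do.
    try (PrintWriter out = new PrintWriter("links.txt")) {
        out.println("Number of links on this webpage is " + links.size());
        out.println("Links are:");
        for (String link : links) {
            out.println(link);
        }
    }
}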
