Getting 403 error with Java

When I pass the URL "http://echo.jsontest.com/key/value/one/two" to the code below, it returns JSON data.
But when I pass "http://jsonplaceholder.typicode.com/comments?postId=1" instead, I get a 403 Forbidden error.
I'm not sure what's going on. Any advice?
package automation_Demo_First;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.nio.charset.Charset;

import org.testng.annotations.Test;

public class JsonReader {

    // http://jsonplaceholder.typicode.com/comments?postId=1
    public String url = "http://echo.jsontest.com/key/value/one/two";

    @Test
    public void testJson() throws IOException {
        String data = getDataByJavaIO(url);
        System.out.println(data);
    }

    public String getDataByJavaIO(String url) throws IOException {
        InputStream inputstream = null;
        BufferedReader bufferreader = null;
        try {
            inputstream = new URL(url).openStream();
            bufferreader = new BufferedReader(new InputStreamReader(inputstream, Charset.forName("UTF-8")));
            return readData(bufferreader);
        } catch (IOException e) {
            throw e;
        } finally {
            // close the reader before the stream it wraps
            closeResource(bufferreader);
            closeResource(inputstream);
        }
    }

    public String readData(Reader reader) throws IOException {
        StringBuilder stringbuilder = new StringBuilder();
        int cp;
        while ((cp = reader.read()) != -1) {
            stringbuilder.append((char) cp);
        }
        return stringbuilder.toString();
    }

    public void closeResource(AutoCloseable closable) {
        try {
            if (closable != null) {
                closable.close();
                System.out.println("\n" + closable.getClass().getName() + " closed ...");
            }
        } catch (Exception e) {
            e.printStackTrace(System.err);
        }
    }
}

The server is rejecting the request because of its headers: with openStream(), Java sends its default User-Agent (something like "Java/1.8.0"), and jsonplaceholder.typicode.com answers that with 403 Forbidden, while echo.jsontest.com does not care. Send a browser-like User-Agent instead. In your try block,
try {
    inputstream = new URL(url).openStream();
    bufferreader = new BufferedReader(new InputStreamReader(inputstream, Charset.forName("UTF-8")));
    return readData(bufferreader);
}
change it to
try {
    HttpURLConnection httpCon = (HttpURLConnection) new URL(url).openConnection();
    httpCon.addRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36");
    inputstream = httpCon.getInputStream();
    bufferreader = new BufferedReader(new InputStreamReader(inputstream, Charset.forName("UTF-8")));
    return readData(bufferreader);
}
(you will also need import java.net.HttpURLConnection;)
Source: https://stackoverflow.com/a/18889991/3903483
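Putting it together, here is a minimal self-contained sketch of the fixed method (the class name, main() and try-with-resources are just for illustration; the charset and User-Agent string are the ones from the snippet above):
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class JsonFetch {

    // Opens the connection with a browser-like User-Agent so header-based
    // filters don't answer 403, then reads the whole body as UTF-8 text.
    public static String getDataByJavaIO(String url) throws IOException {
        HttpURLConnection httpCon = (HttpURLConnection) new URL(url).openConnection();
        httpCon.addRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36");
        try (InputStream in = httpCon.getInputStream();
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            StringBuilder sb = new StringBuilder();
            int cp;
            while ((cp = reader.read()) != -1) {
                sb.append((char) cp);
            }
            return sb.toString();
        } finally {
            httpCon.disconnect();
        }
    }

    public static void main(String[] args) throws IOException {
        System.out.println(getDataByJavaIO("http://jsonplaceholder.typicode.com/comments?postId=1"));
    }
}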

Use:
URLConnection hc = new URL(url).openConnection();
hc.setRequestProperty("User-Agent", "");
inputstream = hc.getInputStream();
Instead of:
inputstream = new URL(url).openStream();
Setting the property, even to an empty string, replaces the default "Java/1.x" User-Agent the connection would otherwise send, and that default is what the server is rejecting.

Related

How can I make a POST request in the same manner as Apache HttpClient lib, using HttpURLConnection

I'm trying to make a POST request to a website. As the response to the POST request, I expect some JSON data.
Using Apache's HttpClient library, I am able to do this without any problems. The response data is JSON so I just parse it.
package com.mydomain.myapp;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

public class MyApp {

    private static String extract(String patternString, String target) {
        Pattern pattern = Pattern.compile(patternString);
        Matcher matcher = pattern.matcher(target);
        matcher.find();
        return matcher.group(1);
    }

    private String getResponse(InputStream stream) throws Exception {
        BufferedReader in = new BufferedReader(new InputStreamReader(stream));
        String inputLine;
        StringBuffer responseStringBuffer = new StringBuffer();
        while ((inputLine = in.readLine()) != null) {
            responseStringBuffer.append(inputLine);
        }
        in.close();
        return responseStringBuffer.toString();
    }

    private final static String BASE_URL = "https://www.volkswagen-car-net.com";
    private final static String BASE_GUEST_URL = "/portal/en_GB/web/guest/home";

    private void run() throws Exception {
        CloseableHttpClient client = HttpClients.createDefault();

        // GET the guest page and scrape the CSRF token out of its <meta> tag
        HttpGet httpGet = new HttpGet(BASE_URL + BASE_GUEST_URL);
        CloseableHttpResponse getResponse = client.execute(httpGet);
        HttpEntity responseEntity = getResponse.getEntity();
        String data = getResponse(responseEntity.getContent());
        EntityUtils.consume(responseEntity);
        getResponse.close();

        String csrf = extract("<meta name=\"_csrf\" content=\"(.*)\"/>", data);
        System.out.println(csrf);

        // POST back with the token; HttpClient carries the session cookies along automatically
        HttpPost post = new HttpPost(BASE_URL + "/portal/web/guest/home/-/csrftokenhandling/get-login-url");
        post.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        post.setHeader("User-Agent", "Mozilla/5.0 (Linux; Android 6.0.1; D5803 Build/23.5.A.1.291; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/63.0.3239.111 Mobile Safari/537.36");
        post.setHeader("Referer", BASE_URL + "/portal");
        post.setHeader("X-CSRF-Token", csrf);

        CloseableHttpResponse postResponse = client.execute(post);
        HttpEntity postResponseEntity = postResponse.getEntity();
        String postData = getResponse(postResponseEntity.getContent());
        System.out.println(postData);
        EntityUtils.consume(postResponseEntity);
        postResponse.close();
    }

    public static void main(String[] args) throws Exception {
        MyApp myApp = new MyApp();
        myApp.run();
    }
}
But I can't use the HttpClient library in my project. I need to be able to do the same thing with "just" HttpURLConnection.
But there is some magic going on in the HttpClient library that I cannot fathom, because the response to my POST request using HttpURLConnection is just a redirect to a different webpage altogether.
Can someone point me in the right direction here?
Here's my current HttpURLConnection attempt:
package com.mydomain.myapp;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MyApp {

    private static String extract(String patternString, String target) {
        Pattern pattern = Pattern.compile(patternString);
        Matcher matcher = pattern.matcher(target);
        matcher.find();
        return matcher.group(1);
    }

    private final static String BASE_URL = "https://www.volkswagen-car-net.com";
    private final static String BASE_GUEST_URL = "/portal/en_GB/web/guest/home";

    private String getResponse(InputStream stream) throws Exception {
        BufferedReader in = new BufferedReader(new InputStreamReader(stream));
        String inputLine;
        StringBuffer responseStringBuffer = new StringBuffer();
        while ((inputLine = in.readLine()) != null) {
            responseStringBuffer.append(inputLine);
        }
        in.close();
        return responseStringBuffer.toString();
    }

    private String getResponse(HttpURLConnection connection) throws Exception {
        return getResponse(connection.getInputStream());
    }

    private void run() throws Exception {
        HttpURLConnection getConnection1;
        URL url = new URL(BASE_URL + BASE_GUEST_URL);
        getConnection1 = (HttpURLConnection) url.openConnection();
        getConnection1.setRequestMethod("GET");
        if (getConnection1.getResponseCode() != HttpURLConnection.HTTP_OK) {
            throw new Exception("Request failed");
        }
        String response = getResponse(getConnection1);
        getConnection1.disconnect();

        String csrf = extract("<meta name=\"_csrf\" content=\"(.*)\"/>", response);
        System.out.println(csrf);

        HttpURLConnection postRequest;
        URL url2 = new URL(BASE_URL + "/portal/web/guest/home/-/csrftokenhandling/get-login-url");
        postRequest = (HttpURLConnection) url2.openConnection();
        postRequest.setDoOutput(true);
        postRequest.setRequestMethod("POST");
        postRequest.setInstanceFollowRedirects(false);
        postRequest.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        postRequest.setRequestProperty("User-Agent", "Mozilla/5.0 (Linux; Android 6.0.1; D5803 Build/23.5.A.1.291; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/63.0.3239.111 Mobile Safari/537.36");
        postRequest.setRequestProperty("Referer", BASE_URL + "/portal");
        postRequest.setRequestProperty("X-CSRF-Token", csrf);
        // Note: nothing here ever fires the request -- there is no
        // getResponseCode() or getInputStream() call, so the POST
        // response is never actually read.
        postRequest.disconnect();
    }

    public static void main(String[] args) throws Exception {
        MyApp myApp = new MyApp();
        myApp.run();
    }
}
Courtesy of a great programmer resource, MKYong (you know you've run into his site before ;-)), and I'll go over the gist of it in case the link ever goes down.
Gist:
HttpURLConnection's follow-redirect flag is just an indicator; it won't do the "real" HTTP redirection for you, so you still need to handle it manually.
If the server redirects from the original URL to another URL, the response code will be 301 (Moved Permanently) or 302 (Found), and you can get the redirected URL by reading the "Location" header of the HTTP response.
For example, accessing the plain-HTTP Twitter site, http://www.twitter.com, auto-redirects to the HTTPS site, https://www.twitter.com.
Sample code
package com.mkyong.http;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class HttpRedirectExample {

    public static void main(String[] args) {
        try {
            String url = "http://www.twitter.com";

            URL obj = new URL(url);
            HttpURLConnection conn = (HttpURLConnection) obj.openConnection();
            conn.setReadTimeout(5000);
            conn.addRequestProperty("Accept-Language", "en-US,en;q=0.8");
            conn.addRequestProperty("User-Agent", "Mozilla");
            conn.addRequestProperty("Referer", "google.com");

            System.out.println("Request URL ... " + url);

            boolean redirect = false;

            // normally, 3xx is redirect
            int status = conn.getResponseCode();
            if (status != HttpURLConnection.HTTP_OK) {
                if (status == HttpURLConnection.HTTP_MOVED_TEMP
                        || status == HttpURLConnection.HTTP_MOVED_PERM
                        || status == HttpURLConnection.HTTP_SEE_OTHER)
                    redirect = true;
            }

            System.out.println("Response Code ... " + status);

            if (redirect) {
                // get redirect url from "location" header field
                String newUrl = conn.getHeaderField("Location");

                // get the cookie if needed, for login
                String cookies = conn.getHeaderField("Set-Cookie");

                // open the new connection again
                conn = (HttpURLConnection) new URL(newUrl).openConnection();
                conn.setRequestProperty("Cookie", cookies);
                conn.addRequestProperty("Accept-Language", "en-US,en;q=0.8");
                conn.addRequestProperty("User-Agent", "Mozilla");
                conn.addRequestProperty("Referer", "google.com");

                System.out.println("Redirect to URL : " + newUrl);
            }

            BufferedReader in = new BufferedReader(
                    new InputStreamReader(conn.getInputStream()));
            String inputLine;
            StringBuffer html = new StringBuffer();

            while ((inputLine = in.readLine()) != null) {
                html.append(inputLine);
            }
            in.close();

            System.out.println("URL Content... \n" + html.toString());
            System.out.println("Done");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
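One extra note for the original question: CloseableHttpClient keeps a per-client cookie store, so the session cookie set by the first GET automatically rides along on the POST. A bare HttpURLConnection sends no cookies at all unless a cookie handler is installed first. Whether that is the whole story for this particular site is an assumption, but the standard java.net way to get HttpClient-like cookie behavior is:
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;

public class CookieSetup {
    public static void main(String[] args) {
        // Install once; from here on, every HttpURLConnection in this JVM
        // stores and replays cookies, mimicking HttpClient's cookie store.
        CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
        // ... then run the GET-then-POST sequence from the question ...
    }
}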

Using regex to extract links only in loop

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

public class google {
    public static void main(String[] args) throws Exception {
        StringBuilder a = new StringBuilder();
        String regex = "(https?):\\/\\/(www\\.)?[a-z0-9\\.:].*?(?=\\s)";
        Pattern r = Pattern.compile(regex);
        String key = "myapikey";
        String qry = "tree";
        URL url = new URL("https://www.googleapis.com/customsearch/v1?key=" + key + "&cx=**************&q=" + qry + "&alt=json");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        conn.setRequestProperty("Accept", "application/json");
        BufferedReader br = new BufferedReader(new InputStreamReader(
                (conn.getInputStream())));
        String output;
        System.out.println(url);
        System.out.println("Output from Server .... \n");
        StringBuffer sb = new StringBuffer();
        while ((output = br.readLine()) != null) {
            if (output.contains("jpg")) {
                // Matcher m = r.matcher(output);
                a.append(output + "\n");
            }
        }
        System.out.println(a); // Will print the google search links
        conn.disconnect();
    }
}
That program returns the following:
"url": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault1.jpg"
"og:image": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault2.jpg",
"twitter:image": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault3.jpg",
"thumbnailurl": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault4.jpg",
"src": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault5.jpg"
"url": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault6.jpg",
"og:image": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault7.jpg",
"thumbnailurl": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault8.jpg",
"src": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault9.jpg"
But I need it to return only this:
https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault1.jpg
https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault2.jpg
https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault3.jpg
etc...
The regex that matches only the link is this:
String regex = "(https?):\/\/(www\.)?[a-z0-9\.:].*?(?=\s)";
(inside a Java string literal each backslash has to be doubled, as in the program above)
But I'm having trouble implementing it in this program. Any ideas?
Thanks for your time
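For what it's worth, that regex does drop straight into a find() loop once the backslashes are doubled for a Java string literal. A minimal sketch against one hard-coded sample line (class name is just for illustration), which also shows where it goes wrong:
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class LinkExtractor {
    public static void main(String[] args) {
        // The question's regex, with backslashes doubled for a Java literal.
        Pattern p = Pattern.compile("(https?):\\/\\/(www\\.)?[a-z0-9\\.:].*?(?=\\s)");
        String line = "\"url\": \"https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault1.jpg\" ";
        Matcher m = p.matcher(line);
        while (m.find()) {
            // Prints https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault1.jpg"
            // -- the lookahead (?=\s) only stops at whitespace, so the
            // closing quote comes along for the ride, which is likely the
            // trouble and why the accepted fix anchors on the file
            // extension instead.
            System.out.println(m.group());
        }
    }
}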
Found a solution in the end, using a different expression. Thanks for the suggestions!
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

public class google {
    public static void main(String[] args) throws Exception {
        StringBuilder results = new StringBuilder();
        String key = "myprivatekey";
        String qry = "tree";
        URL url = new URL(
                "https://www.googleapis.com/customsearch/v1?key=" + key + "&cx=myprivatekey&q=" + qry + "&alt=json");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        conn.setRequestProperty("Accept", "application/json");
        BufferedReader br = new BufferedReader(new InputStreamReader(
                (conn.getInputStream())));
        String output;
        System.out.println(url);
        System.out.println("Output from Server .... \n");
        // Compile the pattern once, outside the read loop
        Pattern pattern = Pattern.compile("(?:(?:https?)+\\:\\/\\/+[a-zA-Z0-9\\/\\._-]{1,})+(?:(?:jpe?g|png|gif))");
        while ((output = br.readLine()) != null) {
            Matcher matcher = pattern.matcher(output);
            if (matcher.find()) {
                results.append(matcher.group() + "\n");
            }
        }
        System.out.println(results);
        conn.disconnect();
    }
}
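That expression returns the bare URLs because it only admits URL characters ([a-zA-Z0-9\/._-] after the scheme) and has to end in an image extension (jpe?g|png|gif), so the match stops at the extension and never swallows the surrounding JSON quotes the way the whitespace-lookahead version did.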

C drive losing space when running Swing application

I'm making management software in Java for a book store. There is a database on my server, and I fetch the data through PHP code that is also on my server. I use the class below to call that PHP code.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class HttpUtility {

    private static HttpURLConnection httpConn;

    public static HttpURLConnection sendGetRequest(String requestURL)
            throws IOException {
        URL url = new URL(requestURL);
        httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setUseCaches(false);
        httpConn.setDoInput(true);
        httpConn.setDoOutput(false);
        return httpConn;
    }

    public static HttpURLConnection sendPostRequest(String requestURL,
            Map<String, String> params) throws IOException {
        URL url = new URL(requestURL);
        httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setUseCaches(false);
        httpConn.setDoInput(true);

        StringBuffer requestParams = new StringBuffer();
        if (params != null && params.size() > 0) {
            httpConn.setDoOutput(true);
            // Build the application/x-www-form-urlencoded body
            Iterator<String> paramIterator = params.keySet().iterator();
            while (paramIterator.hasNext()) {
                String key = paramIterator.next();
                String value = params.get(key);
                requestParams.append(URLEncoder.encode(key, "UTF-8"));
                requestParams.append("=").append(
                        URLEncoder.encode(value, "UTF-8"));
                requestParams.append("&");
            }
            OutputStreamWriter writer = new OutputStreamWriter(
                    httpConn.getOutputStream());
            writer.write(requestParams.toString());
            writer.flush();
            writer.close(); // don't leak the output stream
        }
        return httpConn;
    }

    public static String readSingleLineResponse() throws IOException {
        InputStream inputStream = null;
        if (httpConn != null) {
            inputStream = httpConn.getInputStream();
        } else {
            throw new IOException("Connection is not established.");
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                inputStream));
        String response = reader.readLine();
        reader.close();
        return response;
    }

    public static String[] readMultipleLinesResponse() throws IOException {
        InputStream inputStream = null;
        if (httpConn != null) {
            inputStream = httpConn.getInputStream();
        } else {
            throw new IOException("Connection is not established.");
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                inputStream));
        List<String> response = new ArrayList<String>();
        String line = "";
        while ((line = reader.readLine()) != null) {
            response.add(line);
        }
        reader.close();
        return (String[]) response.toArray(new String[0]);
    }

    public static void disconnect() {
        if (httpConn != null) {
            httpConn.disconnect();
        }
    }
}
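For context, the class gets used roughly like this (the URL is hypothetical, just for illustration):
HttpUtility.sendGetRequest("http://myserver.example/getbooks.php");
String[] lines = HttpUtility.readMultipleLinesResponse();
HttpUtility.disconnect();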
Now I'm facing the problem shown in the image links below (I don't have enough reputation to upload the images). There are 3 image links:
before log in
after log in, before the search query
another link is in the comment
After every request, my C: drive loses about 10 MB of space and I don't know why (very noob coder, sorry).
Can anyone help me?

Reading source code from a webpage in Java

I am trying to read the source code of a webpage. My Java code is:
import java.net.*;
import java.io.*;

class Testing {

    public static void Connect() throws Exception {
        URL url = new URL("http://excite.com/education");
        URLConnection spoof = url.openConnection();
        spoof.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0; H010818)");
        BufferedReader in = new BufferedReader(new InputStreamReader(spoof.getInputStream()));
        String strLine = "";
        while ((strLine = in.readLine()) != null) {
            System.out.println(strLine);
        }
        System.out.println("End of page.");
    }

    public static void main(String[] args) {
        try {
            Connect();
        } catch (Exception e) {
            e.printStackTrace(); // don't swallow the exception silently
        }
    }
}
When I compile and run this code, it gives the following output:
�I�%&/m�{J�J��t�$ؐ#������iG#)�*��eVe]f#�흼��{���{���;�N'���?\fdl��J�ɞ!���?~|?"~�$}�>�������4�����7N�����+�ӲM�N��?J�tZfM��G�j����R��!�9�?>JgE��Ge[����ⳏ���W�?�����8������
�|8�
���������ho����0׳���|փ:--�|�L�Uο�׫��m�zt�n3��l\�w��O^f�G[�CG<�y6K��gM�rg��ǟy�Eִy����h˜��ؗ˲X���l=�ڢZ�/����(կ^O�UU6�����&�6_�#yC}�p�y���lAH�ͯ��zF#�V�6_��}��)�v=J+�$��̤�G�Y�L�b���wS"�7�y^����Z�m���Y:ɛ���J<N_�Y=���U�f���,���y�Q2(J٩P!ͨ�i����1&F0&ૼn�?�x�T��h�Qzw�+����n�)�h��K��2����8g����⮥��A0
���1I�%����Q�Z����{��������w���?x����N�?�<d�S��۫�%a|4�j��z���k�Bak��k-�c�z�g��z���l>���֎s^,��5��/B�{����]]����Ý�ֳ���y{�_l�8g�k�ӫ�b���"+|��(��M��^[���J�P��_�..?������x�Z�$������E>��느�u���E~����{媘���f�e1ͷ�QZ,�����f��e�3Jٻb�^��4��۴���>��y��;��<렛{�l��ZfW
S# {�]��1��Q�����n[�,t�?����~�n�S�u#SL��n�^��������EC��q�/�y���FE�tpm������e&��oB���z9eY��������P��IK?����̦����w�N��;�;J?����;�/��5���M���rZ��q��]��C�dᖣ��F�nd���}���A5���M�5�.�:��/�_D�?�3����'�c�Z7��}��(OI),ۏi����{�<�w�������DZ?e����'q���eY]=���kj���߬������\qhrRn���l�o-��.���k��_���oD8��GA�P�r��|$��ȈPv~Y�:�[q?�sH�� <��C��ˬ�^N�[ v(��S��l�c�C����3���E5&5�VӪL�T��۔���oQrĈ��/���#[f�5�5"����[���t�vm�\��.0�nh����aڌWYM
^T�|\,��퓜�L�u����B�̌�C�r������ �������'�%�{��)�);�fV�]��g,�>�C �c2���p�4��}H���P��(�%j"�}�&�:�Oh\5I�l�氪��{�/�]�LB�l��2��I"��=��Y�|�>�֏n�������}�����~�[��'��O��
��:/�)�Wz�3��lo�.5�k�&����H[ji�����b������WWy}�5�֝Q�|f�����]�KjH5��}yNm�����g�ӷ�ǣ��>��'o��泏��<���G�g���>->�xQM�����%<�|����u�.��3���[�[r���ٝ;���]4E��6[����]����1���*�8}��n�w�������ݽ����|����}|qo|�~u����w|�i�i���Z�`z�ŧ����Q}�u��!���w �O���R9�)�~��g~߻w6��{���wd�o��/Z�uUS��݄l��I^�����>��[�U1�o�_��J��}��#�#�U�/��/?���i�7|CZT?(�2b~����c�W�c5'����EeFĿꇙ�0��T��{��W�2����/���O���YJj����K/���>��:'_l�
All URLs other than ones from this directory, i.e. "excite.com/education", give the correct source code, but these URLs are creating problems.
Can anyone please help?
Thanks in advance.
This works for me:
private static String getWebPageSource(String sURL) throws IOException {
    URL url = new URL(sURL);
    URLConnection urlCon = url.openConnection();
    BufferedReader in = null;
    // The page comes back gzip-compressed, so check Content-Encoding
    // and wrap the stream in a GZIPInputStream when needed.
    if (urlCon.getHeaderField("Content-Encoding") != null
            && urlCon.getHeaderField("Content-Encoding").equals("gzip")) {
        in = new BufferedReader(new InputStreamReader(new GZIPInputStream(
                urlCon.getInputStream())));
    } else {
        in = new BufferedReader(new InputStreamReader(
                urlCon.getInputStream()));
    }
    String inputLine;
    StringBuilder sb = new StringBuilder();
    while ((inputLine = in.readLine()) != null)
        sb.append(inputLine);
    in.close();
    return sb.toString();
}
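Called like this, it returns the page as readable text instead of the garbage above:
System.out.println(getWebPageSource("http://excite.com/education"));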
Try reading it this way:
private static String getUrlSource(String urlString) throws IOException {
    URL url = new URL(urlString); // the local can't share the parameter's name
    URLConnection urlConn = url.openConnection();
    BufferedReader in = new BufferedReader(new InputStreamReader(
            urlConn.getInputStream(), "UTF-8"));
    String inputLine;
    StringBuilder a = new StringBuilder();
    while ((inputLine = in.readLine()) != null)
        a.append(inputLine);
    in.close();
    return a.toString();
}
and set your encoding according to the web page - notice this line:
BufferedReader in = new BufferedReader(new InputStreamReader(
        urlConn.getInputStream(), "UTF-8"));
First you have to uncompress the content using GZIPInputStream; then wrap the uncompressed stream in a BufferedReader and read it.
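As a rough plain-java.net sketch of that idea (the URL is the asker's; the Content-Encoding check mirrors the method in the answer above):
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.GZIPInputStream;

public class GzipRead {
    public static void main(String[] args) throws IOException {
        URLConnection conn = new URL("http://excite.com/education").openConnection();
        InputStream raw = conn.getInputStream();
        // Only wrap in GZIPInputStream when the server says the body is gzipped
        InputStream in = "gzip".equalsIgnoreCase(conn.getContentEncoding())
                ? new GZIPInputStream(raw)
                : raw;
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
        }
        reader.close();
    }
}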
Use Apache HTTP Client 4.1.1
Maven dependency
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.1.1</version>
</dependency>
Sample Code to parse gzip content.
package com.gzip.simple;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;

import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;

public class GZIPFetcher {
    public static void main(String[] args) {
        try {
            DefaultHttpClient httpClient = new DefaultHttpClient();
            HttpGet getRequest = new HttpGet("http://excite.com/education");
            getRequest.addHeader("accept", "application/json");

            HttpResponse response = httpClient.execute(getRequest);
            if (response.getStatusLine().getStatusCode() != 200) {
                throw new RuntimeException("Failed : HTTP error code : "
                        + response.getStatusLine().getStatusCode());
            }

            InputStream instream = response.getEntity().getContent();
            // Check whether the content-encoding is gzip or not.
            Header contentEncoding = response.getFirstHeader("Content-Encoding");
            if (contentEncoding != null
                    && contentEncoding.getValue().equalsIgnoreCase("gzip")) {
                instream = new GZIPInputStream(instream);
            }

            BufferedReader in = new BufferedReader(new InputStreamReader(instream));
            String content;
            System.out.println("Output from Server .... \n");
            while ((content = in.readLine()) != null)
                System.out.println(content);

            httpClient.getConnectionManager().shutdown();
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

How to get a web page's source code from Java [duplicate]

This question already has answers here:
How do you Programmatically Download a Webpage in Java
(11 answers)
Closed 7 years ago.
I just want to retrieve any web page's source code from Java. I found lots of solutions so far, but I couldn't find any code that works for all the links below:
http://www.cumhuriyet.com.tr?hn=298710
http://www.fotomac.com.tr/Yazarlar/Olcay%20%C3%87ak%C4%B1r/2011/11/23/hesap-makinesi
http://www.sabah.com.tr/Gundem/2011/12/23/basbakan-konferansta-konusuyor#
The main problem for me is that some code retrieves the page source, but with parts missing. For example, the code below does not work for the first link:
InputStream is = fURL.openStream(); // fURL can be one of the links above
BufferedReader buffer = new BufferedReader(new InputStreamReader(is, "iso-8859-9"));
StringBuilder builder = new StringBuilder();
int byteRead;
while ((byteRead = buffer.read()) != -1) {
    builder.append((char) byteRead);
}
buffer.close();
System.out.println(builder.toString());
Try the following code with an added request property:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;

public class SocketConnection
{
    public static String getURLSource(String url) throws IOException
    {
        URL urlObject = new URL(url);
        URLConnection urlConnection = urlObject.openConnection();
        urlConnection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
        return toString(urlConnection.getInputStream());
    }

    private static String toString(InputStream inputStream) throws IOException
    {
        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8")))
        {
            String inputLine;
            StringBuilder stringBuilder = new StringBuilder();
            while ((inputLine = bufferedReader.readLine()) != null)
            {
                stringBuilder.append(inputLine);
            }
            return stringBuilder.toString();
        }
    }
}
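Usage, for example with the asker's first link:
System.out.println(SocketConnection.getURLSource("http://www.cumhuriyet.com.tr?hn=298710"));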
URL yahoo = new URL("http://www.yahoo.com/");
BufferedReader in = new BufferedReader(
        new InputStreamReader(
                yahoo.openStream()));
String inputLine;
while ((inputLine = in.readLine()) != null)
    System.out.println(inputLine);
in.close();
I'm sure you've found a solution somewhere over the past 2 years, but the following works for your requested site.
package javasandbox;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;

/**
 *
 * @author Ryan.Oglesby
 */
public class JavaSandbox {

    private static String sURL;

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        sURL = "http://www.cumhuriyet.com.tr/?hn=298710";
        System.out.println(sURL);
        URL url = new URL(sURL);
        HttpURLConnection httpCon = (HttpURLConnection) url.openConnection();
        // set http request headers
        httpCon.addRequestProperty("Host", "www.cumhuriyet.com.tr");
        httpCon.addRequestProperty("Connection", "keep-alive");
        httpCon.addRequestProperty("Cache-Control", "max-age=0");
        httpCon.addRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        httpCon.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
        httpCon.addRequestProperty("Accept-Encoding", "gzip,deflate,sdch");
        httpCon.addRequestProperty("Accept-Language", "en-US,en;q=0.8");
        //httpCon.addRequestProperty("Cookie", "JSESSIONID=EC0F373FCC023CD3B8B9C1E2E2F7606C; lang=tr; __utma=169322547.1217782332.1386173665.1386173665.1386173665.1; __utmb=169322547.1.10.1386173665; __utmc=169322547; __utmz=169322547.1386173665.1.1.utmcsr=stackoverflow.com|utmccn=(referral)|utmcmd=referral|utmcct=/questions/8616781/how-to-get-a-web-pages-source-code-from-java; __gads=ID=3ab4e50d8713e391:T=1386173664:S=ALNI_Mb8N_wW0xS_wRa68vhR0gTRl8MwFA; scrElm=body");

        HttpURLConnection.setFollowRedirects(false);
        httpCon.setInstanceFollowRedirects(false);
        httpCon.setDoOutput(true);
        httpCon.setUseCaches(true);
        httpCon.setRequestMethod("GET");

        BufferedReader in = new BufferedReader(new InputStreamReader(httpCon.getInputStream(), "UTF-8"));
        String inputLine;
        StringBuilder a = new StringBuilder();
        while ((inputLine = in.readLine()) != null)
            a.append(inputLine);
        in.close();

        System.out.println(a.toString());
        httpCon.disconnect();
    }
}
