Using regex to extract links only in loop - java

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
public class google {
public static void main(String[] args) throws Exception
{
StringBuilder a = new StringBuilder();
String regex = "(https?):\\/\\/(www\\.)?[a-z0-9\\.:].*?(?=\\s)";
Pattern r = Pattern.compile(regex);
String key="myapikey";
String qry="tree";
URL url = new URL("https://www.googleapis.com/customsearch/v1?key="+key+ "&cx=**************&q="+ qry +"&alt=json");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("GET");
conn.setRequestProperty("Accept", "application/json");
BufferedReader br = new BufferedReader(new InputStreamReader(
(conn.getInputStream())));
String output;
System.out.println(url);
System.out.println("Output from Server .... \n");
StringBuffer sb = new StringBuffer();
while ((output = br.readLine()) != null) {
if(output.contains("jpg")){
// Matcher m = r.matcher(output);
a.append(output + "\n");
}
}
System.out.println(a); //Will print the google search links
conn.disconnect();
}
}
That program returns the following:
"url": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault1.jpg"
"og:image": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault2.jpg",
"twitter:image": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault3.jpg",
"thumbnailurl": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault4.jpg",
"src": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault5.jpg"
"url": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault6.jpg",
"og:image": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault7.jpg",
"thumbnailurl": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault8.jpg",
"src": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault9.jpg"
But need it to only return this:
https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault1.jpg
https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault2.jpg
https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault3.jpg
etc...
The regex that can match for only the link is this:
String regex = "(https?):\/\/(www\.)?[a-z0-9\.:].*?(?=\s)";
But having trouble implementing it in this program. Any ideas?
Thanks for your time

Found a solution in the end. Using a different expression. Thanks for the suggestions!
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
public class google {
public static void main(String[] args) throws Exception
{
StringBuilder results = new StringBuilder();
String key="myprivatekey";
String qry="tree";
URL url = new URL(
"https://www.googleapis.com/customsearch/v1?key="+key+ "&cx=myprivatekey&q="+ qry +"&alt=json");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("GET");
conn.setRequestProperty("Accept", "application/json");
BufferedReader br = new BufferedReader(new InputStreamReader(
(conn.getInputStream())));
String output;
System.out.println(url);
System.out.println("Output from Server .... \n");
while ((output = br.readLine()) != null) {
Pattern pattern = Pattern.compile("(?:(?:https?)+\\:\\/\\/+[a-zA-Z0-9\\/\\._-]{1,})+(?:(?:jpe?g|png|gif))");
Matcher matcher = pattern.matcher(output);
if(matcher.find()){
results.append(matcher.group() + "\n");
}
}
System.out.println(results);
conn.disconnect();
}
}

Related

How to send HTTPS Post request along with JSON object in java?

I want to send JSON Data to an application using HTTPS connection from java code. please guide me.
`import java.net.URL;
import java.io.*;
import javax.net.ssl.HttpsURLConnection;
public class JavaHttpsExample
{
public static void main(String[] args) throws Exception {
String httpsURL = "https://your.https.url.here/";
URL myUrl = new URL(httpsURL);
HttpsURLConnection conn = (HttpsURLConnection)myUrl.openConnection();
InputStream is = conn.getInputStream();
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String inputLine;
while ((inputLine = br.readLine()) != null) {
System.out.println(inputLine);
}
br.close();
}
}`

Getting 403 error with java

When I pass the URL "http://echo.jsontest.com/key/value/one/two" to the code below it returns JSON data.
But when I pass "http://jsonplaceholder.typicode.com/comments?postId=1" instead, I get 403 forbidden error.
I'm not sure what's going on. Any advice?
package automation_Demo_First;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.nio.charset.Charset;
import org.testng.annotations.Test;
public class JsonReader{
//http://jsonplaceholder.typicode.com/comments?postId=1
public String url ="http://echo.jsontest.com/key/value/one/two";
#Test
public void testJson() throws IOException{
String data = getDataByJavaIO(url);
System.out.println(data);
}
public String getDataByJavaIO(String url) throws IOException{
InputStream inputstream = null;
BufferedReader bufferreader = null;
try{
inputstream = new URL(url).openStream();
bufferreader = new BufferedReader(new InputStreamReader(inputstream, Charset.forName("UTF-8")));
return readData(bufferreader);
}catch(IOException e){
throw e;
}
finally{
closeResource(inputstream);
closeResource(bufferreader);
}
}
public String readData(Reader reader) throws IOException{
StringBuilder stringbuilder = new StringBuilder();
int cp;
while((cp=reader.read())!=-1){
stringbuilder.append((char)cp);
}
return stringbuilder.toString();
}
public void closeResource(AutoCloseable closable){
try{
if(closable!=null){
closable.close();
System.out.println("\n" +closable.getClass().getName() + "closed ..." );
}
}
catch(Exception e){
e.printStackTrace(System.err);
}
}
}
In your try block,
try {
inputstream = new URL(url).openStream();
bufferreader = new BufferedReader(new InputStreamReader(inputstream, Charset.forName("UTF-8")));
return readData(bufferreader);
}
change it to
try {
HttpURLConnection httpCon = (HttpURLConnection) new URL(url).openConnection();
httpCon.addRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36");
inputstream = httpCon.getInputStream();
bufferreader = new BufferedReader(new InputStreamReader(inputstream, Charset.forName("UTF-8")));
return readData(bufferreader);
}
Source: https://stackoverflow.com/a/18889991/3903483
Use:
URLConnection hc = new URL(url).openConnection();
hc.setRequestProperty("User-Agent", "");
inputstream = hc.getInputStream();
Instead of:
inputstream = new URL(url).openStream();

C drive memory overloading by running swing

I'm making a management software in java for a book store. There is a database on my server. I get my database data by php code.which is also in my server.I use the code below to get data by running the php code.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
public class HttpUtility {
private static HttpURLConnection httpConn;
public static HttpURLConnection sendGetRequest(String requestURL)
throws IOException {
URL url = new URL(requestURL);
httpConn = (HttpURLConnection) url.openConnection();
httpConn.setUseCaches(false);
httpConn.setDoInput(true);
httpConn.setDoOutput(false);
return httpConn;
}
public static HttpURLConnection sendPostRequest(String requestURL,
Map<String, String> params) throws IOException {
URL url = new URL(requestURL);
httpConn = (HttpURLConnection) url.openConnection();
httpConn.setUseCaches(false);
httpConn.setDoInput(true);
StringBuffer requestParams = new StringBuffer();
if (params != null && params.size() > 0) {
httpConn.setDoOutput(true);
Iterator<String> paramIterator = params.keySet().iterator();
while (paramIterator.hasNext()) {
String key = paramIterator.next();
String value = params.get(key);
requestParams.append(URLEncoder.encode(key, "UTF-8"));
requestParams.append("=").append(
URLEncoder.encode(value, "UTF-8"));
requestParams.append("&");
}
OutputStreamWriter writer = new OutputStreamWriter(
httpConn.getOutputStream());
writer.write(requestParams.toString());
writer.flush();
}
return httpConn;
}
public static String readSingleLineRespone() throws IOException {
InputStream inputStream = null;
if (httpConn != null) {
inputStream = httpConn.getInputStream();
} else {
throw new IOException("Connection is not established.");
}
BufferedReader reader = new BufferedReader(new InputStreamReader(
inputStream));
String response = reader.readLine();
reader.close();
return response;
}
public static String[] readMultipleLinesRespone() throws IOException {
InputStream inputStream = null;
if (httpConn != null) {
inputStream = httpConn.getInputStream();
} else {
throw new IOException("Connection is not established.");
}
BufferedReader reader = new BufferedReader(new InputStreamReader(
inputStream));
List<String> response = new ArrayList<String>();
String line = "";
while ((line = reader.readLine()) != null) {
response.add(line);
}
reader.close();
return (String[]) response.toArray(new String[0]);
}
public static void disconnect() {
if (httpConn != null) {
httpConn.disconnect();
}
}
}
Now I'm facing a problem like the image link (don't have enough reputation for the upload)below.There are 3 image link.
before log in
After the log in and before the search query
Another link is in the comment.
After every request making my C:// drive is loosing 10 MB space and I don't know why(Very noob coder sorry.)
Can anyone help me.

Reading source code from a webpage in java

I am trying to read source code from a webpage. My java code is
import java.net.*;
import java.io.*;
import java.util.*;
import javax.swing.JOptionPane;
class Testing{
public static void Connect() throws Exception{
URL url = new URL("http://excite.com/education");
URLConnection spoof = url.openConnection();
spoof.setRequestProperty( "User-Agent", "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0; H010818)" );
BufferedReader in = new BufferedReader(new InputStreamReader(spoof.getInputStream()));
String strLine = "";
while ((strLine = in.readLine()) != null){
System.out.println(strLine);
}
System.out.println("End of page.");
}
public static void main(String[] args){
try{
Connect();
}catch(Exception e){
}
}
When i compile and run this code, it gives the following output:
�I�%&/m�{J�J��t�$ؐ#������iG#)�*��eVe]f#�흼��{���{���;�N'���?\fdl��J�ɞ!���?~|?"~�$}�>�������4�����7N�����+�ӲM�N��?J�tZfM��G�j����R��!�9�?>JgE��Ge[����ⳏ���W�?�����8������
�|8�
���������ho����0׳���|փ:--�|�L�Uο�׫��m�zt�n3��l\�w��O^f�G[�CG<�y6K��gM�rg��ǟy�Eִy����h˜��ؗ˲X���l=�ڢZ�/����(կ^O�UU6�����&�6_�#yC}�p�y���lAH�ͯ��zF#�V�6_��}��)�v=J+�$��̤�G�Y�L�b���wS"�7�y^����Z�m���Y:ɛ���J<N_�Y=���U�f���,���y�Q2(J٩P!ͨ�i����1&F0&ૼn�?�x�T��h�Qzw�+����n�)�h��K��2����8g����⮥��A0
���1I�%����Q�Z����{��������w���?x����N�?�<d�S��۫�%a|4�j��z���k�Bak��k-�c�z�g��z���l>���֎s^,��5��/B�{����]]����Ý�ֳ���y{�_l�8g�k�ӫ�b���"+|��(��M��^[���J�P��_�..?������x�Z�$������E>��느�u���E~����{媘���f�e1ͷ�QZ,�����f��e�3Jٻb�^��4��۴���>��y��;��<렛{�l��ZfW
S# {�]��1��Q�����n[�,t�?����~�n�S�u#SL��n�^��������EC��q�/�y���FE�tpm������e&��oB���z9eY��������P��IK?����̦����w�N��;�;J?����;�/��5���M���rZ��q��]��C�dᖣ��F�nd���}���A5���M�5�.�:��/�_D�?�3����'�c�Z7��}��(OI),ۏi����{�<�w�������DZ?e����'q���eY]=���kj���߬������\qhrRn���l�o-��.���k��_���oD8��GA�P�r��|$��ȈPv~Y�:�[q?�sH�� <��C��ˬ�^N�[ v(��S��l�c�C����3���E5&5�VӪL�T��۔���oQrĈ��/���#[f�5�5"����[���t�vm�\��.0�nh����aڌWYM
^T�|\,��퓜�L�u����B�̌�C�r������ �������'�%�{��)�);�fV�]��g,�>�C �c2���p�4��}H���P��(�%j"�}�&�:�Oh\5I�l�氪��{�/�]�LB�l��2��I"��=��Y�|�>�֏n�������}�����~�[��'��O��
��:/�)�Wz�3��lo�.5�k�&����H[ji�����b������WWy}�5�֝Q�|f�����]�KjH5��}yNm�����g�ӷ�ǣ��>��'o��泏��<���G�g���>->�xQM�����%<�|����u�.��3���[�[r���ٝ;���]4E��6[����]����1���*�8}��n�w�������ݽ����|����}|qo|�~u����w|�i�i���Z�`z�ŧ����Q}�u��!���w �O���R9�)�~��g~߻w6��{���wd�o��/Z�uUS��݄l��I^�����>��[�U1�o�_��J��}��#�#�U�/��/?���i�7|CZT?(�2b~����c�W�c5'����EeFĿꇙ�0��T��{��W�2����/���O���YJj����K/���>��:'_l�
Other than URLs from this directory i.e. "excite.com/education" all URLs are giving correct source codes but these URLs are creating problems.
Anyone Please Help.
Thanks in advance.
It works for me.
private static String getWebPabeSource(String sURL) throws IOException {
URL url = new URL(sURL);
URLConnection urlCon = url.openConnection();
BufferedReader in = null;
if (urlCon.getHeaderField("Content-Encoding") != null
&& urlCon.getHeaderField("Content-Encoding").equals("gzip")) {
in = new BufferedReader(new InputStreamReader(new GZIPInputStream(
urlCon.getInputStream())));
} else {
in = new BufferedReader(new InputStreamReader(
urlCon.getInputStream()));
}
String inputLine;
StringBuilder sb = new StringBuilder();
while ((inputLine = in.readLine()) != null)
sb.append(inputLine);
in.close();
return sb.toString();
}
Try reading it this way:
private static String getUrlSource(String url) throws IOException {
URL url = new URL(url);
URLConnection urlConn = url.openConnection();
BufferedReader in = new BufferedReader(new InputStreamReader(
urlConn.getInputStream(), "UTF-8"));
String inputLine;
StringBuilder a = new StringBuilder();
while ((inputLine = in.readLine()) != null)
a.append(inputLine);
in.close();
return a.toString();
}
and set your encoding according to the web page - notice this line:
BufferedReader in = new BufferedReader(new InputStreamReader(
urlConn.getInputStream(), "UTF-8"));
First you have to uncompress the content using GZIPInputStream. Then put the uncompressed stream to Input Stream and read it using BufferedReader
Use Apache HTTP Client 4.1.1
Maven dependency
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.1.1</version>
</dependency>
Sample Code to parse gzip content.
package com.gzip.simple;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
public class GZIPFetcher {
public static void main(String[] args) {
try {
DefaultHttpClient httpClient = new DefaultHttpClient();
HttpGet getRequest = new HttpGet("http://excite.com/education");
getRequest.addHeader("accept", "application/json");
HttpResponse response = httpClient.execute(getRequest);
if (response.getStatusLine().getStatusCode() != 200) {
throw new RuntimeException("Failed : HTTP error code : "
+ response.getStatusLine().getStatusCode());
}
InputStream instream = response.getEntity().getContent();
// Check whether the content-encoding is gzip or not.
Header contentEncoding = response
.getFirstHeader("Content-Encoding");
if (contentEncoding != null
&& contentEncoding.getValue().equalsIgnoreCase("gzip")) {
instream = new GZIPInputStream(instream);
}
BufferedReader in = new BufferedReader(new InputStreamReader(
instream));
String content;
System.out.println("Output from Server .... \n");
while ((content = in.readLine()) != null)
System.out.println(content);
httpClient.getConnectionManager().shutdown();
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}

I want to view "source code" of the response generated from HTTP request in Java

My code can only return response status message or status code, but I need my code to return response in "view source" format i.e. in XML format :
import java.io.IOException;
import java.net.URL;
import java.net.HttpURLConnection;
public class API{
public static void main(String args[]) throws IOException
{
URL url = new URL("http://example.com");
HttpURLConnection http = (HttpURLConnection)url.openConnection();
String statusCode = http.getResponseMessage();
System.out.println(statusCode);
}
}
You have to get stream and then read it. Like this:
import java.io.IOException;
import java.net.URL;
import java.net.HttpURLConnection;
public class API{
public static void main(String args[]) throws IOException
{
URL url = new URL("http://example.com");
HttpURLConnection http = (HttpURLConnection)url.openConnection();
String statusCode = http.getResponseMessage();
System.out.println(statusCode);
//read the result from the server
BufferedReader rd = new BufferedReader(new InputStreamReader(http.getInputStream()));
StringBuilder sb = new StringBuilder();
String line = null;
while ((line = rd.readLine()) != null)
{
sb.append(line);
}
System.out.println(sb.toString());
}
}

Categories

Resources