I've been looking for an easy way to add IDs to HTML tags and spent a few hours jumping from one tool to another before I came up with this little test, which solved my issue. Since my sprint backlog is almost empty, I have some time to share it. Feel free to clean it up, and enjoy it if QA has asked you to add IDs. Just change the tag and the path, then run it :)
I had some trouble writing a proper lambda here due to lack of coffee today...
How do I replace only the first occurrence in a single lambda? My files had many lines with the same tags.
/**
 * Rewrites the file at {@code path} in place, substituting {@code replaceWith}
 * for every occurrence of {@code replace} on every line.
 * An {@link IOException} is printed as a stack trace and otherwise ignored.
 */
private void replace(String path, String replace, String replaceWith) {
    try (Stream<String> source = Files.lines(Paths.get(path))) {
        Stream<String> substituted = source.map(text -> text.replace(replace, replaceWith));
        List<String> output = substituted.collect(Collectors.toList());
        Files.write(Paths.get(path), output);
    } catch (IOException ioFailure) {
        ioFailure.printStackTrace();
    }
}
The version above replaced every matching line, because it also found the text to replace in the following lines. A proper matcher-based replace with an auto-incrementing counter inside the method body would be better than preparing the replaceWith value before the call. If I ever need this again, I'll post another final version.
Final version to not waste more time (phase green):
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.runners.MockitoJUnitRunner;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
#RunWith(MockitoJUnitRunner.class)
public class RepalceInFilesWithAutoIncrement {
private int incremented = 100;
/**
* The tag you would like to add Id to
* */
private static final String tag = "label";
/**
* Regex to find the tag
* */
private static final Pattern TAG_REGEX = Pattern.compile("<" + tag + " (.+?)/>", Pattern.DOTALL);
private static final Pattern ID_REGEX = Pattern.compile("id=", Pattern.DOTALL);
#Test
public void replaceInFiles() throws IOException {
String nextId = " id=\"" + tag + "_%s\" ";
String path = "C:\\YourPath";
try (Stream<Path> paths = Files.walk(Paths.get(path))) {
paths.forEach(filePath -> {
if (Files.isRegularFile(filePath)) {
try {
List<String> foundInFiles = getTagValues(readFile(filePath.toAbsolutePath().toString()));
if (!foundInFiles.isEmpty()) {
for (String tagEl : foundInFiles) {
incremented++;
String id = String.format(nextId, incremented);
String replace = tagEl.split("\\r?\\n")[0];
replace = replace.replace("<" + tag, "<" + tag + id);
replace(filePath.toAbsolutePath().toString(), tagEl.split("\\r?\\n")[0], replace, false);
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
});
}
System.out.println(String.format("Finished with (%s) changes", incremented - 100));
}
private String readFile(String path)
throws IOException {
byte[] encoded = Files.readAllBytes(Paths.get(path));
return new String(encoded, StandardCharsets.UTF_8);
}
private List<String> getTagValues(final String str) {
final List<String> tagValues = new ArrayList<>();
final Matcher matcher = TAG_REGEX.matcher(str);
while (matcher.find()) {
if (!ID_REGEX.matcher(matcher.group()).find())
tagValues.add(matcher.group());
}
return tagValues;
}
private void replace(String path, String replace, String replaceWith, boolean log) {
if (log) {
System.out.println("path = [" + path + "], replace = [" + replace + "], replaceWith = [" + replaceWith + "], log = [" + log + "]");
}
try (Stream<String> lines = Files.lines(Paths.get(path))) {
List<String> replaced = new ArrayList<>();
boolean alreadyReplaced = false;
for (String line : lines.collect(Collectors.toList())) {
if (line.contains(replace) && !alreadyReplaced) {
line = line.replace(replace, replaceWith);
alreadyReplaced = true;
}
replaced.add(line);
}
Files.write(Paths.get(path), replaced);
} catch (IOException e) {
e.printStackTrace();
}
}
}
Try it with Jsoup.
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Demo: parses a small HTML string with Jsoup, gives every {@code p} element a
 * sequential numeric id starting at 12, and prints the resulting document.
 */
public class JsoupTest {
    public static void main(String[] argv) {
        final String markup = "<html><head><title>Try it with Jsoup</title></head>"
                + "<body><p>P first</p><p>P second</p><p>P third</p></body></html>";
        final Document parsed = Jsoup.parse(markup);
        // The selector names the tag you would like to add an id to.
        final Elements targets = parsed.select("p");
        int nextId = 12;
        for (Element target : targets) {
            target.attr("id", String.valueOf(nextId));
            nextId += 1;
        }
        System.out.println(parsed.toString());
    }
}
Related
All the above and then print out the line number of the misspelled word and the line. Ex 1: I am a fox that lives in a huse
All of this should be printed on the command line
I used the jSpell Checker API on RapidAPI for the spell check.
Note: generate your own API key.
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Spell-checks every line of {@code Words.txt} with the jSpell Checker web
 * service and prints each line with the misspelled words wrapped in
 * {@code ^^...^^}.
 */
public class SOTest
{
    private static final String CHECK_URL = "https://jspell-checker.p.rapidapi.com/check";

    /** Key used when the RAPIDAPI_KEY environment variable is not set. */
    private static final String DEFAULT_API_KEY = "32efb09328msh3e3b62d34ac8cfcp1467a4jsnb3ef821b4b23";

    /** Extracts the value of each "word" field from the service response. */
    private static final Pattern WORD_FIELD = Pattern.compile("\"word\":\"([a-zA-Z]*)\"");

    public static void main(String[] args) {
        try {
            //Load text file into List
            List<String> list = Files.readAllLines(Paths.get("Words.txt"), StandardCharsets.UTF_8);
            //Iterate List to scan the spell error
            for (String line : list) {
                System.out.println(highlight(line, spellCheck(line)));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Wraps every whole-word occurrence of each reported error in {@code ^^}.
     * Only whole words are marked, so an error such as "cor" does not touch
     * "correct", and a word reported more than once is marked only once.
     *
     * @param line   the original text line
     * @param errors misspelled words reported for that line
     * @return the line with the errors marked
     */
    static String highlight(String line, List<String> errors) {
        String marked = line;
        for (String err : new java.util.LinkedHashSet<String>(errors)) {
            if (err.isEmpty()) {
                continue; // an empty "word" would otherwise match at every position
            }
            Pattern wholeWord = Pattern.compile("(?<![A-Za-z])" + Pattern.quote(err) + "(?![A-Za-z])");
            marked = wholeWord.matcher(marked).replaceAll(Matcher.quoteReplacement("^^" + err + "^^"));
        }
        return marked;
    }

    /**
     * Sends one line of text to the spell-check service.
     *
     * @param input the text to check
     * @return the misspelled words found by the service, in response order
     * @throws IOException if the request fails or the response cannot be read
     */
    public static List<String> spellCheck(String input) throws IOException {
        List<String> results = new ArrayList<String>();
        // The input is escaped so quotes or backslashes in the text cannot break the JSON body.
        String body = "{\t\"language\": \"enUS\",\t\"fieldvalues\": \"" + escapeJson(input) + "\","
                + "\t\"config\": {\t\t\"forceUpperCase\": false,\t\t\"ignoreIrregularCaps\": false,"
                + "\t\t\"ignoreFirstCaps\": true,\t\t\"ignoreNumbers\": true,\t\t\"ignoreUpper\": false,"
                + "\t\t\"ignoreDouble\": false,\t\t\"ignoreWordsWithNumbers\": true\t}}";
        byte[] postData = body.getBytes(StandardCharsets.UTF_8);
        HttpURLConnection conn = (HttpURLConnection) new URL(CHECK_URL).openConnection();
        conn.setDoOutput( true );
        conn.setRequestMethod( "POST" );
        conn.setRequestProperty( "x-rapidapi-host", "jspell-checker.p.rapidapi.com");
        conn.setRequestProperty( "x-rapidapi-key", apiKey());
        conn.setRequestProperty( "content-type", "application/json");
        conn.setRequestProperty( "accept", "application/json");
        conn.setRequestProperty( "useQueryString", "true");
        conn.setUseCaches( false );
        try (DataOutputStream wr = new DataOutputStream(conn.getOutputStream())) {
            wr.write(postData);
        }
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String strCurrentLine;
            while ((strCurrentLine = br.readLine()) != null) {
                results.addAll(parseWords(strCurrentLine));
            }
        }
        return results;
    }

    /** Returns the value of every {@code "word":"..."} field found in {@code json}. */
    static List<String> parseWords(String json) {
        List<String> words = new ArrayList<String>();
        Matcher matcher = WORD_FIELD.matcher(json);
        while (matcher.find()) {
            words.add(matcher.group(1));
        }
        return words;
    }

    /** Escapes {@code text} so it can be embedded in a JSON string literal. */
    static String escapeJson(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 8);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        escaped.append(String.format("\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
            }
        }
        return escaped.toString();
    }

    /** Prefers the RAPIDAPI_KEY environment variable over the built-in key. */
    private static String apiKey() {
        String fromEnv = System.getenv("RAPIDAPI_KEY");
        return (fromEnv == null || fromEnv.isEmpty()) ? DEFAULT_API_KEY : fromEnv;
    }
}
input inside Words.txt
this is example
thiss is example
my Dog is hudden under cor
output
this is example
^^thiss^^ is example
my Dog is ^^hudden^^ under ^^cor^^
This type of problem is best solved with a hash table. However, if you need to do it this way, here is a solution without jSpell.
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
/**
 * Command-line spell checker: reports every word of the given text file that
 * is not listed in the dictionary file, together with its line number and line.
 */
public class FinalProject {

    /** Dictionary location used by {@link #SpellCheck(String)}; one word per line. */
    private static final String DICTIONARY_FILE = "..\\test\\dict.txt";

    public static void main(String[] args) {
        if(args.length == 0)
        {
            System.out.println("File name not specified");
            System.exit(1);
        }
        SpellCheck(args[0]);
    }

    /**
     * Reads all lines of a UTF-8 text file.
     *
     * @param file path of the file to read
     * @return the lines of the file, or an empty list if it cannot be read
     *         (the failure is reported on standard error)
     */
    public static List<String> ScanFile(String file){
        try {
            return Files.readAllLines(Paths.get(file), StandardCharsets.UTF_8);
        } catch (IOException e) {
            System.err.println("Could not read " + file + ": " + e);
            return java.util.Collections.emptyList();
        }
    }

    /**
     * Checks every line of {@code file} against the dictionary and prints one
     * two-line report per misspelled word.
     *
     * @param file path of the text file to check
     */
    public static void SpellCheck(String file)
    {
        // A set gives constant-time lookups instead of one dictionary scan per word.
        java.util.Set<String> dictionary = new java.util.HashSet<String>();
        for (String entry : ScanFile(DICTIONARY_FILE)) {
            dictionary.add(entry.trim().toLowerCase());
        }
        List<String> check_file = ScanFile(file);
        for (int i = 0; i < check_file.size(); i++)
        {
            String line = check_file.get(i);
            int lineNumber = i + 1;
            for (String currWord : findMisspelled(line, dictionary))
            {
                System.out.println("Error found on line number " + lineNumber);
                // Print the offending line only, not the whole file.
                System.out.println("[" + line + "] -> " + "\"" + currWord + "\"");
            }
        }
    }

    /**
     * Returns the lower-cased words of {@code line} that are not in
     * {@code dictionary}, in order of appearance. Characters other than ASCII
     * letters and spaces are dropped before the line is split into words.
     *
     * @param line       one line of text
     * @param dictionary known words, lower-cased
     */
    static List<String> findMisspelled(String line, java.util.Set<String> dictionary) {
        List<String> misspelled = new java.util.ArrayList<String>();
        String[] lineWords = line.replaceAll("[^a-zA-Z ]", "").trim().split(" +");
        for (String token : lineWords) {
            if (token.isEmpty()) {
                continue; // blank line: nothing to check
            }
            String currWord = token.toLowerCase();
            if (!dictionary.contains(currWord)) {
                misspelled.add(currWord);
            }
        }
        return misspelled;
    }
}
inputFile.txt contains
I am a fox that lives in a huse
args[0] is the file to check (inputFile.txt here); the dictionary is read from ..\test\dict.txt
output:
Error found on line number 1
[I am a fox that lives in a huse] -> "huse"
I would have tried the ^^ marker around misspelled words as well, but I have to go out. The main problem is solved. Try it; if it works, give me a thumbs up. :)
Could somebody show me how to get the recorded file (.wav) of a specific call using the Twilio Java API, not raw REST requests?
I have read all the articles related to recording (https://support.twilio.com/hc/en-us/sections/205104748-Recording), but none of them shows how to do that with the Java API.
I use this code as a starting point, assuming the CALL_SID is known:
import com.twilio.Twilio;
import com.twilio.base.ResourceSet;
import com.twilio.rest.api.v2010.account.Recording;
import com.twilio.rest.api.v2010.account.RecordingReader;
/**
 * Starting point from the question: lists the recordings attached to one known
 * call through the Twilio helper library. The loop only reads each recording
 * SID; fetching the audio is the open question.
 */
public class DeleteRecordings1 {
    private static final String ACCOUNT_SID = "ACXXXXXXXXXXXXXXXXX";
    private static final String AUTH_TOKEN = "999aa999aaa999aaaa999";
    private static final String CALL_SID = "CA83837718818gdgdg";

    public static void main(String[] args) {
        try {
            Twilio.init(ACCOUNT_SID, AUTH_TOKEN);

            RecordingReader reader = Recording.reader();
            reader.setCallSid(CALL_SID);
            ResourceSet<Recording> callRecordings = reader.read();

            for (Recording current : callRecordings) {
                String sid = current.getSid();
                // HERE: how do I fetch the .wav file associated with this recording SID?
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
}
I put the final code, in case it can help somebody:
import java.io.File;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import com.twilio.Twilio;
import com.twilio.base.ResourceSet;
import com.twilio.rest.api.v2010.account.Recording;
import com.twilio.rest.api.v2010.account.RecordingReader;
/**
 * Downloads every recording of one call from Twilio into a local directory.
 * Each file is named {@code <CALL_SID>_<recordingSid><REC_EXT>}.
 */
public class GetCallRecordings {
    private static final String ACCOUNT_SID = "ACXXXXXXXXXXXXXXXXX";
    private static final String AUTH_TOKEN = "999aa999aaa999aaaa999";
    private static final String CALL_SID = "CA83837718818gdgdg";
    /** Base URL under which recording media is addressed by account and recording SID. */
    private static final String TWILIO_RES_URL = "https://api.twilio.com/2010-04-01/Accounts";
    /** Extension appended to the recording URL and to the local file name. */
    private static final String REC_EXT = ".mp3";
    /** Local directory that receives the downloaded recordings. */
    private static final String RUTA_RECS = "C:/recursos/grabaciones/";

    public static void main(String[] args) {
        try {
            Twilio.init(ACCOUNT_SID, AUTH_TOKEN);
            RecordingReader recordingReader = Recording.reader();
            recordingReader.setCallSid(CALL_SID);
            ResourceSet<Recording> recordings = recordingReader.read();
            for (Recording recording : recordings) {
                String recordingSid = recording.getSid();
                String urlGrabacion = TWILIO_RES_URL + "/" + ACCOUNT_SID + "/Recordings/" + recordingSid + REC_EXT;
                String locGrabacion = RUTA_RECS + CALL_SID + "_" + recordingSid + REC_EXT;
                System.out.println("Recuperando grabacion " + recordingSid);
                System.out.println("Ubicacion remota " + urlGrabacion);
                // No-op when the directory already exists; fails loudly if it cannot be created.
                Files.createDirectories(Paths.get(RUTA_RECS));
                // try-with-resources closes the download stream even if the copy fails.
                try (InputStream in = new URL(urlGrabacion).openStream()) {
                    Files.copy(in, Paths.get(locGrabacion), StandardCopyOption.REPLACE_EXISTING);
                }
                System.out.println("Ubicacion local " + locGrabacion);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Once you know the recordingSid for example RE557ce644e5ab84fa21cc21112e22c485
you can get a .wav file at https://api.twilio.com/2010-04-01/Accounts/ACXXXXX.../Recordings/RE557ce644e5ab84fa21cc21112e22c485.wav
You can get a .mp3 file at https://api.twilio.com/2010-04-01/Accounts/ACXXXXX.../Recordings/RE557ce644e5ab84fa21cc21112e22c485.mp3
where ACXXXXX... is your Twilio account SID (ACCOUNT_SID)
@JasonPlutext,
Hi Jason! I tried the above code, but it replaces the image completely and deletes the whole template.
I would like to replace or add just one particular image relationship, say
<Relationship Id="rId8" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="../media/image10.png"/>
I would like to replace the rId8 image with the rId7 image.
My Source Code:
/**
 * Loads the template deck, prints diagnostics about the first slide and its
 * layout, registers image10.png as an image part on that slide and saves the
 * package to a new file. The looked-up relationship and the mappings table are
 * built but not applied anywhere.
 */
public static void main(String[] args) throws Exception {
    String inputfilepath = "C:\\Users\\saranyac\\QUERIES\\Estimation\\PPT-PSR\\PSR_Dev0ps\\PSRAutomationTemplate.pptx";
    java.io.File templateFile = new java.io.File(inputfilepath);
    PresentationMLPackage deck = (PresentationMLPackage) OpcPackage.load(templateFile);

    MainPresentationPart mainPart = deck.getMainPresentationPart();
    SlidePart firstSlide = deck.getMainPresentationPart().getSlide(0);
    SlideLayoutPart layout = firstSlide.getSlideLayoutPart();

    // Diagnostics about the slide, its layout and its master.
    System.out.println("SlidePart Name:::::" + firstSlide.getPartName().getName());
    String layoutName = layout.getJaxbElement().getCSld().getName();
    System.out.println("layout: " + layout.getPartName().getName() + " with cSld/#name='" + layoutName + "'");
    System.out.println("Master: " + layout.getSlideMasterPart().getPartName().getName());
    System.out.println("layoutPart.getContents()::::::::s: " + layout.getContents());
    // Disabled: layoutPart.setContents( (SldLayout)XmlUtils.unmarshalString(SAMPLE_PICTURE, Context.jcPML));

    // Add image part
    File picture = new File("C:\\Users\\saranyac\\PPT-PSR\\PSR_Dev0ps\\ppt\\media\\image10.png" );
    BinaryPartAbstractImage newImage =
            BinaryPartAbstractImage.createImagePart(deck, firstSlide, picture);
    Relationship existing = mainPart.getRelationshipsPart().getRelationshipByID("rId8");
    System.out.println("Relationship:::::::s: " + newImage.getSourceRelationship().getId());
    // Disabled: pp.removeSlide(rel);

    java.util.HashMap<String, String> idMap = new java.util.HashMap<String, String>();
    idMap.put("rId8", newImage.getSourceRelationship().getId());

    String outputfilepath = "C:\\Work\\24Jan2018_CheckOut\\PPT-TRAILS\\Success.pptx";
    // Disabled: presentationMLPackage.save(new java.io.File(outputfilepath));
    SaveToZipFile zipWriter = new SaveToZipFile(deck);
    zipWriter.save(outputfilepath);
    System.out.println("\n\n done .. saved " + outputfilepath);
}
Please help me how to replace an image in the generated PPT.
With Regards,
Saranya
See https://github.com/plutext/docx4j/blob/master/src/samples/pptx4j/org/pptx4j/samples/TemplateReplaceSimple.java (just added):
package org.pptx4j.samples;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import javax.xml.bind.JAXBException;
import org.apache.commons.io.FileUtils;
import org.docx4j.TraversalUtil;
import org.docx4j.TraversalUtil.CallbackImpl;
import org.docx4j.dml.CTBlip;
import org.docx4j.dml.CTBlipFillProperties;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.OpcPackage;
import org.docx4j.openpackaging.packages.PresentationMLPackage;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.PresentationML.SlidePart;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage;
import org.pptx4j.Pptx4jException;
/**
* Example of how to replace text and images in a Pptx.
*
* Text is replaced using the familiar VariableReplace approach.
*
* Images are replaced by replacing their byte content.
*
* #author jharrop
*
*/
public class TemplateReplaceSimple {
public static void main(String[] args) throws Docx4JException, Pptx4jException, JAXBException, IOException {
// Input file
String inputfilepath = System.getProperty("user.dir") + "/sample-docs/pptx/image.pptx";
// String replacements
HashMap<String, String> mappings = new HashMap<String, String>();
mappings.put("colour", "green");
// Image replacements
List<ImageReplacementDetails> imageReplacements = new ArrayList<ImageReplacementDetails>();
ImageReplacementDetails example1 = new ImageReplacementDetails();
example1.slideIndex = 0;
example1.imageRelId = "rId2";
example1.replacementImageBytes = FileUtils.readFileToByteArray(new File("test.png"));
imageReplacements.add(example1);
PresentationMLPackage presentationMLPackage =
(PresentationMLPackage)OpcPackage.load(new java.io.File(inputfilepath));
// First, the text replacements
List<SlidePart> slideParts=
presentationMLPackage.getMainPresentationPart().getSlideParts();
for (SlidePart slidePart : slideParts) {
slidePart.variableReplace(mappings);
}
// Second, the image replacements.
// We have a design choice here.
// Either we can replace text placeholders with images,
// or we can replace existing images with new images, but keep the XML specifying size etc
// Here I opt for the latter, so what we need is the relId and image bytes.
for( ImageReplacementDetails ird : imageReplacements) {
// its a bit inefficient to potentially traverse a single slide
// multiple times, but I've done it this way to keep this example simple
SlidePart slidePart=
presentationMLPackage.getMainPresentationPart().getSlide(ird.slideIndex);
SlidePicFinder traverser = new SlidePicFinder();
new TraversalUtil(slidePart.getJaxbElement().getCSld().getSpTree().getSpOrGrpSpOrGraphicFrame(), traverser);
for(org.pptx4j.pml.Pic pic : traverser.pics) {
CTBlipFillProperties blipFill = pic.getBlipFill();
if (blipFill!=null) {
CTBlip blip = blipFill.getBlip();
if (blip.getEmbed()!=null) {
String relId = blip.getEmbed();
// is this the one we want?
if (relId.equals(ird.imageRelId)) {
Part part = slidePart.getRelationshipsPart().getPart(relId);
try {
BinaryPartAbstractImage imagePart = (BinaryPartAbstractImage)part;
// you'll need to ensure that you replace like with like,
// ie png for png, not eg jpeg for png!
imagePart.setBinaryData(ird.replacementImageBytes);
} catch (ClassCastException cce) {
System.out.println(part.getClass().getName());
}
} else {
System.out.println(relId + " isn't a match for this replacement. ");
}
} else {
System.out.println("No a:blip/#r:embed");
}
}
}
}
System.out.println("\n\n saving .. \n\n");
String outputfilepath = System.getProperty("user.dir") + "/OUT_VariableReplace.pptx";
presentationMLPackage.save(new java.io.File(outputfilepath));
System.out.println("\n\n done .. \n\n");
}
static class ImageReplacementDetails {
int slideIndex;
String imageRelId;
byte[] replacementImageBytes;
}
static class SlidePicFinder extends CallbackImpl {
List<org.pptx4j.pml.Pic> pics = new ArrayList<org.pptx4j.pml.Pic>();
public List<Object> apply(Object o) {
if (o instanceof org.pptx4j.pml.Pic) {
pics.add((org.pptx4j.pml.Pic) o);
System.out.println("added pic");
}
return null;
}
}
}
I am working with a Get object used to retrieve a row from a table in HBase. I want to dynamically retrieve all column values for that Get, since I don't know the exact names of the column families.
// Execute the Get `g` against the table and keep the returned Result.
val result1 = hTable.get(g)
// Only continue when the Result is not empty.
if (!result1.isEmpty) {
//binaryEpisodes = result1.getValue(Bytes.toBytes("episodes"),Bytes.toBytes("episodes"))
//instead of above retrieve all values dynamically
}
Simple way:
Get the raw cells and read the column family and column (qualifier) information from each one.
You have to do something like the example below:
/**
 * Logs a "Row: " header followed by one line per raw cell of {@code result},
 * formatted as {@code family:qualifier = value}.
 *
 * @param result the row whose cells are logged
 * @param logger destination of the info-level messages
 */
public static void printResult(Result result, Logger logger) {
    logger.info("Row: ");
    for (Cell current : result.rawCells()) {
        String familyText = Bytes.toString(CellUtil.cloneFamily(current));
        String qualifierText = Bytes.toString(CellUtil.cloneQualifier(current));
        String valueText = Bytes.toString(CellUtil.cloneValue(current));
        logger.info("\t" + familyText + ":" + qualifierText + " = " + valueText);
    }
}
HBase admin way: the HBase client API is exposed through the HBaseAdmin class, as shown below...
The client would look like this:
package mytest;
import com.usertest.*;
import java.io.IOException;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Prints one {@code table,column} line for every column reported by
 * {@code HbaseMetaData} for each table whose name matches the pattern.
 */
public class ListHbaseTablesAndColumns {
    public static void main(String[] args) {
        try {
            HbaseMetaData metaData = new HbaseMetaData();
            List<String> matchingTables = metaData.getTableNames(".*yourtables.*");
            for (String tableName : matchingTables) {
                Set<String> columns = metaData.getColumns(tableName, 10000);
                for (String columnName : columns) {
                    System.out.println(tableName + "," + columnName);
                }
            }
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
        }
    }
}
Use below class to Get HbaseMetaData..
package com.usertest;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.PageFilter;
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Small helper around the HBase admin/client API that lists table names and
 * discovers the {@code family:qualifier} columns of a table by scanning it.
 * Call {@link #close()} when done to release the admin connection.
 */
public class HbaseMetaData implements java.io.Closeable {

    /** Row limit used by {@link #getColumns(String)}. */
    private static final int DEFAULT_SCAN_LIMIT = 10000;

    private HBaseAdmin hBaseAdmin;
    private Configuration hBaseConfiguration;

    public HbaseMetaData () throws IOException {
        this.hBaseConfiguration = HBaseConfiguration.create();
        this.hBaseAdmin = new HBaseAdmin(hBaseConfiguration);
    }

    /**
     * get all Table names
     *
     * @param regex pattern a table name must contain a match for
     * @return the matching table names, in the order reported by the admin API
     */
    public List<String> getTableNames(String regex) throws IOException {
        Pattern pattern = Pattern.compile(regex);
        List<String> tableList = new ArrayList<String>();
        TableName[] tableNames = hBaseAdmin.listTableNames();
        for (TableName tableName : tableNames) {
            if (pattern.matcher(tableName.toString()).find()) {
                tableList.add(tableName.toString());
            }
        }
        return tableList;
    }

    /** Get all columns, using the default scan limit. */
    public Set<String> getColumns(String hbaseTable) throws IOException {
        return getColumns(hbaseTable, DEFAULT_SCAN_LIMIT);
    }

    /**
     * get all columns from the table
     *
     * @param hbaseTable name of the table to scan
     * @param limitScan  page size passed to the scan's {@code PageFilter}
     * @return sorted set of {@code family:qualifier} names seen in the scanned rows
     */
    public Set<String> getColumns(String hbaseTable, int limitScan) throws IOException {
        Set<String> columnList = new TreeSet<String>();
        HTable hTable = new HTable(hBaseConfiguration, hbaseTable);
        try {
            Scan scan = new Scan();
            scan.setFilter(new PageFilter(limitScan));
            ResultScanner results = hTable.getScanner(scan);
            try {
                for (Result result : results) {
                    List<KeyValue> keyValues = result.list();
                    if (keyValues == null) {
                        continue; // NOTE(review): presumably an empty row; confirm against the HBase version in use
                    }
                    for (KeyValue keyValue : keyValues) {
                        // Decode explicitly as UTF-8 rather than with the platform default charset.
                        columnList.add(
                                new String(keyValue.getFamily(), java.nio.charset.StandardCharsets.UTF_8) + ":" +
                                new String(keyValue.getQualifier(), java.nio.charset.StandardCharsets.UTF_8)
                        );
                    }
                }
            } finally {
                // Release the scanner even if iteration fails.
                results.close();
            }
        } finally {
            hTable.close();
        }
        return columnList;
    }

    /** Closes the underlying admin connection. */
    @Override
    public void close() throws IOException {
        hBaseAdmin.close();
    }
}
I'm quite new to Java and I'm facing a situation I can't solve. I have some html code and I'm trying to run a regular expression to store all matches into an array. Here's my code:
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
 * Runs a regular expression over a text and keeps all matches.
 * The matches of the most recent call are also available in {@link #arrayResults}.
 */
public class RegexMatch{
    /** Whether the most recent call found at least one match. */
    boolean foundMatch = false;

    /** Matches of the most recent call; empty until a call has matched something. */
    public String[] arrayResults = new String[0];

    /**
     * Finds every match of {@code pattern} in {@code sourceCode}, ignoring case
     * and in multi-line mode.
     *
     * @param sourceCode the text to search
     * @param pattern    the regular expression
     * @return all matches in order of appearance; an empty array when nothing
     *         matches or when {@code pattern} is not a valid regular expression
     */
    public String[] TestRegularExpression(String sourceCode, String pattern){
        // Collect into a list first: the number of matches is not known up front.
        java.util.List<String> found = new java.util.ArrayList<String>();
        try {
            Pattern regex = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
            Matcher regexMatcher = regex.matcher(sourceCode);
            while (regexMatcher.find()) {
                found.add(regexMatcher.group());
            }
        } catch (PatternSyntaxException ex) {
            // An invalid pattern cannot match anything; report "no matches".
            found.clear();
        }
        foundMatch = !found.isEmpty();
        arrayResults = found.toArray(new String[0]);
        return arrayResults;
    }
}
I'm passing a string containing html code and the regular expression pattern to extract all meta tags and store them into the array. Here's how I instantiate the method:
// Run the <meta ...> pattern over the page source, then read the matches
// back from the instance's public arrayResults field.
RegexMatch regex = new RegexMatch();
regex.TestRegularExpression(sourceCode, "<meta.*?>");
String[] META_TAGS = regex.arrayResults;
Any hint?
Thanks!
Firstly, parsing HTML with regular expressions is a bad idea. There are alternatives which will convert the HTML into a DOM etc - you should look into those.
Assuming you still want the "match multiple results" idea though, it seems to me that a List<E> of some form would be more useful, so you don't need to know the size up-front. You can also build that in the method itself, rather than having state. For example:
import java.util.*;
import java.util.regex.*;
/** Demonstrates collecting every regex match of a pattern into a list. */
public class Test
{
    public static void main(String[] args)
        throws PatternSyntaxException
    {
        // Want to get x10 and x5 from this
        final String sample = "x10 y5 x5 xyz";
        final String expression = "x\\d+";

        Iterator<String> hits = getAllMatches(sample, expression).iterator();
        while (hits.hasNext()) {
            System.out.println(hits.next());
        }
    }

    /**
     * Returns every substring of {@code text} matched by {@code pattern}, in
     * order of appearance.
     *
     * @throws PatternSyntaxException if {@code pattern} is not a valid regex
     */
    public static List<String> getAllMatches(String text, String pattern)
        throws PatternSyntaxException
    {
        Matcher scanner = Pattern.compile(pattern).matcher(text);
        List<String> collected = new ArrayList<String>();
        for (boolean more = scanner.find(); more; more = scanner.find()) {
            collected.add(scanner.group());
        }
        return collected;
    }
}
It's possible that there's something similar to this within the Matcher class itself, but I can't immediately see it...
With Jsoup, you could do something as simple as...
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/** Extracts the {@code meta} elements of an HTML document with Jsoup. */
public class GetMeta {
    private static final String META_QUERY = "meta";

    /**
     * Parses {@code htmlText} and returns the string form of each element
     * selected by {@link #META_QUERY}, in selection order.
     */
    public static List<String> parseForMeta(String htmlText) {
        Document parsed = Jsoup.parse(htmlText);
        Elements selected = parsed.select(META_QUERY);

        List<String> rendered = new ArrayList<String>();
        for (Element meta : selected) {
            String asText = meta.toString();
            rendered.add(asText);
        }
        return rendered;
    }
}
For example:
import java.io.IOException;
import java.net.*;
import java.util.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Downloads a web page and prints each of its {@code meta} elements, using
 * Jsoup to parse the HTML.
 */
public class GetMeta {
    private static final String META_QUERY = "meta";
    private static final String MAIN_URL = "http://www.yahoo.com";

    public static void main(String[] args) {
        // try-with-resources closes the scanner, and the URL stream under it, on every path.
        // The page is decoded as UTF-8 instead of the platform default charset.
        try (Scanner scan = new Scanner(new URL(MAIN_URL).openStream(),
                java.nio.charset.StandardCharsets.UTF_8.name())) {
            StringBuilder sb = new StringBuilder();
            while (scan.hasNextLine()) {
                sb.append(scan.nextLine()).append("\n");
            }
            List<String> metaList = parseForMeta(sb.toString());
            for (String metaStr : metaList) {
                System.out.println(metaStr);
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses {@code htmlText} and returns the string form of each element
     * selected by {@link #META_QUERY}, in selection order.
     */
    public static List<String> parseForMeta(String htmlText) {
        Document jsDocument = Jsoup.parse(htmlText);
        Elements metaElements = jsDocument.select(META_QUERY);
        List<String> metaList = new ArrayList<String>();
        for (Element element : metaElements) {
            metaList.add(element.toString());
        }
        return metaList;
    }
}