PDBBox number split when setting the setSortByPosition to true - java

I am trying to extract the text from the PDF document, I have inherited the PDFTextStripper as my use-case requied me to extract the text with more information not only the text part of it.
public class PDFBoxToTextBox extends PDFTextStripper {
private StringBuilder notHorizontalOriented;
public StringBuilder getNotHorizontalOriented() {
return notHorizontalOriented;
}
public void setNotHorizontalOriented(StringBuilder notHorizontalOriented) {
this.notHorizontalOriented = notHorizontalOriented;
}
public PDFBoxToTextBox() throws IOException {
super();
this.notHorizontalOriented = new StringBuilder();
// TODO Auto-generated constructor stub
}
public static String getTextBloks(InputStream in, Integer pageNum) {
try {
byte[] targetArray = IOUtils.toByteArray(in);
PDDocument document;
document = Loader.loadPDF(targetArray);
PDPage doc = document.getPage(pageNum - 1);
String page_height = String.valueOf(doc.getMediaBox().getHeight());
String page_width = String.valueOf(doc.getMediaBox().getWidth());
// Instantiate PDFTextStripper class
PDFBoxToTextBox pdfStripper = new PDFBoxToTextBox();
pdfStripper.setSortByPosition(true);
//pdfStripper.setShouldSeparateByBeads(false);
pdfStripper.setDropThreshold(0.5f);
pdfStripper.setIndentThreshold(.5f);
pdfStripper.setLineSeparator("\n");
pdfStripper.setWordSeparator("\t");
System.out.println("Line seperator ::" + pdfStripper.getWordSeparator() + "::");
// Retrieving text from PDF document
pdfStripper.setStartPage(pageNum);
pdfStripper.setEndPage(pageNum);
// System.out.println(pdfStripper.getTextMatrix());
String text = pdfStripper.getText(document);
System.out.println(text);
List tBlocks = new ArrayList<String>();
for (String w : text.split("\\|")) {
tBlocks.add(w);
// System.out.println(w);
}
LineCatcher lineCatcher = new LineCatcher(doc);
for(Map line :lineCatcher.getAllHorizontalPath(doc)){
ObjectMapper objectMapper = new ObjectMapper();
String json = objectMapper.writeValueAsString(line);
tBlocks.add(json);
}
Map<String, Object> result = new HashMap<String, Object>();
// Closing the document
document.close();
result.put("page_width", page_width);
result.put("page_height", page_height);
result.put("text_blocks", tBlocks);
ObjectMapper objectMapper = new ObjectMapper();
String json = objectMapper.writeValueAsString(result);
System.out.println("NOT horizontal tex : "+ pdfStripper.getNotHorizontalOriented().toString());
System.out.println("JSON to Return");
System.out.println(json);
return json;
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return "";
}
private int getTextOrientation(TextPosition text) {
Matrix m = text.getTextMatrix().clone();
m.concatenate(text.getFont().getFontMatrix());
int angle = (int) Math.round(Math.toDegrees(Math.atan2(m.getShearY(), m.getScaleY())));
return angle;
}
#Override
public void setDropThreshold(float dropThresholdValue) {
// TODO Auto-generated method stub
super.setDropThreshold(dropThresholdValue);
}
#Override
public void setIndentThreshold(float indentThresholdValue) {
// TODO Auto-generated method stub
super.setIndentThreshold(indentThresholdValue);
}
#Override
protected void writeWordSeparator() throws IOException {
output.write(" | ");
}
#Override
protected void writeLineSeparator() throws IOException {
output.write(" | ");
}
#Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
// ("TextBlock", ["x", "y", "w", "h", "text", "bold", "font_style","data_type"])
Map<String, String> textBlock = new HashMap<String, String>();
String prevBaseFont = "";
String x = String.valueOf(textPositions.get(0).getXDirAdj());
String y = String.valueOf(textPositions.get(0).getYDirAdj());
String w = String.valueOf(textPositions.get(0).getWidth());
String h = String.valueOf(textPositions.get(0).getHeightDir());
int rotation = 0;
Map<Float, List> charHeightMap = new HashMap<Float, List>();
for (TextPosition position : textPositions) {
if (charHeightMap.containsKey(position.getHeightDir())) {
charHeightMap.get(position.getHeightDir()).add(position.getUnicode());
} else {
List<String> chars = new ArrayList<String>();
chars.add(position.getUnicode());
charHeightMap.put(position.getHeightDir(), chars);
}
}
Float maxHeight = null;
int maxSize = 0;
for (Float ht : charHeightMap.keySet()) {
if (charHeightMap.get(ht).size() > maxSize) {
maxHeight = ht;
}
}
StringBuilder builder = new StringBuilder();
StringBuilder fbuilder = new StringBuilder();
for (TextPosition position : textPositions) {
// if (position.getHeightDir() != maxHeight) {
// continue;
// }
position.getRotation();
String baseFont = position.getFont().getFontDescriptor().getFontName();
if (baseFont != null && !baseFont.equals(prevBaseFont)) {
// fbuilder.append('[').append(baseFont).append(']');
fbuilder.append(baseFont);
prevBaseFont = baseFont;
}
builder.append(position.getUnicode());
}
// System.out.println(rotation);
// System.out.println(textPositions.get(0));
Float wid = textPositions.get(textPositions.size() - 1).getXDirAdj() - textPositions.get(0).getXDirAdj();
String font = fbuilder.toString();
textBlock.put("text", builder.toString());
textBlock.put("font", font);
Boolean bold = true ? font.toLowerCase().contains("bold") : false;
textBlock.put("bold", bold.toString());
textBlock.put("x", x);
textBlock.put("y", y);
textBlock.put("w", wid.toString());
textBlock.put("h", h);
ObjectMapper objectMapper = new ObjectMapper();
String json = objectMapper.writeValueAsString(textBlock);
writeString(json);
}
protected void processTextPosition(TextPosition text) {
java.awt.Composite com;
Color col;
// System.out.println(this.getGraphicsState().getTextState().getRenderingMode());
System.out.println(text);
System.out.println(this.getTextOrientation(text));
switch (this.getGraphicsState().getTextState().getRenderingMode()) {
case FILL:
if (this.getTextOrientation(text) == 0 || this.getTextOrientation(text) == 90) {
super.processTextPosition(text);
} else {
this.notHorizontalOriented.append(text.getUnicode());
}
break;
case STROKE:
if (this.getTextOrientation(text) == 0 || this.getTextOrientation(text) == 90) {
super.processTextPosition(text);
} else {
System.out.println(text.getUnicode() + ", X translate : "
+ this.getGraphicsState().getCurrentTransformationMatrix().getShearX() + ", Y translate : "
+ this.getGraphicsState().getCurrentTransformationMatrix().getTranslateY() + ", angle: "
+ this.getTextOrientation(text));
this.notHorizontalOriented.append(text.getUnicode());
}
break;
case NEITHER:
System.out.println(this.getGraphicsState().getNonStrokingJavaComposite().toString());
break;
default:
System.out.println(this.getGraphicsState().getNonStrokingJavaComposite().toString());
System.out.println(this.getGraphicsState().getNonStrokingColor().getColorSpace().getName());
}
}
}
Unfortunately I cannot upload the PDF file, but I have included the image(cropped) to understand the text layout and associated values.
There are two issues that I am facing now:
Some of the numerical value is getting split, in the image value 2,020,735 is getting split as 2 and ,020,735 two separate text blocks. When I set the sortByPosition to false I am getting the correct numerical values but, I need the sortByPosition set to true
The orientation angle is 90 in this case for the text in PDF which is surprising, as all the text is horizontal as displayed in the image above
I am not sure when I am going wrong.

Related

Lucene 6.1 Custom Tokenizer and Analyzer

I'm asking for some help with Lucene 6.1 API.
I tried to extend Lucene's Tokenizer and Analyzer, but I don't understand all guides. In all tutorials, User's Tokenizer overrides the increment. In constructor they have Reader class and in User's Analyzer class they override createComponents method. But in Lucene it has only 1 String argument, so how can I add Reader to my Analyzer?
My code:
public class ChemTokenizer extends Tokenizer{
protected CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
protected String stringToTokenize;
protected int position = 0;
protected List<int[]> chemicals = new ArrayList<>();
#Override
public boolean incrementToken() throws IOException {
// Clear anything that is already saved in this.charTermAttribute
this.charTermAttribute.setEmpty();
// Get the position of the next symbol
int nextIndex = -1;
Pattern p = Pattern.compile("[^A-zА-я]");
Matcher m = p.matcher(stringToTokenize.substring(position));
nextIndex = m.start();
// Did we lose chemicals?
for (int[] pair: chemicals) {
if (pair[0] < nextIndex && pair[1] > nextIndex) {
//We are in the chemical name
if (position == pair[0]) {
nextIndex = pair[1];
}
else {
nextIndex = pair[0];
}
}
}
// Next separator was found
if (nextIndex != -1) {
String nextToken = stringToTokenize.substring(position, nextIndex);
charTermAttribute.append(nextToken);
position = nextIndex + 1;
return true;
}
// Last part of text
else if (position < stringToTokenize.length()) {
String nextToken = stringToTokenize.substring(position);
charTermAttribute.append(nextToken);
position = stringToTokenize.length();
return true;
}
else {
return false;
}
}
public ChemTokenizer(Reader reader,List<String> additionalKeywords) {
int numChars;
char[] buffer = new char[1024];
StringBuilder stringBuilder = new StringBuilder();
try {
while ((numChars =
reader.read(buffer, 0, buffer.length)) != -1) {
stringBuilder.append(buffer, 0, numChars);
}
}
catch (IOException e) {
throw new RuntimeException(e);
}
stringToTokenize = stringBuilder.toString();
//Checking for keywords
//Doesnt work properly if text has chemical synonyms
for (String keyword: additionalKeywords) {
int[] tmp = new int[2];
//Start of keyword
tmp[0] = stringToTokenize.indexOf(keyword);
tmp[1] = tmp[0] + keyword.length() - 1;
chemicals.add(tmp);
}
}
/* Reset the stored position for this object when reset() is called.
*/
#Override
public void reset() throws IOException {
super.reset();
position = 0;
chemicals = new ArrayList<>();
}
}
And code for Analyzer:
public class ChemAnalyzer extends Analyzer{
List<String> additionalKeywords;
public ChemAnalyzer(List<String> ad) {
additionalKeywords = ad;
}
#Override
protected TokenStreamComponents createComponents(String s, Reader reader) {
Tokenizer tokenizer = new ChemTokenizer(reader,additionalKeywords);
TokenStream filter = new LowerCaseFilter(tokenizer);
return new TokenStreamComponents(tokenizer, filter);
}
}
The problem is that this code doesn't work with Lucene 6
This is what I found in github search, guess you have to create a new tokenizer with out read.
#Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new WhitespaceTokenizer()); }

Get metadata from shoutcast stream

I'm developing a radio app with multiple radios, the stream is playing fine. But I'm struggling to show artist and music playing at the moment.
This is the class I'm using to get metadata from shoutcast stream:
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class IcyStreamMeta<Message> {
protected URL streamUrl;
private Map<String, String> metadata;
private boolean isError;
public IcyStreamMeta(URL streamUrl) {
setStreamUrl(streamUrl);
isError = false;
}
/**
* Get artist using stream's title
*
* #return String
* #throws IOException
*/
public String getArtist() throws IOException {
Map<String, String> data = getMetadata();
if (!data.containsKey("StreamTitle"))
return "";
String streamTitle = data.get("StreamTitle");
String title = streamTitle.substring(0, streamTitle.indexOf("-"));
return title.trim();
}
/**
* Get title using stream's title
*
* #return String
* #throws IOException
*/
public String getTitle() throws IOException {
Map<String, String> data = getMetadata();
if (!data.containsKey("StreamTitle"))
return "";
String streamTitle = data.get("StreamTitle");
String artist = streamTitle.substring(streamTitle.indexOf("-")+1);
return artist.trim();
}
public Map<String, String> getMetadata() throws IOException {
if (metadata == null) {
refreshMeta();
}
return metadata;
}
public void refreshMeta() throws IOException {
retreiveMetadata();
}
private void retreiveMetadata() throws IOException {
URLConnection con = streamUrl.openConnection();
con.setRequestProperty("Icy-MetaData", "1");
con.setRequestProperty("Connection", "close");
con.setRequestProperty("Accept", null);
con.connect();
int metaDataOffset = 0;
Map<String, List<String>> headers = con.getHeaderFields();
InputStream stream = con.getInputStream();
if (headers.containsKey("icy-metaint")) {
// Headers are sent via HTTP
metaDataOffset = Integer.parseInt(headers.get("icy-metaint").get(0));
} else {
// Headers are sent within a stream
StringBuilder strHeaders = new StringBuilder();
char c;
while ((c = (char)stream.read()) != -1) {
strHeaders.append(c);
if (strHeaders.length() > 5 && (strHeaders.substring((strHeaders.length() - 4), strHeaders.length()).equals("\r\n\r\n"))) {
// end of headers
break;
}
}
// Match headers to get metadata offset within a stream
Pattern p = Pattern.compile("\\r\\n(icy-metaint):\\s*(.*)\\r\\n");
Matcher m = p.matcher(strHeaders.toString());
if (m.find()) {
metaDataOffset = Integer.parseInt(m.group(2));
}
}
// In case no data was sent
if (metaDataOffset == 0) {
isError = true;
return;
}
// Read metadata
int b;
int count = 0;
int metaDataLength = 4080; // 4080 is the max length
boolean inData = false;
StringBuilder metaData = new StringBuilder();
// Stream position should be either at the beginning or right after headers
while ((b = stream.read()) != -1) {
count++;
// Length of the metadata
if (count == metaDataOffset + 1) {
metaDataLength = b * 16;
}
if (count > metaDataOffset + 1 && count < (metaDataOffset + metaDataLength)) {
inData = true;
} else {
inData = false;
}
if (inData) {
if (b != 0) {
metaData.append((char)b);
}
}
if (count > (metaDataOffset + metaDataLength)) {
break;
}
}
// Set the data
metadata = IcyStreamMeta.parseMetadata(metaData.toString());
// Close
stream.close();
}
public boolean isError() {
return isError;
}
public URL getStreamUrl() {
return streamUrl;
}
public void setStreamUrl(URL streamUrl) {
this.metadata = null;
this.streamUrl = streamUrl;
this.isError = false;
}
public static Map<String, String> parseMetadata(String metaString) {
Map<String, String> metadata = new HashMap();
String[] metaParts = metaString.split(";");
Pattern p = Pattern.compile("^([a-zA-Z]+)=\\'([^\\']*)\\'$");
Matcher m;
for (int i = 0; i < metaParts.length; i++) {
m = p.matcher(metaParts[i]);
if (m.find()) {
metadata.put(m.group(1), m.group(2));
}
}
return metadata;
}
}
And the method on MainActivity to get the metadata every 10 seconds
private void getMeta()
{
Timer timer = new Timer();
timer.schedule(new TimerTask() {
public void run() {
try {
IcyStreamMeta icy = new IcyStreamMeta(new URL(RadiophonyService.getRadioURL()));
final String data = icy.getArtist() + " - " + icy.getTitle();
final TextView meta = (TextView) findViewById(R.id.now_playing);
runOnUiThread(new Runnable() {
public void run() {
meta.setText(data);
}
});
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}, 0, 10000);
}
Initially, when I select one station, it plays but does not show metadata and when I select another station the app crashes with this runtime error:
E/AndroidRuntime: FATAL EXCEPTION: Timer-0
Process: com.example.app, PID: 23597
java.lang.StringIndexOutOfBoundsException: length=0; regionStart=0; regionLength=-1
at java.lang.String.startEndAndLength(String.java:504)
at java.lang.String.substring(String.java:1333)
at com.example.app.utilities.IcyStreamMeta.getArtist(IcyStreamMeta.java:41)
at com.example.app.activities.MainActivity$8.run(MainActivity.java:306)
at java.util.Timer$TimerImpl.run(Timer.java:284)
I'm stuck at this for days, and I tried other solutions but nothing works!
Looks like this line
String title = streamTitle.substring(0, streamTitle.indexOf("-"));
is the culprit.
If streamTitle does not have a dash in its title, indexOf() will return a -1, and substring() is choking because you can't have an end index less than the start index.
Maybe you need something like this:
int pos = streamTitle.indexOf("-");
String title = (pos == -1) ? streamTitle : streamTitle.substring(0, pos);

OrientDB slow when browsing cluster

Well what i am trying to achieve is to save pairs of words in a sentence and if the word is already there , i am trying to save a list of words against one.
To save the pairing as there could many millions as my data set file is very large , i opted for orientdb. I dont know if i am approaching it correctly but orientdb is very slow. After 8 hours of running it has only made pairs for 12000 sentences.
As far as i have checked the major slowdown was in browsing cluster.
Attached is my code, please if ant one can give any pointers over my approach.
public static void main(String[] args) {
// TODO Auto-generated method stub
Main m = new Main();
m.openDatabase();
m.readFile("train_v2.txt");
m.closeDatabase();
}
}
class Main {
ODatabaseDocumentTx db;
Map<String, Object> index;
List<Object> list = null;
String pairing[];
ODocument doc;
Main() {
}
public void closeDatabase() {
if (!db.isClosed()) {
db.close();
}
}
void openDatabase() {
db = new ODatabaseDocumentTx("local:/databases/model").open("admin",
"admin");
doc = new ODocument("final");
}
public void readFile(String filename) {
InputStream ins = null; // raw byte-stream
Reader r = null; // cooked reader
int i = 1;
BufferedReader br = null; // buffered for readLine()
try {
String s;
ins = new FileInputStream(filename);
r = new InputStreamReader(ins, "UTF-8"); // leave charset out
// for
// default
br = new BufferedReader(r);
while ((s = br.readLine()) != null) {
System.out.println("" + i);
createTermPair(s.replaceAll("[^\\w ]", "").trim());
i++;
}
} catch (Exception e) {
System.err.println(e.getMessage()); // handle exception
} finally {
closeDatabase();
if (br != null) {
try {
br.close();
} catch (Throwable t) { /* ensure close happens */
}
}
if (r != null) {
try {
r.close();
} catch (Throwable t) { /* ensure close happens */
}
}
if (ins != null) {
try {
ins.close();
} catch (Throwable t) { /* ensure close happens */
}
}
}
}
private void createTermPair(String phrase) {
phrase = phrase + " .";
String[] word = phrase.split(" ");
for (int i = 0; i < word.length - 1; i++) {
if (!word[i].trim().equalsIgnoreCase("")
&& !word[i + 1].trim().equalsIgnoreCase("")) {
String wordFirst = word[i].toLowerCase().trim();
String wordSecond = word[i + 1].toLowerCase().trim();
String pair = wordFirst + " " + wordSecond;
checkForPairAndWrite(pair);
}
}
}
private void checkForPairAndWrite(String pair) {
try {
pairing = pair.trim().split(" ");
if (!pairing[1].equalsIgnoreCase(" ")) {
index = new HashMap<String, Object>();
for (ODocument docr : db.browseCluster("final")) {
list = docr.field(pairing[0]);
}
if (list == null) {
list = new ArrayList<>();
}
list.add("" + pairing[1]);
if (list.size() >= 1)
index.put(pairing[0], list);
doc.fields(index);
doc.save();
}// for (int i = 0; i < list.size(); i++) {
// System.out.println("" + list.get(i));
// }
} catch (Exception e) {
}
return;
}
}

Getting metadata from SHOUTcast using IcyStreamMeta

I am writing an app for Android that grabs meta data from SHOUTcast mp3 streams. I am using a pretty nifty class I found online that I slightly modified, but I am still having 2 problems.
1) I have to continuously ping the server to update the metadata using a TimerTask. I am not fond of this approach but it was all I could think of.
2) There is a metric tonne of garbage collection while my app is running. Removing the TimerTask got rid of the garbage collection issue so I am not sure if I am just doing it wrong or if this is normal.
Here is the class I am using:
public class IcyStreamMeta {
protected URL streamUrl;
private Map<String, String> metadata;
private boolean isError;
public IcyStreamMeta(URL streamUrl) {
setStreamUrl(streamUrl);
isError = false;
}
/**
* Get artist using stream's title
*
* #return String
* #throws IOException
*/
public String getArtist() throws IOException {
Map<String, String> data = getMetadata();
if (!data.containsKey("StreamTitle"))
return "";
try {
String streamTitle = data.get("StreamTitle");
String title = streamTitle.substring(0, streamTitle.indexOf("-"));
return title.trim();
}catch (StringIndexOutOfBoundsException e) {
return "";
}
}
/**
* Get title using stream's title
*
* #return String
* #throws IOException
*/
public String getTitle() throws IOException {
Map<String, String> data = getMetadata();
if (!data.containsKey("StreamTitle"))
return "";
try {
String streamTitle = data.get("StreamTitle");
String artist = streamTitle.substring(streamTitle.indexOf("-")+1);
return artist.trim();
} catch (StringIndexOutOfBoundsException e) {
return "";
}
}
public Map<String, String> getMetadata() throws IOException {
if (metadata == null) {
refreshMeta();
}
return metadata;
}
public void refreshMeta() throws IOException {
retreiveMetadata();
}
private void retreiveMetadata() throws IOException {
URLConnection con = streamUrl.openConnection();
con.setRequestProperty("Icy-MetaData", "1");
con.setRequestProperty("Connection", "close");
//con.setRequestProperty("Accept", null);
con.connect();
int metaDataOffset = 0;
Map<String, List<String>> headers = con.getHeaderFields();
InputStream stream = con.getInputStream();
if (headers.containsKey("icy-metaint")) {
// Headers are sent via HTTP
metaDataOffset = Integer.parseInt(headers.get("icy-metaint").get(0));
} else {
// Headers are sent within a stream
StringBuilder strHeaders = new StringBuilder();
char c;
while ((c = (char)stream.read()) != -1) {
strHeaders.append(c);
if (strHeaders.length() > 5 && (strHeaders.substring((strHeaders.length() - 4), strHeaders.length()).equals("\r\n\r\n"))) {
// end of headers
break;
}
}
// Match headers to get metadata offset within a stream
Pattern p = Pattern.compile("\\r\\n(icy-metaint):\\s*(.*)\\r\\n");
Matcher m = p.matcher(strHeaders.toString());
if (m.find()) {
metaDataOffset = Integer.parseInt(m.group(2));
}
}
// In case no data was sent
if (metaDataOffset == 0) {
isError = true;
return;
}
// Read metadata
int b;
int count = 0;
int metaDataLength = 4080; // 4080 is the max length
boolean inData = false;
StringBuilder metaData = new StringBuilder();
// Stream position should be either at the beginning or right after headers
while ((b = stream.read()) != -1) {
count++;
// Length of the metadata
if (count == metaDataOffset + 1) {
metaDataLength = b * 16;
}
if (count > metaDataOffset + 1 && count < (metaDataOffset + metaDataLength)) {
inData = true;
} else {
inData = false;
}
if (inData) {
if (b != 0) {
metaData.append((char)b);
}
}
if (count > (metaDataOffset + metaDataLength)) {
break;
}
}
// Set the data
metadata = IcyStreamMeta.parseMetadata(metaData.toString());
// Close
stream.close();
}
public boolean isError() {
return isError;
}
public URL getStreamUrl() {
return streamUrl;
}
public void setStreamUrl(URL streamUrl) {
this.metadata = null;
this.streamUrl = streamUrl;
this.isError = false;
}
public static Map<String, String> parseMetadata(String metaString) {
Map<String, String> metadata = new HashMap<String, String>();
String[] metaParts = metaString.split(";");
Pattern p = Pattern.compile("^([a-zA-Z]+)=\\'([^\\']*)\\'$");
Matcher m;
for (int i = 0; i < metaParts.length; i++) {
m = p.matcher(metaParts[i]);
if (m.find()) {
metadata.put((String)m.group(1), (String)m.group(2));
}
}
return metadata;
}
}
And here is my timer:
private void getMeta() {
timer.schedule(new TimerTask() {
public void run() {
try {
icy = new IcyStreamMeta(new URL(stationUrl));
runOnUiThread(new Runnable() {
public void run() {
try {
artist.setText(icy.getArtist());
title.setText(icy.getTitle());
} catch (IOException e) {
e.printStackTrace();
} catch (StringIndexOutOfBoundsException e) {
e.printStackTrace();
}
}
});
} catch (MalformedURLException e) {
e.printStackTrace();
}
}
},0,5000);
}
Much appreciation for any assistance!
I've replaced the IcyStreamMeta class in my program and am getting the meta data from the 7.html file that is a part of the SHOUTcast spec. Far less data usage and all that so I feel it is a better option.
I am still using the TimerTask, which is acceptable. There is practically no GC any more and I am happy with using 7.html and a little regex. :)

Function based expression message renderer

I'm doing a simple MessageRenderer.
It's specification:
Render message based on an Context (it's a map that's contains all key/value pair parameters)
Supports simple render such as: Your username is << username >>. Assume username in the context is barcelona and the result will be Your username is Barcelona.
Supported function-like object. Example: Current time is << now() >>, now(): is an object that will returns a string of current date time. And result will be: Current time is 2011-05-30
Each parameter of function can also be templated: Current time is << now( << date_format >> ) >> . This template returns a string of current date time with format is the value of key 'date_format' retrieved from the Context. Assume date_format in Context is dd/MM/yyyy and the result will be: Current time is 30/05/2011
Each parameter of function can also be templated with a different method call: Time is << now_locale ( << getLocale() >> ). Assume that getLocale() is an function object that will be return a locale is en_US and the result will be: Time is 2011/05/30 11:20:34 PM
Template can be nested. Example: Your user name is << << username >> >>. It means, Key username has value param1, Key param1 has value is barcelona so the final result will be: Your user name is Barcelona.
My classes and interfaces:
RenderContext.java
public interface RenderContext {
public String getParameter(String key);
}
MessageRenderer.java
public interface MessageRenderer {
public String render(String s, RenderContext... context);
}
MethodExpressionEvaluator.java
// Using this class to implements the method evaluation, such as now(), now_locale()
public interface MethodExpressionEvaluator {
public String evaluate(String[] methodParams, RenderContext... context);
}
AbstractMessageRenderer.java
public abstract class AbstractMessageRenderer implements MessageRenderer {
public static final String DEFAULT_NULL = "###";
public static final String PLACEHOLDER_START_TOKEN = "<<";
public static final String PLACEHOLDER_END_TOKEN = ">>";
protected int lenPlaceholderStartToken = 0;
protected int lenPlaceholderEndToken = 0;
protected String nullToken;
protected String placeholderStartToken;
protected String placeholderEndToken;
protected boolean escape = true;
public AbstractMessageRenderer() {
placeholderStartToken = PLACEHOLDER_START_TOKEN;
placeholderEndToken = PLACEHOLDER_END_TOKEN;
lenPlaceholderStartToken = placeholderStartToken.length();
lenPlaceholderEndToken = placeholderEndToken.length();
nullToken = DEFAULT_NULL;
}
public String getNullToken() {
return nullToken;
}
public void setNullToken(String defaultNull) {
this.nullToken = defaultNull;
}
public String getPlaceholderStartToken() {
return placeholderStartToken;
}
public void setPlaceholderStartToken(String placeholderStartToken) {
this.placeholderStartToken = placeholderStartToken;
lenPlaceholderStartToken = placeholderStartToken.length();
}
public String getPlaceholderEndToken() {
return placeholderEndToken;
}
public void setPlaceholderEndToken(String placeholderEndToken) {
this.placeholderEndToken = placeholderEndToken;
lenPlaceholderEndToken = placeholderEndToken.length();
}
public boolean isEscape() {
return escape;
}
public boolean getEscape() {
return escape;
}
public void setEscape(boolean escape) {
this.escape = escape;
}
public String getParam(String key, RenderContext... context) {
if(context != null)
{
for(RenderContext param:context)
{
if(param != null)
{
String value = param.getParameter(key);
if(!StringUtil.isEmpty(value))
{
return value;
}
}
}
}
return nullToken;
}
public String render(String s, RenderContext... context) {
// handle trivial cases of empty template or no placeholders
if (s == null)
{
Log4j.app.debug("Message is null in template. Cannot render null message.");
return nullToken;
}
if (context == null)
{
Log4j.app.debug("RenderContext is null. Cannot render message with null RenderContext.");
return nullToken;
}
if (s.indexOf(placeholderStartToken) < 0)
{
return s;
}
String msg = nullToken;
try
{
// private int renderTemplate(Renderable r, String src, StringBuffer dst, String nil, int i, String[] marks, StringBuffer end,boolean escapes)
msg = doRender(s, context);
}
catch (Exception e)
{
Log4j.app.error("Exception in rendering template: " + e.getMessage(), e);
return nullToken;
}
return msg;
}
protected abstract String doRender(String s, RenderContext... context);
}
MethodExpressionRenderer.java
public class MethodExpressionRenderer extends AbstractMessageRenderer {
private boolean inSingleQuote = false;
private boolean inDoubleQuote=false;
private int placeholders;
private Stack<String> methodStack;
private String[] endTokens;
private String marker;
private List<String> methodParams;
private String prefix = "&";
public MethodExpressionRenderer() {
super();
methodStack = new Stack<String>();
marker = ",";
endTokens = new String[] { placeholderEndToken, marker, "(", ")" };
methodParams = new ArrayList<String>();
}
public String getPrefix() {
return prefix;
}
public void setPrefix(String prefix) {
this.prefix = prefix;
}
public String getMarker() {
return marker;
}
public void setMarker(String marker) {
this.marker = marker;
endTokens = new String[] { placeholderEndToken, marker };
}
#Override
public void setPlaceholderEndToken(String placeholderEndToken) {
super.setPlaceholderEndToken(placeholderEndToken);
endTokens = new String[] { placeholderEndToken, marker };
}
protected String doRender(String s, RenderContext... context) {
StringBuffer sb = new StringBuffer();
try
{
renderTemplate(s, sb, nullToken, 0, endTokens, null, context);
}
catch (Exception e)
{
Log4j.app.error("Exception in rendering method expression message emplate: " + e.getMessage(), e);
return nullToken;
}
return sb.toString();
}
private int renderTemplate(String src, StringBuffer dst, String nil, int i, String[] marks, StringBuffer end, RenderContext... context) {
int len = src.length();
while (i < len)
{
char c = src.charAt(i);
if (escape)
{
if (c=='\\')
{
i++;
char ch = src.charAt(i);
if(inSingleQuote)
{
if(ch=='\'')
{
inSingleQuote=false;
}
}
else if(inDoubleQuote)
{
if(ch=='"')
{
inDoubleQuote=false;
}
}
else
{
if(ch=='\'')
{
inSingleQuote=true;
}
else if(ch=='"')
{
inDoubleQuote=true;
}
}
dst.append(ch);
i++;
continue;
}
}
if(inSingleQuote)
{
if(c=='\'')
{
inSingleQuote=false;
}
}
else if(inDoubleQuote)
{
if(c=='"')
{
inDoubleQuote=false;
}
}
else
{
if(c=='\'')
{
inSingleQuote=true;
}
else if(c=='"')
{
inDoubleQuote=true;
}
}
// check for end marker
if (marks != null && !inSingleQuote && !inDoubleQuote)
{
for (int m = 0; m < marks.length; m++)
{
// If one of markers found
if (src.regionMatches(i, marks[m], 0, marks[m].length()))
{
// return marker if required
if (end != null)
{
end.append(marks[m]);
}
return i+marks[m].length();
}
}
}
// check for start of placeholder
if (src.regionMatches(i, placeholderStartToken, i, lenPlaceholderStartToken))
{
synchronized(this)
{
++placeholders;
}
i = renderPlaceholder(src, dst, nil, i, new ArrayList<String>(), context);
continue;
}
// just add plain character
if(c != '\'' && c!= '"')
{
dst.append(c);
}
i++;
}
return i;
}
private int renderPlaceholder(String src, StringBuffer dst, String nil, int i, List<String> params, RenderContext... context){
StringBuffer token = new StringBuffer(); // placeholder token
StringBuffer end = new StringBuffer(); // placeholder end marker
String value;
i = renderTemplate(src, token, nil, i+lenPlaceholderStartToken, endTokens, end);
String sToken = token.toString().trim();
String sEnd = end.toString().trim();
boolean isFunction = sEnd.equals("(");
// This is method name
if(isFunction && placeholders > methodStack.size())
{ // Method
synchronized(this)
{
methodStack.push(sToken); // put method into stack
}
}
else if(!isFunction && (methodStack.size()==0) && sEnd.equals(placeholderEndToken)) // Single template param such as <<param>>
{
value = getParam(sToken, context);
if(value != null)
{
if(value.trim().startsWith(placeholderStartToken))
{
value = render(src, context);
}
dst.append(value);
return i;
}
}
// TODO: Process method parameters to invoke
//.... ?????????
// Found end method token ')'
// Pop method out of stack to invoke
if ( (methodStack.size() >0) && (sEnd.length() == 0 || sEnd.equals(")")))
{
String method = null;
synchronized(this)
{
// Pop method out of stack to invoke
method = methodStack.pop();
--placeholders;
dst.append(invokeMethodEvaluator(method, methodParams.toArray(new String[0]), context));
methodParams.clear();
}
}
return i;
}
// Currently this method just implement to test so it just printout the method name
// and its parameter
// We can register MethodExpressionEvaluator to process
protected String invokeMethodEvaluator(String method, String[] params, RenderContext... context){
StringBuffer result = new StringBuffer();
result.append("[ ")
.append(method)
.append(" ( ");
if(params != null)
{
for(int i=0; i<params.length; i++)
{
result.append(params[i]);
if(i != params.length-1)
{
result.append(" , ");
}
}
}
result.append(" ) ")
.append(" ] ");
return result.toString();
}
}
We can easily register more method to the renderer to invoke. Each method will be an object and can be reused. But I'm in trouble how to resolve the nested method parameter. Can anyone give me an advice how we can process nested template of method parameter to invoke??? The line has TODO. Will my code in on the right way???
When you evaluate something like << count( << getTransId() >> ) >> you can either:
perform direct-evaluation as you parse, and push each function onto a stack, so that once you've evaluated getTransId() you pop the stack and use the return value (from the stack) as an argument for count(), or
you can build a parse tree to represent all the function calls that will be made, and then evaluate your parse tree after building it. (Building a tree probably doesn't buy you anything; since you're writing a template engine there is probably no high-level tree operation 'optimizations' that you could perform.)
An excellent little book I really enjoyed was Language Implementation Patterns by Parr. He walks through building simple to complex languages, and covers decisions like this in some depth. (Yes, he uses the ANTLR parser generator throughout, but your code looks like you're familiar enough with hand-generated parsers that different tools won't be a distraction for you.)
I found the bug and fixed it.
This is my new source:
// AbstractMethodExpressionRenderer.java
public class AbstractMethodExpressionRenderer extends AbstractMessageRenderer {
private boolean inSingleQuote = false;
private boolean inDoubleQuote=false;
private Stack<MethodExpressionDescriptor> functionStack;
private String[] endTokens;
private String marker;
private String prefix = "~";
public AbstractMethodExpressionRenderer() {
super();
functionStack = new Stack<MethodExpressionDescriptor>();
marker = ",";
endTokens = new String[] { placeholderEndToken, "(", ")", };
}
private class MethodExpressionDescriptor {
public List<String> params;
public String function;
public MethodExpressionDescriptor() {
params = new ArrayList<String>();
}
public MethodExpressionDescriptor(String name) {
this();
this.function = name;
}
}
public String getPrefix() {
return prefix;
}
public void setPrefix(String prefix) {
this.prefix = prefix;
}
public String getMarker() {
return marker;
}
public void setMarker(String marker) {
this.marker = marker;
endTokens = new String[] { placeholderEndToken, marker };
}
#Override
public void setPlaceholderEndToken(String placeholderEndToken) {
super.setPlaceholderEndToken(placeholderEndToken);
endTokens = new String[] { placeholderEndToken, marker };
}
protected String doRender(String s, RenderContext... context) {
StringBuffer sb = new StringBuffer();
try
{
renderTemplate(s, sb, nullToken, 0, endTokens, null, context);
}
catch (Exception e)
{
Log4j.app.error("Exception in rendering method expression message emplate: " + e.getMessage(), e);
return nullToken;
}
return sb.toString();
}
private int renderTemplate(String src, StringBuffer dst, String nil, int i, String[] marks, StringBuffer end, RenderContext... context) {
int len = src.length();
while (i < len)
{
char c = src.charAt(i);
if (escape)
{
if (c=='\\')
{
i++;
char ch = src.charAt(i);
if(inSingleQuote)
{
if(ch=='\'')
{
inSingleQuote=false;
}
}
else if(inDoubleQuote)
{
if(ch=='"')
{
inDoubleQuote=false;
}
}
else
{
if(ch=='\'')
{
inSingleQuote=true;
}
else if(ch=='"')
{
inDoubleQuote=true;
}
}
dst.append(ch);
i++;
continue;
}
}
if(inSingleQuote)
{
if(c=='\'')
{
inSingleQuote=false;
}
}
else if(inDoubleQuote)
{
if(c=='"')
{
inDoubleQuote=false;
}
}
else
{
if(c=='\'')
{
inSingleQuote=true;
}
else if(c=='"')
{
inDoubleQuote=true;
}
}
// check for end marker
if (marks != null && !inSingleQuote && !inDoubleQuote)
{
for (int m = 0; m < marks.length; m++)
{
// If one of markers found
if (src.regionMatches(i, marks[m], 0, marks[m].length()))
{
// return marker if required
if (end != null)
{
end.append(marks[m]);
}
return i+marks[m].length();
}
}
}
// check for start of placeholder
if (src.regionMatches(i, placeholderStartToken, 0, lenPlaceholderStartToken))
{
i = renderPlaceholder(src, dst, nil, i, new ArrayList<String>(), context);
continue;
}
// just add plain character
if(c != '\'' && c!= '"')
{
dst.append(c);
}
i++;
}
return i;
}
/**
* Render a placeholder as follows:
*
* <<key>>: Simple render, key value map
* <<function(<<param1>>, <<param2>>)>> : Function object render
*
* #param src
* #param dst
* #param nil
* #param i
* #param params
* #param context
* #return
*/
private int renderPlaceholder(String src, StringBuffer dst, String nil, int i, List<String> params, RenderContext... context){
StringBuffer token = new StringBuffer(); // placeholder token
StringBuffer end = new StringBuffer(); // placeholder end marker
String value = null;
// Simple key
i = renderTemplate(src, token, nil, i+lenPlaceholderStartToken, endTokens, end, context);
String sToken = token.toString().trim();
String sEnd = end.toString().trim();
// This is method name
if(sEnd.equals("("))
{ // Method
functionStack.add(new MethodExpressionDescriptor(sToken));
}
else // Try to resolve value
{
if(sToken.startsWith(placeholderStartToken))
{
value = render(sToken, context);
}
else if(sToken.startsWith(prefix))
{
if(functionStack.size() > 0)
{
functionStack.peek().params.add(sToken.substring(1));
}
return i;
}
else
{
value = getParam(sToken, context);
}
}
if (sEnd.length() == 0 || sEnd.equals(placeholderEndToken))
{
// No method found but found the end of placeholder token
if(functionStack.size() == 0)
{
if(value != null)
{
dst.append(value);
}
else
{
dst.append(nil);
}
}
else
{
functionStack.peek().params.add(value);
}
}
else
{
if(value != null)
{
value = value.trim();
}
if(end.substring(0, 1).equals("(") ||
end.substring(0, 1).equals(marker))
{
// right hand side is remainder of placeholder
StringBuffer tmp = new StringBuffer();
end = new StringBuffer();
i = renderTemplate(src, tmp, nil, i, endTokens, end, context);
}
if(end.substring(0, 1).equals(")"))
{
if ( functionStack.size() > 0 )
{
// Pop method out of stack to invoke
MethodExpressionDescriptor descriptor = functionStack.pop();
if(functionStack.size() > 0 )
{
functionStack.peek().params.add(invokeMethodEvaluator(descriptor.function, descriptor.params.toArray(new String[0]), context));
}
else
{
dst.append(invokeMethodEvaluator(descriptor.function, descriptor.params.toArray(new String[0]), context));
}
end = new StringBuffer();
StringBuffer tmp = new StringBuffer();
i = renderTemplate(src, tmp, nil, i, endTokens, end, context);
}
}
}
return i;
}
protected String invokeMethodEvaluator(String method, String[] params, RenderContext... context){
StringBuffer result = new StringBuffer();
result.append("[ ")
.append(method)
.append(" ( ");
if(params != null)
{
for(int i=0; i<params.length; i++)
{
result.append(params[i]);
if(i != params.length-1)
{
result.append(" , ");
}
}
}
result.append(" ) ")
.append(" ] ");
return result.toString();
}
}

Categories

Resources