Fetch HTML part in Java

I have some trouble understanding how I can download only part of an HTML page. I tried the traditional way via URL::openStream and a BufferedReader, but I'm not sure whether that forces me to download the whole page.
The problem is: I have a fairly big HTML page and I need to parse 2 numbers out of it, which update at least once a second. The approach above only detects changes once every 2-3 seconds, and I wonder if there is a way to make it faster. So I thought fetching the page partially might help.
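One standard way to fetch only part of a page over HTTP is a Range request. Whether it helps depends on the server honoring Range for a dynamically generated page, so treat this as a sketch (the URL is hypothetical):
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

public class PartialFetch {
    public static void main(String[] args) throws Exception {
        URL url = new URL("http://example.com/page.html"); // hypothetical page
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        // Ask for the first 4 KB only; a server that supports ranges replies 206 Partial Content
        conn.setRequestProperty("Range", "bytes=0-4095");
        int status = conn.getResponseCode();
        try (InputStream in = conn.getInputStream()) {
            byte[] chunk = in.readAllBytes();
            System.out.println("HTTP " + status + ", got " + chunk.length + " bytes");
        }
    }
}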

I wrote a helper to read the URL content. The parser for the elements is in another class.
public class HTMLReaderHelper {
private final URL currentURL;
HTMLReaderHelper(URL url){
currentURL = url;
}
public CharIterator charIterator(){
CharIterator iterator;
try {
iterator = new CharIterator();
} catch(IOException ex){
return null;
}
return iterator;
}
public StringIterator stringIterator(){
return new StringIterator();
}
class CharIterator implements java.util.Iterator<Character>{
private InputStream urlStream;
private boolean isValid;
private Queue<Character> buffer;
private CharIterator() throws IOException {
urlStream = currentURL.openStream();
isValid = true;
buffer = new ArrayDeque<>();
}
@Override
public boolean hasNext() {
int c;
try {
c = urlStream.read();
if (c != -1) {
buffer.add((char) c); // buffer the consumed char so next() can still return it
}
} catch (IOException ex) {
markInvalid();
return false;
}
return c != -1;
}
@Override
public Character next() {
if(!isValid){
return null;
}
int c;
try {
if(!buffer.isEmpty()){
return buffer.remove();
}
c = urlStream.read();
} catch (IOException ex) {
markInvalid();
return null;
}
return (c != -1) ? (char) c : null;
}
private void markInvalid(){
isValid = false;
}
}
class StringIterator implements java.util.Iterator<String>{
private CharIterator charPointer;
private Queue<String> buffer;
private boolean isValid;
private StringIterator(){
charPointer = charIterator();
isValid = true;
buffer = new ArrayDeque<>();
}
@Override
public boolean hasNext() {
String value = next();
try {
buffer.add(value);
} catch (NullPointerException ex){
markInvalid();
return false;
}
return isValid;
}
@Override
public String next() {
if(buffer.size() > 0){
return buffer.remove();
}
if(!isValid){
return null;
}
StringBuilder sb = new StringBuilder();
Character currentChar = charPointer.next();
if(currentChar == null){
return null;
}
while (currentChar.equals('\n') || currentChar.equals('\r')){
currentChar = charPointer.next();
if(currentChar == null){
return null;
}
}
while (currentChar != null && !currentChar.equals('\n') && !currentChar.equals('\r')){
sb.append(currentChar);
currentChar = charPointer.next();
}
return sb.toString();
}
private void markInvalid(){
isValid = false;
}
}
}

I think you should check how the data is fetched (SSE or WebSocket) and just try to subscribe to that service. If that is impossible, try a more efficient XML parser. I recommend https://vtd-xml.sourceforge.io/ - it can be ~10x faster than the DOM parser that comes with the JDK.
Also be careful with BufferedReader.readLine(), as there is a hidden allocation cost (this is pretty advanced stuff, as you have to think about CPU memory bandwidth, L1 cache misses, etc.) for strings that you don't really need.
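To illustrate avoiding per-line String allocation: the two numbers can be scanned straight out of the byte stream (a sketch; onNumber is a hypothetical consumer, and it assumes the values appear as plain ASCII digit runs):
import java.io.IOException;
import java.io.InputStream;
import java.util.function.LongConsumer;

final class DigitScanner {
    // Feeds every run of ASCII digits in the stream to onNumber without building Strings.
    static void scan(InputStream in, LongConsumer onNumber) throws IOException {
        long value = -1;
        int b;
        while ((b = in.read()) != -1) {
            if (b >= '0' && b <= '9') {
                value = (value < 0 ? 0 : value * 10) + (b - '0');
            } else if (value >= 0) {
                onNumber.accept(value); // end of a digit run
                value = -1;
            }
        }
        if (value >= 0) onNumber.accept(value); // trailing run at EOF
    }
}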
Example using the library I mentioned:
byte[] pageInBytes = readAllBytesFromTheURL();
VTDGen vg = new VTDGen();
vg.setDoc(pageInBytes);
vg.parse(false);
VTDNav vn = vg.getNav();
AutoPilot ap = new AutoPilot(vn);
//Jump to the section that we want to process
ap.selectXPath("/html/body/div");
if (ap.evalXPath() != -1) {
// getElementFragment() packs the fragment's offset in the low 32 bits and its length in the high 32 bits
long fragment = vn.getElementFragment();
String fileId = new String(pageInBytes, (int) fragment, (int) (fragment >> 32));
}

Related

How to convert an Iterator to a Spliterator

I have 4 large files (around 1.5 GB each) and I want to process these files, read each line of the file, and convert it to a customer object. I have the following implementation.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import java.util.zip.GZIPInputStream;
import static java.nio.charset.StandardCharsets.UTF_8;
public class CustomerDataAccess {
public static void main(String[] args) throws IOException {
CustomerFileItem john = new CustomerFileItem("CustFile1", "http://w.customer1.com");
CustomerFileItem sarah = new CustomerFileItem("CustFile2", "http://w.customer2.com");
CustomerFileItem charles = new CustomerFileItem("CustFile3", "http://w.customer3.com");
List<CustomerFileItem> customers = Arrays.asList(john, sarah, charles);
Iterator<CustomerFileLineItem> custList = new CustIterator(customers);
}
public static class CustIterator implements Iterator<CustomerFileLineItem> {
private static final int HEADER_LINES = 9; // 8 + 1 blank line
BufferedReader bufferedReader;
private int index = 0;
private final List<CustomerFileItem> custFileItems = new ArrayList<>();
public CustIterator(final List<CustomerFileItem> custFileItems) throws IOException {
this.custFileItems.addAll(custFileItems);
processNext();
}
private void processNext() throws IOException {
if (bufferedReader != null) {
bufferedReader.close();
}
if (index < custFileItems.size()) { // only update if there's another file
CustomerFileItem custFileItem = custFileItems.get(index);
GZIPInputStream gis = new GZIPInputStream(new URL(custFileItem.url).openStream());
// default buffer size is 8 KB
bufferedReader = new BufferedReader(new InputStreamReader(gis, UTF_8));
// read the first few lines
for (int i = 0; i < HEADER_LINES; i++) {
bufferedReader.readLine();
}
}
index++;
}
@Override
public boolean hasNext() {
try {
boolean currentReaderStatus = bufferedReader.ready();
if (currentReaderStatus) {
return true;
} else if (index < custFileItems.size()) {
// at end of current file, try to get the next one
processNext();
return hasNext();
} else { // no more files left
return false;
}
} catch (IOException e) {
try {
bufferedReader.close();
} catch (IOException e1) {
throw new UncheckedIOException(e1);
}
throw new UncheckedIOException(e);
}
}
@Override
public CustomerFileLineItem next() {
try {
String line = bufferedReader.readLine();
if (line != null) {
return new CustomerFileLineItem(line);
} else {
return null;
}
} catch (IllegalArgumentException exception) {
return null;
} catch (IOException e) {
try {
bufferedReader.close();
} catch (IOException e1) {
throw new UncheckedIOException(e1);
}
throw new UncheckedIOException(e);
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@Override
public void forEachRemaining(final Consumer<? super CustomerFileLineItem> action) {
throw new UnsupportedOperationException();
}
}
public static class CustomerFileLineItem {
private static final int NUMBER_OF_FIELDS = 4;
final String id;
final String productNumber;
final String usageType;
final String operation;
public CustomerFileLineItem(final String line) {
String[] strings = line.split(",");
if (strings.length != NUMBER_OF_FIELDS) {
throw new IllegalArgumentException(String.format("Malformed customer file line: %s", line));
}
this.id = strings[0];
this.productNumber = strings[1];
this.usageType = strings[2];
this.operation = strings[3];
}
}
static class CustomerFileItem {
private String fileName;
private String url;
public CustomerFileItem(String fileName, String url) {
this.fileName = fileName;
this.url = url;
}
}
}
In one use case I want to use streams on the output (custList). But I know I can't use streams with an Iterator directly. How can I convert it to a Spliterator? Or how can I implement with a Spliterator the same thing I implemented with the Iterator?
TL;DR You don’t need to implement an Iterator or Spliterator, you can simply use a Stream in the first place:
private static final int HEADER_LINES = 9; // 8 + 1 blank line
Stream<CustomerFileLineItem> stream = customers.stream()
.flatMap(custFileItem -> {
try {
GZIPInputStream gis
= new GZIPInputStream(new URL(custFileItem.url).openStream());
BufferedReader br = new BufferedReader(new InputStreamReader(gis, UTF_8));
// read the first few lines
for (int i = 0; i < HEADER_LINES; i++) br.readLine();
return br.lines().onClose(() -> {
try { br.close(); }
catch(IOException ex) { throw new UncheckedIOException(ex); }
});
} catch(IOException ex) {
throw new UncheckedIOException(ex);
}
})
.map(CustomerFileLineItem::new);
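A hypothetical usage of the resulting stream; note that flatMap closes each inner stream once it is exhausted, which runs the onClose handlers above and releases the underlying readers:
// Consume the stream like any other; the readers are closed as each file is drained.
stream.map(item -> item.id).forEach(System.out::println);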
But for completeness, addressing the question literally:
First of all, you should not add a method definition like
@Override
public void forEachRemaining(final Consumer<? super CustomerFileLineItem> action) {
throw new UnsupportedOperationException();
}
This method will surely backfire when you use the Stream API, as that’s where most non-short-circuiting operations will end up.
There is not even a reason to add it. When you don’t declare the method, you’ll get a reasonable default method from the Iterator interface.
Once you have fixed this issue, you can easily convert the Iterator to a Spliterator using Spliterators.spliteratorUnknownSize(Iterator, int).
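For example, a minimal sketch wrapping the custList iterator from the question:
Spliterator<CustomerFileLineItem> sp = Spliterators.spliteratorUnknownSize(
        custList, Spliterator.ORDERED | Spliterator.NONNULL);
Stream<CustomerFileLineItem> stream = StreamSupport.stream(sp, false);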
But there is no reason to do so. Your code becomes simpler when implementing Spliterator in the first place:
public static class CustIterator
extends Spliterators.AbstractSpliterator<CustomerFileLineItem> {
private static final int HEADER_LINES = 9; // 8 + 1 blank line
BufferedReader bufferedReader;
private final ArrayDeque<CustomerFileItem> custFileItems;
public CustIterator(final List<CustomerFileItem> custFileItems) throws IOException {
super(Long.MAX_VALUE, ORDERED|NONNULL);
this.custFileItems = new ArrayDeque<>(custFileItems);
processNext();
}
@Override
public boolean tryAdvance(Consumer<? super CustomerFileLineItem> action) {
if(bufferedReader == null) return false;
try {
String line = bufferedReader.readLine();
while(line == null) {
processNext();
if(bufferedReader == null) return false;
line = bufferedReader.readLine();
}
action.accept(new CustomerFileLineItem(line));
return true;
}
catch(IOException ex) {
if(bufferedReader != null) try {
bufferedReader.close();
bufferedReader = null;
}
catch(IOException ex2) {
ex.addSuppressed(ex2);
}
throw new UncheckedIOException(ex);
}
}
private void processNext() throws IOException {
if (bufferedReader != null) {
bufferedReader.close();
bufferedReader = null;
}
if (!custFileItems.isEmpty()) { // only update if there's another file
CustomerFileItem custFileItem = custFileItems.remove();
GZIPInputStream gis
= new GZIPInputStream(new URL(custFileItem.url).openStream());
// default buffer size is 8 KB
bufferedReader = new BufferedReader(new InputStreamReader(gis, UTF_8));
// read the first few lines
for (int i = 0; i < HEADER_LINES; i++) {
bufferedReader.readLine();
}
}
}
}
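Since CustIterator now is a Spliterator, it plugs straight into StreamSupport; a sketch (the constructor still throws IOException, so the caller must handle it):
try {
    Stream<CustomerFileLineItem> stream =
            StreamSupport.stream(new CustIterator(customers), false);
    stream.forEach(item -> System.out.println(item.id));
} catch (IOException e) {
    throw new UncheckedIOException(e);
}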
But, as said at the beginning, you don’t even need to implement a Spliterator here.
Every Iterable<T> object has the following methods:
Iterator<T> iterator() - the single abstract method
default Spliterator<T> spliterator() - a default method
Therefore, you can create an Iterable<T> back from an Iterator<T>, which requires overriding only that single abstract method:
Iterable<CustomerFileLineItem> iterable = new Iterable<CustomerFileLineItem>() {
@Override
public Iterator<CustomerFileLineItem> iterator() {
return custList;
}
};
This can be shortened into a lambda expression resulting in:
Iterable<CustomerFileLineItem> iterable = () -> custList;
Spliterator<CustomerFileLineItem> spliterator = iterable.spliterator();
... so the Stream is easily created:
Stream<CustomerFileLineItem> stream = StreamSupport.stream(spliterator, false);

Add words to the LanguageTool suggestion list

I use LanguageTool for some spellchecking and spell correction functionality in my application.
The LanguageTool documentation describes how to exclude words from spell checking (by calling the addIgnoreTokens(...) method of the spell-checking rule you're using).
How do you add words (e.g., from a specific dictionary) to spell checking? That is, can LanguageTool fix misspelled words by suggesting words from my specific dictionary?
Unfortunately, I don't think the API supports this. Without the API, you can add words to spelling.txt to get them accepted and used as suggestions. With the API, you might need to extend MorfologikSpellerRule and change this place in the code. (Disclosure: I'm the maintainer of LanguageTool)
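For reference, spelling.txt is a plain word list, one entry per line; a minimal sketch (the en-US variant is the /hunspell/spelling_en-US.txt file mentioned in the answer below):
# lines starting with '#' are comments
myproductname
anothercustomword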
I had a similar requirement: to load custom words into the dictionary as "suggested words", not just "ignored words". In the end I extended MorfologikSpellerRule to do this:
Create a class MorfologikSpellerRuleEx extending MorfologikSpellerRule, override the method match(), and write my own initSpeller() for creating the spellers.
Then, for the language tool, create this custom speller rule to replace the existing one.
Code:
Language lang = new AmericanEnglish();
JLanguageTool langTool = new JLanguageTool(lang);
langTool.disableRule("MORFOLOGIK_RULE_EN_US");
try {
MorfologikSpellerRuleEx spellingRule = new MorfologikSpellerRuleEx(JLanguageTool.getMessageBundle(), lang);
spellingRule.setSpellingFilePath(spellingFilePath);
// spellingFilePath points to a file containing my own words plus the words from /hunspell/spelling_en-US.txt
langTool.addRule(spellingRule);
} catch (IOException e) {
e.printStackTrace();
}
The code of my custom MorfologikSpellerRuleEx:
public class MorfologikSpellerRuleEx extends MorfologikSpellerRule {
private String spellingFilePath = null;
private boolean ignoreTaggedWords = false;
public MorfologikSpellerRuleEx(ResourceBundle messages, Language language) throws IOException {
super(messages, language);
}
@Override
public String getFileName() {
return "/en/hunspell/en_US.dict";
}
@Override
public String getId() {
return "MORFOLOGIK_SPELLING_RULE_EX";
}
@Override
public void setIgnoreTaggedWords() {
ignoreTaggedWords = true;
}
public String getSpellingFilePath() {
return spellingFilePath;
}
public void setSpellingFilePath(String spellingFilePath) {
this.spellingFilePath = spellingFilePath;
}
private void initSpellerEx(String binaryDict) throws IOException {
String plainTextDict = null;
if (JLanguageTool.getDataBroker().resourceExists(getSpellingFileName())) {
plainTextDict = getSpellingFileName();
}
if (plainTextDict != null) {
BufferedReader br = null;
if (this.spellingFilePath != null) {
try {
br = new BufferedReader(new FileReader(this.spellingFilePath));
}
catch (Exception e) {
br = null;
}
}
if (br != null) {
speller1 = new MorfologikMultiSpeller(binaryDict, br, plainTextDict, 1);
speller2 = new MorfologikMultiSpeller(binaryDict, br, plainTextDict, 2);
speller3 = new MorfologikMultiSpeller(binaryDict, br, plainTextDict, 3);
br.close();
}
else {
speller1 = new MorfologikMultiSpeller(binaryDict, plainTextDict, 1);
speller2 = new MorfologikMultiSpeller(binaryDict, plainTextDict, 2);
speller3 = new MorfologikMultiSpeller(binaryDict, plainTextDict, 3);
}
setConvertsCase(speller1.convertsCase());
} else {
throw new RuntimeException("Could not find ignore spell file in path: " + getSpellingFileName());
}
}
private boolean canBeIgnored(AnalyzedTokenReadings[] tokens, int idx, AnalyzedTokenReadings token)
throws IOException {
return token.isSentenceStart() || token.isImmunized() || token.isIgnoredBySpeller() || isUrl(token.getToken())
|| isEMail(token.getToken()) || (ignoreTaggedWords && token.isTagged()) || ignoreToken(tokens, idx);
}
@Override
public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
List<RuleMatch> ruleMatches = new ArrayList<>();
AnalyzedTokenReadings[] tokens = getSentenceWithImmunization(sentence).getTokensWithoutWhitespace();
// lazy init
if (speller1 == null) {
String binaryDict = null;
if (JLanguageTool.getDataBroker().resourceExists(getFileName())) {
binaryDict = getFileName();
}
if (binaryDict != null) {
initSpellerEx(binaryDict); //here's the change
} else {
// should not happen, as we only configure this rule (or rather its subclasses)
// when we have the resources:
return toRuleMatchArray(ruleMatches);
}
}
int idx = -1;
for (AnalyzedTokenReadings token : tokens) {
idx++;
if (canBeIgnored(tokens, idx, token)) {
continue;
}
// if we use token.getToken() we'll get ignored characters inside and speller
// will choke
String word = token.getAnalyzedToken(0).getToken();
if (tokenizingPattern() == null) {
ruleMatches.addAll(getRuleMatches(word, token.getStartPos(), sentence));
} else {
int index = 0;
Matcher m = tokenizingPattern().matcher(word);
while (m.find()) {
String match = word.subSequence(index, m.start()).toString();
ruleMatches.addAll(getRuleMatches(match, token.getStartPos() + index, sentence));
index = m.end();
}
if (index == 0) { // tokenizing char not found
ruleMatches.addAll(getRuleMatches(word, token.getStartPos(), sentence));
} else {
ruleMatches.addAll(getRuleMatches(word.subSequence(index, word.length()).toString(),
token.getStartPos() + index, sentence));
}
}
}
return toRuleMatchArray(ruleMatches);
}
}

Logical problems causing multiple instances of a note to be created

Here is the definition (with most of the functions omitted) of a class called Note.
public class Note
{
private String text;
String fileName = "";
NoteManager noteManager = null;
List<String> hyperlinks = new ArrayList<String>();
public static final int BUFFER_SIZE = 512;
public Note(NoteManager noteManager) {
this.noteManager = noteManager;
this.text = "";
}
public Note(NoteManager noteManager, String content) {
this(noteManager);
if (content == null)
setText("");
else
setText(content);
}
public Note(NoteManager noteManager, CharSequence content) {
this(noteManager, content.toString());
}
....some functions....
public static Note newFromFile(NoteManager noteManager, Context context,
String filename) throws IOException
{
FileInputStream inputFileStream = context.openFileInput(filename);
StringBuilder stringBuilder = new StringBuilder();
byte[] buffer = new byte[BUFFER_SIZE];
int len;
while ((len = inputFileStream.read(buffer)) > 0)
{
String line = new String(buffer, 0, len);
stringBuilder.append(line);
buffer = new byte[Note.BUFFER_SIZE];
}
Note n = new Note(noteManager, stringBuilder.toString().trim());
n.fileName = filename;
inputFileStream.close();
return n;
}
.... some functions attributed to this class
}
These notes are managed by a class called NoteManager, which I have abbreviated below:
public class NoteManager
{
Context context=null;
ArrayList<Note> notes = new ArrayList<Note>();
..... some functions...
public void addNote(Note note)
{
if (note == null || notes.contains(note)) return;
note.noteManager = this;
notes.add(note);
try
{
note.saveToFile(context);
} catch (IOException e)
{
e.printStackTrace();
}
}
....some functions....
public void loadNotes()
{
String[] files = context.fileList();
notes.clear();
for (String fname:files)
{
try
{
notes.add(Note.newFromFile(this, context, fname));
} catch (IOException e)
{
e.printStackTrace();
}
}
}
}
I am trying to work out why this notepad app creates random new notes when the app is fully shut down and then reopened, but I just cannot see what the problem is. I have cut out all the functions which didn't seem to relate to the problem, so the logical error must be here somewhere.
How does one go about finding what I am guessing to be some kind of circular reference or lack of checks?
Android typically uses UTF-8, with multi-byte characters. Creating a new String on an arbitrary byte sub-array can have issues at the beginning and end if you deviate from ASCII.
public static Note newFromFile(NoteManager noteManager, Context context,
String filename) throws IOException
{
Path path = Paths.get(filename);
byte[] bytes = Files.readAllBytes(path);
String content = new String(bytes, "UTF-8");
Note n = new Note(noteManager, content.trim());
n.fileName = filename;
noteManager.addNote(n); // one registration point?
return n;
}
The problem of having multiple instances of a note might need this addition within newFromFile, or maybe an extra check:
public void addNote(Note note)
{
if (note == null || note.noteManager != this || notes.contains(note)) {
return;
}
note.noteManager = this;
notes.add(note);
And finally, a Note must be well defined.
public class Note implements Comparable<Note> {
private NoteManager noteManager;
private final String content; // Immutable.
public Note(NoteManager noteManager, String content) {
this.noteManager = noteManager;
this.content = content;
}
... compare on the immutable content
... hashCode on content
Not being able to change the content, and comparing on the string content, means notes cannot be duplicated, changed while in the set, or mix up the set ordering.
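A minimal sketch of the elided members, assuming the immutable content string is the identity of a note:
@Override
public int compareTo(Note other) {
    return content.compareTo(other.content);
}

@Override
public boolean equals(Object o) {
    return o instanceof Note && content.equals(((Note) o).content);
}

@Override
public int hashCode() {
    return content.hashCode();
}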

Getting metadata from SHOUTcast using IcyStreamMeta

I am writing an app for Android that grabs metadata from SHOUTcast mp3 streams. I am using a pretty nifty class I found online that I slightly modified, but I am still having 2 problems.
1) I have to continuously ping the server to update the metadata using a TimerTask. I am not fond of this approach but it was all I could think of.
2) There is a metric tonne of garbage collection while my app is running. Removing the TimerTask got rid of the garbage collection issue so I am not sure if I am just doing it wrong or if this is normal.
Here is the class I am using:
public class IcyStreamMeta {
protected URL streamUrl;
private Map<String, String> metadata;
private boolean isError;
public IcyStreamMeta(URL streamUrl) {
setStreamUrl(streamUrl);
isError = false;
}
/**
* Get artist using stream's title
*
* @return String
* @throws IOException
*/
public String getArtist() throws IOException {
Map<String, String> data = getMetadata();
if (!data.containsKey("StreamTitle"))
return "";
try {
String streamTitle = data.get("StreamTitle");
String title = streamTitle.substring(0, streamTitle.indexOf("-"));
return title.trim();
}catch (StringIndexOutOfBoundsException e) {
return "";
}
}
/**
* Get title using stream's title
*
* @return String
* @throws IOException
*/
public String getTitle() throws IOException {
Map<String, String> data = getMetadata();
if (!data.containsKey("StreamTitle"))
return "";
try {
String streamTitle = data.get("StreamTitle");
String artist = streamTitle.substring(streamTitle.indexOf("-")+1);
return artist.trim();
} catch (StringIndexOutOfBoundsException e) {
return "";
}
}
public Map<String, String> getMetadata() throws IOException {
if (metadata == null) {
refreshMeta();
}
return metadata;
}
public void refreshMeta() throws IOException {
retrieveMetadata();
}
private void retrieveMetadata() throws IOException {
URLConnection con = streamUrl.openConnection();
con.setRequestProperty("Icy-MetaData", "1");
con.setRequestProperty("Connection", "close");
//con.setRequestProperty("Accept", null);
con.connect();
int metaDataOffset = 0;
Map<String, List<String>> headers = con.getHeaderFields();
InputStream stream = con.getInputStream();
if (headers.containsKey("icy-metaint")) {
// Headers are sent via HTTP
metaDataOffset = Integer.parseInt(headers.get("icy-metaint").get(0));
} else {
// Headers are sent within a stream
StringBuilder strHeaders = new StringBuilder();
int c;
// read into an int so that -1 can signal EOF; casting to char first would mask it
while ((c = stream.read()) != -1) {
strHeaders.append((char) c);
if (strHeaders.length() > 5 && (strHeaders.substring((strHeaders.length() - 4), strHeaders.length()).equals("\r\n\r\n"))) {
// end of headers
break;
}
}
// Match headers to get metadata offset within a stream
Pattern p = Pattern.compile("\\r\\n(icy-metaint):\\s*(.*)\\r\\n");
Matcher m = p.matcher(strHeaders.toString());
if (m.find()) {
metaDataOffset = Integer.parseInt(m.group(2));
}
}
// In case no data was sent
if (metaDataOffset == 0) {
isError = true;
return;
}
// Read metadata
int b;
int count = 0;
int metaDataLength = 4080; // 4080 is the max length
boolean inData = false;
StringBuilder metaData = new StringBuilder();
// Stream position should be either at the beginning or right after headers
while ((b = stream.read()) != -1) {
count++;
// Length of the metadata
if (count == metaDataOffset + 1) {
metaDataLength = b * 16;
}
if (count > metaDataOffset + 1 && count < (metaDataOffset + metaDataLength)) {
inData = true;
} else {
inData = false;
}
if (inData) {
if (b != 0) {
metaData.append((char)b);
}
}
if (count > (metaDataOffset + metaDataLength)) {
break;
}
}
// Set the data
metadata = IcyStreamMeta.parseMetadata(metaData.toString());
// Close
stream.close();
}
public boolean isError() {
return isError;
}
public URL getStreamUrl() {
return streamUrl;
}
public void setStreamUrl(URL streamUrl) {
this.metadata = null;
this.streamUrl = streamUrl;
this.isError = false;
}
public static Map<String, String> parseMetadata(String metaString) {
Map<String, String> metadata = new HashMap<String, String>();
String[] metaParts = metaString.split(";");
Pattern p = Pattern.compile("^([a-zA-Z]+)=\\'([^\\']*)\\'$");
Matcher m;
for (int i = 0; i < metaParts.length; i++) {
m = p.matcher(metaParts[i]);
if (m.find()) {
metadata.put((String)m.group(1), (String)m.group(2));
}
}
return metadata;
}
}
And here is my timer:
private void getMeta() {
timer.schedule(new TimerTask() {
public void run() {
try {
icy = new IcyStreamMeta(new URL(stationUrl));
runOnUiThread(new Runnable() {
public void run() {
try {
artist.setText(icy.getArtist());
title.setText(icy.getTitle());
} catch (IOException e) {
e.printStackTrace();
} catch (StringIndexOutOfBoundsException e) {
e.printStackTrace();
}
}
});
} catch (MalformedURLException e) {
e.printStackTrace();
}
}
},0,5000);
}
Much appreciation for any assistance!
I've replaced the IcyStreamMeta class in my program and am now getting the metadata from the 7.html file that is part of the SHOUTcast spec. Far less data usage, so I feel it is a better option.
I am still using the TimerTask, which is acceptable. There is practically no GC any more, and I am happy with using 7.html and a little regex. :)
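For anyone following the same route: 7.html is a single line of comma-separated stats whose last field is the current song title. A sketch under that assumption (the host and port are hypothetical, and the field order is an assumption about the SHOUTcast v1 status page):
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

final class ShoutcastStatus {
    // Matches the body of 7.html, e.g. "<html><body>1,1,23,625,1,128,Artist - Title</body></html>"
    private static final Pattern BODY = Pattern.compile("<body>(.*)</body>", Pattern.CASE_INSENSITIVE);

    static String currentSong(String host, int port) throws Exception {
        URL url = new URL("http://" + host + ":" + port + "/7.html");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestProperty("User-Agent", "Mozilla/5.0"); // some servers reject unknown agents
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.ISO_8859_1))) {
            String page = in.readLine();
            if (page == null) return "";
            Matcher m = BODY.matcher(page);
            if (!m.find()) return "";
            // assumed fields: listeners,status,peak,max,unique,bitrate,songtitle
            String[] fields = m.group(1).split(",", 7);
            return fields.length == 7 ? fields[6] : "";
        }
    }
}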

How to determine File Format? DOS/Unix/MAC

I have written the following method to determine whether the file in question is formatted with DOS, MAC, or UNIX line endings.
I see at least 1 obvious issue:
1. I am hoping that I will get the EOL on the first run, say within the first 1000 bytes. This may or may not happen.
I ask you to review this and suggest improvements which will lead to hardening the code and making it more generic.
Thank you.
new FileFormat().discover(fileName, 0, 1000);
and then
public void discover(String fileName, int offset, int depth) throws IOException {
BufferedInputStream in = new BufferedInputStream(new FileInputStream(fileName));
FileReader a = new FileReader(new File(fileName));
byte[] bytes = new byte[(int) depth];
in.read(bytes, offset, depth);
a.close();
in.close();
int thisByte;
int nextByte;
boolean isDos = false;
boolean isUnix = false;
boolean isMac = false;
for (int i = 0; i < (bytes.length - 1); i++) {
thisByte = bytes[i];
nextByte = bytes[i + 1];
if (thisByte == 10 && nextByte != 13) {
isDos = true;
break;
} else if (thisByte == 13) {
isUnix = true;
break;
} else if (thisByte == 10) {
isMac = true;
break;
}
}
if (!(isDos || isMac || isUnix)) {
discover(fileName, offset + depth, depth + 1000);
} else {
// do something clever
}
}
Your method seems unnecessarily complicated. Why not:
public class FileFormat {
public enum FileType { WINDOWS, UNIX, MAC, UNKNOWN }
private static final char CR = '\r';
private static final char LF = '\n';
public static FileType discover(String fileName) throws IOException {
Reader reader = new BufferedReader(new FileReader(fileName));
FileType result = discover(reader);
reader.close();
return result;
}
private static FileType discover(Reader reader) throws IOException {
int c;
while ((c = reader.read()) != -1) {
switch(c) {
case LF: return FileType.UNIX;
case CR: {
if (reader.read() == LF) return FileType.WINDOWS;
return FileType.MAC;
}
default: continue;
}
}
return FileType.UNKNOWN;
}
}
Which puts this in a static method that you can then call and use as:
switch(FileFormat.discover(fileName)) {
case WINDOWS: ...
case MAC: ...
case UNKNOWN: ...
}
Here's a rough implementation that guesses the line ending type based on a simple majority and falls back on unknown in a worst-case scenario:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.EnumMap;
import java.util.Map;
import java.util.Scanner;
class LineEndings
{
private enum ExitState
{
SUCCESS, FAILURE;
}
public enum LineEndingType
{
DOS("Windows"), MAC("Mac OS Classic"), UNIX("Unix/Linux/Mac OS X"), UNKNOWN("Unknown");
private final String name;
private LineEndingType(String name)
{
this.name = name;
}
public String toString()
{
if (null == this.name) {
return super.toString();
}
else {
return this.name;
}
}
}
public static void main(String[] arguments)
{
ExitState exitState = ExitState.SUCCESS;
File inputFile = getInputFile();
if (null == inputFile) {
exitState = ExitState.FAILURE;
System.out.println("Error: No input file specified.");
}
else {
System.out.println("Determining line endings for: " + inputFile.getName());
try {
LineEndingType lineEndingType = getLineEndingType(inputFile);
System.out.println("Determined line endings: " + lineEndingType);
}
catch (java.io.IOException exception) {
exitState = ExitState.FAILURE;
System.out.println("Error: " + exception.getMessage());
}
}
switch (exitState) {
case SUCCESS:
System.exit(0);
break;
case FAILURE:
System.exit(1);
break;
}
}
private static File getInputFile()
{
File inputFile = null;
Scanner stdinScanner = new Scanner(System.in);
while (true) {
System.out.println("Enter the input file name:");
System.out.print(">> ");
if (stdinScanner.hasNext()) {
String inputFileName = stdinScanner.next();
inputFile = new File(inputFileName);
if (!inputFile.exists()) {
System.out.println("File not found.\n");
}
else if (!inputFile.canRead()) {
System.out.println("Could not read file.\n");
}
else {
break;
}
}
else {
inputFile = null;
break;
}
}
System.out.println();
return inputFile;
}
private static LineEndingType getLineEndingType(File inputFile)
throws java.io.IOException, java.io.FileNotFoundException
{
EnumMap<LineEndingType, Integer> lineEndingTypeCount =
new EnumMap<LineEndingType, Integer>(LineEndingType.class);
BufferedReader inputReader = new BufferedReader(new FileReader(inputFile));
LineEndingType currentLineEndingType = null;
while (inputReader.ready()) {
int token = inputReader.read();
if ('\n' == token) {
currentLineEndingType = LineEndingType.UNIX;
}
else if ('\r' == token) {
if (inputReader.ready()) {
int nextToken = inputReader.read();
if ('\n' == nextToken) {
currentLineEndingType = LineEndingType.DOS;
}
else {
currentLineEndingType = LineEndingType.MAC;
}
}
}
if (null != currentLineEndingType) {
incrementLineEndingType(lineEndingTypeCount, currentLineEndingType);
currentLineEndingType = null;
}
}
return getMostFrequentLineEndingType(lineEndingTypeCount);
}
private static void incrementLineEndingType(Map<LineEndingType, Integer> lineEndingTypeCount, LineEndingType targetLineEndingType)
{
Integer targetLineEndingCount = lineEndingTypeCount.get(targetLineEndingType);
if (null == targetLineEndingCount) {
targetLineEndingCount = 1; // first occurrence of this line ending
}
else {
targetLineEndingCount++;
}
lineEndingTypeCount.put(targetLineEndingType, targetLineEndingCount);
}
private static LineEndingType getMostFrequentLineEndingType(Map<LineEndingType, Integer> lineEndingTypeCount)
{
Integer maximumEntryCount = Integer.MIN_VALUE;
Map.Entry<LineEndingType, Integer> mostFrequentEntry = null;
for (Map.Entry<LineEndingType, Integer> entry : lineEndingTypeCount.entrySet()) {
int entryCount = entry.getValue();
if (entryCount > maximumEntryCount) {
mostFrequentEntry = entry;
maximumEntryCount = entryCount;
}
}
if (null != mostFrequentEntry) {
return mostFrequentEntry.getKey();
}
else {
return LineEndingType.UNKNOWN;
}
}
}
There is a whole lot wrong with this. You need to understand the FileInputStream class better. Note that read is not guaranteed to read all the bytes you requested, and offset is the offset into the array, not the file. And so on.
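To make that last point concrete, here is a sketch of a loop that reliably fills the buffer (in and depth as in the question's discover method):
byte[] bytes = new byte[depth];
int total = 0;
// read() may return fewer bytes than requested, so accumulate until the
// buffer is full or the stream ends; the second argument is the offset into the ARRAY.
while (total < bytes.length) {
    int n = in.read(bytes, total, bytes.length - total);
    if (n == -1) break; // end of file
    total += n;
}
// only bytes[0..total) are valid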
