Related
I have 2 files as below :
1.txt
first|second|third
fourth|fifth|sixth
2.txt
first1|second1|third1
fourth1|fifth1|sixth1
Now I want to join them both :
first|first1|second1|third1|second|third
fourth|fourth1|fifth1|sixth1|fifth|sixth
I am trying to do this with a Scanner, but I am not able to join them. Any suggestions?
// Asker's attempt: open one Scanner per input file.
Scanner scanner = new Scanner(new File(("F:\\1.txt")));
Scanner scanner2 = new Scanner(new File(("F:\\2.txt")));
// Outer loop: runs while the first file still has a token.
// NOTE(review): this loop's closing brace is missing from the excerpt.
while(scanner.hasNext()) {
// Inner loop: runs while the second file still has a token. Each pass also calls
// scanner.next() without re-checking scanner.hasNext().
while(scanner2.hasNext()) {
// Prints one token from each file, each followed by "|".
// NOTE(review): `system` is lowercase here; the JDK class is `System`.
system.out.println(scanner.next() + "|" + scanner2.next() + "|");
}
// output
first|second|third|first1|second1|third1|
fourth|fifth|sixth|fourth1|fifth1|sixth1|
// Joins the two files line by line, interleaving their '|'-separated fields:
// a1|b1|a2|b2|... followed by the leftover fields of whichever line is longer.
Scanner scanner = new Scanner(new File("F:\\1.txt"));
Scanner scanner2 = new Scanner(new File("F:\\2.txt"));
try {
    // Read whole lines (not whitespace-delimited tokens) so fields may contain spaces.
    // Stops as soon as either file runs out of lines.
    while (scanner.hasNextLine() && scanner2.hasNextLine()) {
        String[] line1 = scanner.nextLine().split("\\|");
        String[] line2 = scanner2.nextLine().split("\\|");
        int shared = Math.min(line1.length, line2.length);
        String[] res = new String[line1.length + line2.length];
        int counter = 0;
        // Alternate fields for as long as both lines have one.
        for (int index = 0; index < shared; index++) {
            res[counter++] = line1[index];
            res[counter++] = line2[index];
        }
        // Append whatever the longer line still has.
        String[] longer = (line1.length > line2.length) ? line1 : line2;
        for (int index = shared; index < longer.length; index++) {
            res[counter++] = longer[index];
        }
        // Join explicitly; going through List.toString() and replacing ", " would
        // corrupt any field that itself contains ", " or starts/ends with a bracket.
        StringBuilder result = new StringBuilder();
        for (int index = 0; index < res.length; index++) {
            if (index > 0) {
                result.append('|');
            }
            result.append(res[index]);
        }
        System.out.println(result.toString());
    }
} finally {
    // Release both file handles even if reading fails part-way.
    scanner.close();
    scanner2.close();
}
You can discard the if conditions if both lines always contain the same number of tokens.
This will give output as,
first|first1|second|second1|third|third1
fourth|fourth1|fifth|fifth1|sixth|sixth1
And
// Joins the two files line by line as: first field of file 1, then every field of
// file 2, then the remaining fields of file 1.
// `scanner` and `scanner2` are the Scanners over the two input files, opened before
// this excerpt.
try {
    // Read whole lines (not whitespace-delimited tokens) so fields may contain spaces.
    while (scanner.hasNextLine() && scanner2.hasNextLine()) {
        String[] line1 = scanner.nextLine().split("\\|");
        String[] line2 = scanner2.nextLine().split("\\|");
        String[] res = new String[line1.length + line2.length];
        int counter = 0;
        // split() drops trailing empty fields, so a line such as "|" yields zero fields.
        if (line1.length > 0) {
            res[counter++] = line1[0];
        }
        for (String field : line2) {
            res[counter++] = field;
        }
        for (int index = 1; index < line1.length; index++) {
            res[counter++] = line1[index];
        }
        // Join explicitly; going through List.toString() and replacing ", " would
        // corrupt any field that itself contains ", " or starts/ends with a bracket.
        StringBuilder result = new StringBuilder();
        for (int index = 0; index < res.length; index++) {
            if (index > 0) {
                result.append('|');
            }
            result.append(res[index]);
        }
        System.out.println(result.toString());
    }
} finally {
    // Release both file handles even if reading fails part-way.
    scanner.close();
    scanner2.close();
}
will give output as
first|first1|second1|third1|second|third
fourth|fourth1|fifth1|sixth1|fifth|sixth
I have a problem that deals with reading a file and putting them into an array.
Then subtracting the arrays to equal 0
Here is what I got so far:
// Asker's attempt: load n integers from the file named by fname
// (fname and n are defined outside this excerpt).
Scanner infile2 = new Scanner(new File(fname));
int [] nums = new int [n];
for (int i = 0; i<n; i++)
{
nums[i] = infile2.nextInt();
}
// Visit every unordered pair (j, k) with j < k exactly once.
for (int j = 0; j<n-1;j++){
for (int k = j+1; k<n;k++){
// A pair whose elements add up to zero.
if (nums[j]+nums[k]==0){
// The sum is stored but never printed or used afterwards in this excerpt.
int val = nums[j]+nums[k];
}
}
}
}
}
Any suggestions?
Try doing it this way
// Loads n integers from the file named by fname (both defined outside this excerpt)
// and prints every pair of positions whose values add up to zero.
Scanner infile2 = new Scanner(new File(fname));
int [] nums = new int [n];
try {
    for (int i = 0; i < n; i++) {
        nums[i] = infile2.nextInt();
    }
} finally {
    // The file is no longer needed once the array is filled.
    infile2.close();
}
// Visit every unordered pair (j, k) with j < k exactly once.
for (int j = 0; j < n - 1; j++) {
    for (int k = j + 1; k < n; k++) {
        // Add in long: int addition wraps around, so Integer.MIN_VALUE + Integer.MIN_VALUE
        // would otherwise evaluate to 0 and be reported as a match.
        long sum = (long) nums[j] + nums[k];
        if (sum == 0) {
            System.out.println("num["+j+"]"+ " + " +"num["+k+"]"+" = " + nums[j] + " + " + nums[k] + " = " + sum);
        }
    }
}
take the nested loop out, then it should solve the repeated count issue
Here is what i tried:
/**
 * Reads all leading integers from the input file and prints every pair of positions
 * whose values add up to zero, one line per pair.
 *
 * Any failure (missing file, malformed input) is printed to standard output rather
 * than propagated.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        String fname = "PathToFile/a.txt";

        // First pass: count the integers so the array can be sized exactly.
        int n = 0;
        Scanner infile = new Scanner(new File(fname));
        try {
            while (infile.hasNextInt()) {
                n++;
                infile.nextInt();
            }
        } finally {
            infile.close();
        }

        // Second pass: load the values.
        int[] nums = new int[n];
        Scanner infile2 = new Scanner(new File(fname));
        try {
            for (int i = 0; i < n; i++) {
                nums[i] = infile2.nextInt();
            }
        } finally {
            infile2.close();
        }

        // Visit every unordered pair (j, k) with j < k exactly once.
        for (int j = 0; j < n - 1; j++) {
            for (int k = j + 1; k < n; k++) {
                // Add in long: int addition wraps around, so Integer.MIN_VALUE +
                // Integer.MIN_VALUE would otherwise evaluate to 0 and be reported.
                long sum = (long) nums[j] + nums[k];
                if (sum == 0) {
                    System.out.println("num["+j+"]"+ " + " +"num["+k+"]"+" = " + nums[j] + " + " + nums[k] + " = " + sum);
                }
            }
        }
    } catch (Exception e) {
        System.out.println(e);
    }
}
result i am getting
num[1] + num[4] = -40 + 40 = 0
num[1] + num[7] = -40 + 40 = 0
num[3] + num[6] = -10 + 10 = 0
So I've been working on this password strength checker. To give the user visual feedback on the breakdown of points while the password is being typed, I use a KeyTyped event, analyze the string, and start awarding points once the minimum length is reached. Here's what part of the analysis looks like:
// String.matches() must match the whole string, so this fires only when the
// password consists solely of lowercase ASCII letters.
if (in.matches("[a-z]+")){
lowerPenalty = -15;
}
// Likewise: only when the password consists solely of digits.
if (in.matches("[0-9]+")){
numPenalty = -15;
}
// Walk the characters; every uppercase ASCII letter bumps the counter and adds 4 points.
// NOTE(review): the loop's closing brace is not part of this excerpt.
for(int i=0;i<inLen;i++){
if ((in.charAt(i) + "").matches("[A-Z]")){
upperCounter++;
upperBonus = upperBonus + 4;
}
However, when I run the program, it doesn't consider the last character of the password typed in by the user, and thus the corresponding counter is not incremented. Here's the screenshot:
As you can see in the above screenshot, the numCounter in Number Bonus row is still at '1' instead of '2'. I've tried using KeyPressed event, though the problem still persists.
Please help.
As requested, here's the keyListener code:
input.addKeyListener(new KeyAdapter(){
    /**
     * Re-scores the password after each typed key and refreshes the score labels.
     * A backspace clears the field and resets every score component.
     */
    @Override
    public void keyTyped(KeyEvent e1){
        if (e1.getKeyChar() == '\b'){
            // -1 because the increment below brings the count back to 0.
            count = -1;
            baseScore = 0;
            lenBonus = 0;
            upperBonus = 0;
            upperCounter = 0;
            numBonus = 0;
            numCounter = 0;
            symBonus = 0;
            symCounter = 0;
            comBonus = 0;
            lowerPenalty = 0;
            numPenalty = 0;
            totalScore = 0;
            input.setText("");
            strength_lbl.setText("Enter a random password");
            strength_lbl.setBackground(Color.LIGHT_GRAY);
        }
        // Key listeners are notified before the text component applies the keystroke,
        // so input.getText() read right here still lacks the character just typed
        // (the "last character is not counted" symptom). Deferring the scoring to the
        // end of the event queue lets the field update first.
        javax.swing.SwingUtilities.invokeLater(new Runnable(){
            @Override
            public void run(){
                count++;
                Counter.setText(count+"");
                analyzeStr(input.getText());
                baseScore_txt.setText(baseScore+"" );
                lowerPen_txt.setText(lowerPenalty+"");
                numonlyPen_txt.setText(numPenalty+"");
                upperBonus_txt.setText(upperBonus+" [" + (upperCounter) + "x4]");
                numBonus_txt.setText(numBonus+" [" + numCounter + "x5]");
                symBonus_txt.setText(symBonus+" [" + symCounter + "x5]");
                comBonus_txt.setText(comBonus+"");
                totalScore = baseScore + lenBonus + upperBonus + numBonus + symBonus + comBonus + lowerPenalty + numPenalty;
                totalScore_txt.setText(totalScore+"");
                // The four bands are mutually exclusive; scores below 1 leave the label as is.
                if (totalScore>=1 && totalScore<50){
                    strength_lbl.setText("Weak!");
                    strength_lbl.setBackground(Color.red);
                } else if (totalScore>=50 && totalScore<75){
                    strength_lbl.setText("Average!");
                    strength_lbl.setBackground(Color.orange);
                } else if (totalScore>=75 && totalScore<100 ){
                    strength_lbl.setText("Strong!");
                    strength_lbl.setBackground(Color.cyan);
                } else if (totalScore>=100){
                    strength_lbl.setText("Secure!");
                    strength_lbl.setBackground(Color.green);
                }
            }
        });
    }
});
As requested, here's the analyzeString method:
/**
 * Updates the running score fields for the password being typed.
 *
 * The scoring is incremental and driven by the {@code count} field: when
 * {@code count} reaches 8 the whole string is scored once; for every later call only
 * the last character is scored and a length bonus of 3 is added. From 8 characters
 * on, a combination bonus of 25 is granted when the password contains at least one
 * uppercase letter, one digit and one symbol.
 *
 * @param str the current password text; when {@code null} the text of the
 *            {@code input} field is used instead
 */
public void analyzeStr(String str){
    String in = (str != null) ? str : input.getText();
    int inLen = in.length();
    if (count == 1){
        strength_lbl.setBackground(Color.RED);
        strength_lbl.setText("At least 8 characters please!");
    }
    if (inLen<8){
        lengthBonus_txt.setText("0");
    }
    else{
        lengthBonus_txt.setText(lenBonus +" [" + (count-8) + "x3]");
    }
    if (count==8){
        baseScore = 50;
        // matches() must match the whole string: lowercase-only / digit-only passwords.
        if (in.matches("[a-z]+")){
            lowerPenalty = -15;
        }
        if (in.matches("[0-9]+")){
            numPenalty = -15;
        }
        for(int i=0;i<inLen;i++){
            scoreCharacter(in.charAt(i));
        }
    }
    if (count>8){
        lenBonus = lenBonus + 3;
        lengthBonus_txt.setText(lenBonus+" [" + (inLen-7) + "x3]");
        // Guard against an empty string so charAt(inLen-1) cannot throw.
        if (inLen > 0){
            scoreCharacter(in.charAt(inLen-1));
        }
    }
    if (count>=8){
        // The former patterns ("[A-Z][0-9][...]" and its permutations) could only match
        // a string of exactly three characters, so the bonus was unreachable here.
        boolean hasUpper = false;
        boolean hasDigit = false;
        boolean hasSymbol = false;
        for(int i=0;i<inLen;i++){
            char c = in.charAt(i);
            hasUpper = hasUpper || isAsciiUpper(c);
            hasDigit = hasDigit || isAsciiDigit(c);
            hasSymbol = hasSymbol || isPasswordSymbol(c);
        }
        if (hasUpper && hasDigit && hasSymbol){
            comBonus = 25;
        }
    }
}

/** Adds the per-character bonuses (uppercase: 4, digit: 5, symbol: 5) for one character. */
private void scoreCharacter(char c){
    if (isAsciiUpper(c)){
        upperCounter++;
        upperBonus = upperBonus + 4;
    }
    if (isAsciiDigit(c)){
        numCounter++;
        numBonus = numBonus + 5;
    }
    if (isPasswordSymbol(c)){
        symCounter++;
        symBonus = symBonus + 5;
    }
}

/** True for 'A'..'Z' only, mirroring the regex class [A-Z]. */
private static boolean isAsciiUpper(char c){
    return c >= 'A' && c <= 'Z';
}

/** True for '0'..'9' only, mirroring the regex class [0-9]. */
private static boolean isAsciiDigit(char c){
    return c >= '0' && c <= '9';
}

/**
 * True for the symbols that earn a bonus. The former character class separated the
 * symbols with commas, which made ',' itself count as a symbol, and listed '#' twice;
 * the duplicate is presumably a mangled '@' (NOTE(review): confirm the intended set).
 */
private static boolean isPasswordSymbol(char c){
    return "!@#$%^&*?_~".indexOf(c) >= 0;
}
How do I create a JSON array like the one below using servlets? Right now I write the JSON by hand into a String and convert it to a JSON array, but I always get an error:
error:
Error parsing data org.json.JSONException: Expected literal value at character 982 of
{
"question": [
{
"text": "",
"answers": [ { "a1": "" }, { "a2": "" }, { "a3": "" } ],
"correctAnswers": [ { "ra1": "" }, { "ra2": "" }, { "ra3": "" } ],
"explanation": ""
},
.
.
.
]
}
this is the servlet code: some parts are in German, sorry if you have problems understanding
// Builds the JSON document
//   { "frage": [ { "text": ..., "antworten": [ {"a1": ...}, ... ],
//                  "richtigeantworten": [ {"ra1": ...}, ... ], "erklaerung": ... }, ... ] }
// for the chapters selected through the request parameters k1..k14, then writes it to
// the response and to standard output.

// Local helper that renders a value as a JSON string literal. It is a local class
// because this fragment lives inside a method body.
class JsonQuoter {
    String quote(String raw) {
        // String.valueOf keeps the previous behaviour for null values (the text "null").
        String value = String.valueOf(raw);
        StringBuilder out = new StringBuilder(value.length() + 2);
        out.append('"');
        for (int p = 0; p < value.length(); p++) {
            char ch = value.charAt(p);
            switch (ch) {
                case '"':
                    out.append("\\\"");
                    break;
                case '\\':
                    out.append("\\\\");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                default:
                    if (ch < 0x20) {
                        // Remaining control characters must be \\u-escaped in JSON.
                        out.append(String.format("\\u%04x", (int) ch));
                    } else {
                        out.append(ch);
                    }
            }
        }
        out.append('"');
        return out.toString();
    }
}
JsonQuoter json = new JsonQuoter();

StringBuilder sb = new StringBuilder();
ArrayList<Integer> kapitel = new ArrayList<Integer>();
String text = "false";

// Collect the chapter numbers whose request parameter k<number> equals "1".
for (int i = 0; i < 14; i++) {
    if ("1".equals(request.getParameter("k" + (i + 1)))) {
        kapitel.add(i + 1);
    }
}

Main main = new Main();
// NOTE(review): db is no longer used below (it only sized the former scratch arrays);
// kept in case the Datenbank constructor has side effects. Confirm and remove.
Datenbank db = new Datenbank();
main.frageAntworten(kapitel);
// Row layouts as used below: fragen[i] = {id, text, explanation},
// antworten[j] = {questionId, answerText, correctFlag}.
String[][] arrayAntworten = main.antworten;
String[][] arrayFragen = main.fragen;

sb.append("{");
sb.append("\n\"frage\": [");
// One pass over all questions; the separator is written before every element except
// the first, so no special case is needed for the last question or an empty list.
for (int i = 0; i < arrayFragen.length; i++) {
    String frageId = arrayFragen[i][0];
    if (i > 0) {
        sb.append(",");
    }
    sb.append("\n{");
    sb.append("\n\"text\": ").append(json.quote(arrayFragen[i][1])).append(", ");

    // Answer options of this question: {"a1": ...}, {"a2": ...}, ...
    sb.append("\n\"antworten\": [");
    int answerNo = 0;
    for (int j = 0; j < arrayAntworten.length; j++) {
        if (!arrayAntworten[j][0].equals(frageId)) {
            continue;
        }
        if (answerNo > 0) {
            sb.append(", ");
        }
        answerNo++;
        sb.append("\n{\"a").append(answerNo).append("\": ")
                .append(json.quote(arrayAntworten[j][1])).append("}");
    }
    sb.append("\n],");

    // Correct answers, position-aligned with the options above: the answer text when
    // the flag is not "false", otherwise the placeholder text "null".
    sb.append("\n\"richtigeantworten\": [");
    int correctNo = 0;
    for (int j = 0; j < arrayAntworten.length; j++) {
        if (!arrayAntworten[j][0].equals(frageId)) {
            continue;
        }
        String value = "false".equals(arrayAntworten[j][2]) ? "null" : arrayAntworten[j][1];
        if (correctNo > 0) {
            sb.append(", ");
        }
        correctNo++;
        sb.append("\n{\"ra").append(correctNo).append("\": ")
                .append(json.quote(value)).append("}");
    }
    sb.append("\n],");

    // Last member of the object: no trailing comma.
    sb.append("\n\"erklaerung\": ").append(json.quote(arrayFragen[i][2]));
    sb.append("\n}");
}
sb.append("\n]");
sb.append("\n}");
text = sb.toString();
response.getWriter().print(text);
System.out.println(text);
the android part:
private class JSONParser extends AsyncTask<URL, String, String> {
#Override
protected String doInBackground(URL... url) {
InputStream is = null;
String json = "";
// VERBINDUNGSAUFBAU
try {
URLConnection connection = url[0].openConnection();
HttpURLConnection http = (HttpURLConnection)connection;
int response = http.getResponseCode();
if (response == HttpURLConnection.HTTP_OK) {
is = http.getInputStream();}
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
// CONVERTIERUNG VON INPUTSTREAM ZU STRING
try {
BufferedReader reader = new BufferedReader(
new InputStreamReader(is, "iso-8859-1"), 8);
StringBuilder sb = new StringBuilder();
String line = null;
while ((line = reader.readLine()) != null) {
sb.append(line);
}
is.close();
json = sb.toString();
} catch (Exception e) {
Log.e("Buffer Error", "Error converting result " + e.toString());
}
return json;
// InputStream is = m.getData(url[0]);
// String json = m.InputstreamToString(is);
// return json;
}
protected void onPostExecute(String json) {
TextView fragenNr = (TextView)
findViewById(R.id.FragenText);
fragenNr.setText(json);
JSONObject jObj = null;
try {
jObj = new JSONObject(json);
} catch (JSONException e) {
Log.e("JSON Parser", "Error parsing data " + e.toString());
}
try {
frage = jObj.getJSONArray(TAG_FRAGE);
for (int i = 0; i < frage.length(); i++) {
JSONObject f = frage.getJSONObject(i);
// Storing each json item in variable
String text = f.getString(TAG_TEXT);
String erklaerung = f.getString(TAG_ERKLAERUNG);
HashMap<String, Object> map = new HashMap<String, Object>();
map.put(TAG_TEXT, text);
map.put(TAG_ERKLAERUNG, erklaerung);
ArrayList<String> antwortenArray = new ArrayList<String>();
JSONArray antworten = f.getJSONArray(TAG_ANTWORTEN);
for (int j = 0; j < antworten.length(); j++) {
JSONObject a = antworten.getJSONObject(j);
String tag = TAG_ANTWORTEN_a + (j + 1);
antwortenArray.add(a.getString(tag));
map.put(tag, antwortenArray.get(j));
}
map.put("anzahlAntworten", antworten.length());
ArrayList<String> richtigeantwortenArray = new ArrayList<String>();
JSONArray richtigeantworten = f
.getJSONArray(TAG_RICHTIGEANTWORTEN);
for (int k = 0; k < richtigeantworten.length(); k++) {
JSONObject ra = richtigeantworten.getJSONObject(k);
String tag = TAG_RICHTIGEANTWORTEN_ra + (k + 1);
richtigeantwortenArray.add(ra.getString(tag));
map.put(tag, richtigeantwortenArray.get(k));
}
map.put("anzahlRichtigerAntworten",richtigeantworten.length());
fragenUndAntworten.add(map);
fragenListe.add(text);
}
} catch (JSONException e) {
e.printStackTrace();
}
View weiterButton = findViewById(R.id.weiterButton);
weiterButton.setOnClickListener(Fragen.this);
weiterButton.performClick();
}
}
Don't try to generate JSON by yourself. Generate a data structure, and use a JSON parser/generator to actually transform this data structure into a JSON string. The generator will take care of escaping the characters for you. There are dozens of JSON APIs freely available. See http://json.org/ for a list.
Use json.jar to create the JSON object or array. It has methods for creating and handling JSON.
I used lingpipe for sentence detection but I don't have any idea if there is a better tool. As far as I have understood, there is no way to compare two sentences and see if they mean the same thing.
Is there any other good source that offers a pre-built method for comparing two sentences to see whether they are similar?
My requirement is as below:
// Requirement sketch (not compilable as written): compareOf and sop are not defined
// in this excerpt, and bothAreEqual is declared twice.
String sent1 = "Mary and Meera are my classmates.";
String sent2 = "Meera and Mary are my classmates.";
String sent3 = "I am in Meera and Mary's class.";
// Several sentences will be formed; what is needed is a comparison like the
// following, which reports whether two sentences mean the same thing.
boolean bothAreEqual = compareOf(sent1, sent2);
sop(bothAreEqual); // should print true
boolean bothAreEqual = compareOf(sent2, sent3);
sop(bothAreEqual);// should print true
Testing whether two sentences have the same meaning is too open-ended a question.
However, there are methods for comparing two sentences to see whether they are similar. There are many possible definitions of similarity that can be tested with pre-built methods.
See for example http://en.wikipedia.org/wiki/Levenshtein_distance
Distance between
'Mary and Meera are my classmates.'
and 'Meera and Mary are my classmates.':
6
Distance between
'Mary and Meera are my classmates.'
and 'Alice and Bobe are not my classmates.':
14
Distance between
'Mary and Meera are my classmates.'
and 'Some totally different sentence.':
29
code:
/**
 * Levenshtein (edit) distance between character sequences, computed with the
 * classic full dynamic-programming table.
 */
public class LevenshteinDistance {

    /**
     * Returns the minimum number of single-character insertions, deletions and
     * substitutions needed to turn {@code str1} into {@code str2}.
     *
     * @param str1 the first sequence
     * @param str2 the second sequence
     * @return the edit distance; 0 when both sequences are equal
     */
    public static int computeDistance(CharSequence str1,
            CharSequence str2) {
        final int rows = str1.length();
        final int cols = str2.length();
        // table[r][c] = distance between the first r chars of str1 and the first c of str2.
        final int[][] table = new int[rows + 1][cols + 1];

        // Turning a prefix into the empty string (or back) costs its length.
        for (int r = 0; r <= rows; r++) {
            table[r][0] = r;
        }
        for (int c = 0; c <= cols; c++) {
            table[0][c] = c;
        }

        for (int r = 1; r <= rows; r++) {
            final char current = str1.charAt(r - 1);
            for (int c = 1; c <= cols; c++) {
                int deletion = table[r - 1][c] + 1;
                int insertion = table[r][c - 1] + 1;
                int substitution = table[r - 1][c - 1];
                if (current != str2.charAt(c - 1)) {
                    substitution++;
                }
                table[r][c] = Math.min(Math.min(deletion, insertion), substitution);
            }
        }
        return table[rows][cols];
    }

    /** Prints the distance from a reference sentence to three sample sentences. */
    public static void main(String[] args) {
        String reference = "Mary and Meera are my classmates.";
        String[] others = {
            "Meera and Mary are my classmates.",
            "Alice and Bobe are not my classmates.",
            "Some totally different sentence."
        };
        for (String other : others) {
            System.out.println("Distance between \n'"+reference+"' \nand '"+other+"': \n"+computeDistance(reference, other));
        }
    }
}
Here is what I have come up with. This is just a stopgap until I get to the real thing, but it might be of some help to people out there.
package com.examples;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import com.aliasi.sentences.MedlineSentenceModel;
import com.aliasi.sentences.SentenceModel;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.Files;
import com.sun.accessibility.internal.resources.accessibility;
/**
 * Compares sentences in two ways: by character-level Levenshtein distance, and by
 * the percentage of non-stop-word tokens they share (tokenized with LingPipe).
 */
public class SentenceWordAnalysisAndLevenshteinDistance {

    static final TokenizerFactory TOKENIZER_FACTORY = IndoEuropeanTokenizerFactory.INSTANCE;
    static final SentenceModel SENTENCE_MODEL = new MedlineSentenceModel();

    /** Returns the smallest of the three values. */
    private static int minimum(int a, int b, int c) {
        return Math.min(Math.min(a, b), c);
    }

    /**
     * Returns the Levenshtein distance between the two sequences: the minimum number
     * of single-character insertions, deletions and substitutions turning
     * {@code str1} into {@code str2}.
     */
    public static int computeDistance(CharSequence str1, CharSequence str2) {
        int[][] distance = new int[str1.length() + 1][str2.length() + 1];
        for (int i = 0; i <= str1.length(); i++) {
            distance[i][0] = i;
        }
        for (int j = 0; j <= str2.length(); j++) {
            distance[0][j] = j;
        }
        for (int i = 1; i <= str1.length(); i++) {
            for (int j = 1; j <= str2.length(); j++) {
                int substitutionCost = (str1.charAt(i - 1) == str2.charAt(j - 1)) ? 0 : 1;
                distance[i][j] = minimum(
                        distance[i - 1][j] + 1,
                        distance[i][j - 1] + 1,
                        distance[i - 1][j - 1] + substitutionCost);
            }
        }
        return distance[str1.length()][str2.length()];
    }

    /** Runs the word-based comparison of a few sample sentences against the first one. */
    public static void main(String[] args) {
        try {
            ArrayList<String> sentences = new ArrayList<String>();
            // To read the sentences from a text file instead:
            // sentences = readSentencesInFile("D:\\sam.txt");
            sentences.add("Mary and Meera are my classmates.");
            sentences.add("Mary and Meera are my classmates.");
            sentences.add("Meera and Mary are my classmates.");
            sentences.add("Alice and Bobe are not my classmates.");
            sentences.add("Some totally different sentence.");
            // Word-overlap comparison (own implementation).
            wordAnalyser(sentences);
            // Character-level alternative:
            // levenshteinDistance(sentences);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the file at {@code path} (ISO-8859-1), splits it into sentences with the
     * sentence model and returns them in order, logging progress to standard output.
     *
     * @return the sentences found; empty when none are detected or reading fails
     */
    private static ArrayList<String> readSentencesInFile(String path) {
        ArrayList<String> sentencesList = new ArrayList<String>();
        try {
            System.out.println("Reading file from : " + path);
            File file = new File(path);
            String text = Files.readFromFile(file, "ISO-8859-1");
            System.out.println("INPUT TEXT: ");
            System.out.println(text);

            List<String> tokenList = new ArrayList<String>();
            List<String> whiteList = new ArrayList<String>();
            Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(
                    text.toCharArray(), 0, text.length());
            tokenizer.tokenize(tokenList, whiteList);
            System.out.println(tokenList.size() + " TOKENS");
            System.out.println(whiteList.size() + " WHITESPACES");

            String[] tokens = new String[tokenList.size()];
            String[] whites = new String[whiteList.size()];
            tokenList.toArray(tokens);
            whiteList.toArray(whites);
            int[] sentenceBoundaries = SENTENCE_MODEL.boundaryIndices(tokens,
                    whites);
            System.out.println(sentenceBoundaries.length
                    + " SENTENCE END TOKEN OFFSETS");
            if (sentenceBoundaries.length < 1) {
                System.out.println("No sentence boundaries found.");
                return new ArrayList<String>();
            }

            // Each boundary is the index of a sentence's last token; rebuild every
            // sentence from its tokens and the whitespace that follows each of them.
            int sentStartTok = 0;
            for (int i = 0; i < sentenceBoundaries.length; ++i) {
                int sentEndTok = sentenceBoundaries[i];
                System.out.println("SENTENCE " + (i + 1) + ": ");
                StringBuilder sentenceString = new StringBuilder();
                for (int j = sentStartTok; j <= sentEndTok; j++) {
                    sentenceString.append(tokens[j]).append(whites[j + 1]);
                }
                System.out.println(sentenceString.toString());
                sentencesList.add(sentenceString.toString());
                sentStartTok = sentEndTok + 1;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return sentencesList;
    }

    /** Prints the Levenshtein distance from the first sentence to every sentence. */
    private static void levenshteinDistance(ArrayList<String> sentences) {
        System.out.println("\nLevenshteinDistance");
        for (int i = 0; i < sentences.size(); i++) {
            System.out.println("Distance between \n'" + sentences.get(0)
                    + "' \nand '" + sentences.get(i) + "': \n"
                    + computeDistance(sentences.get(0),
                            sentences.get(i)));
        }
    }

    /**
     * Tokenizes every sentence, removes stop words, and prints the word-overlap
     * similarity of each sentence (including the first) against the first one.
     */
    private static void wordAnalyser(ArrayList<String> sentences) {
        System.out.println("No.of Sentences : " + sentences.size());
        List<String> stopWordsList = getStopWords();
        ArrayList<List<String>> filteredSentences = new ArrayList<List<String>>();
        for (int i = 0; i < sentences.size(); i++) {
            String sentence = sentences.get(i);
            List<String> tokenList = new ArrayList<String>();
            List<String> whiteList = new ArrayList<String>();
            Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(
                    sentence.toCharArray(), 0, sentence.length());
            tokenizer.tokenize(tokenList, whiteList);
            System.out.print("Sentence " + (i + 1) + ": " + tokenList.size()
                    + " TOKENS, ");
            System.out.println(whiteList.size() + " WHITESPACES");
            filteredSentences.add(filterStopWords(tokenList, stopWordsList));
        }
        for (int i = 0; i < sentences.size(); i++) {
            System.out.println("\n" + (i + 1) + ". Comparing\n '"
                    + sentences.get(0) + "' \nwith\n '"
                    + sentences.get(i)
                    + "' : \n");
            System.out.println(filteredSentences.get(0) + "\n and \n"
                    + filteredSentences.get(i));
            System.out.println("Percentage of similarity: "
                    + calculateSimilarity(filteredSentences.get(0),
                            filteredSentences.get(i))
                    + "%");
        }
    }

    /**
     * Returns the average of the two directional overlaps: the percentage of
     * {@code list1} words found in {@code list2} and vice versa.
     *
     * @return a value between 0 and 100
     */
    private static double calculateSimilarity(List<String> list1,
            List<String> list2) {
        double forward = overlapPercentage(list1, list2);
        double backward = overlapPercentage(list2, list1);
        return (forward + backward) / 2;
    }

    /**
     * Returns the percentage of {@code words} that also occur in {@code other}.
     * An empty {@code words} list scores 0 instead of dividing by zero, and the
     * division is done in floating point so the result is not truncated.
     */
    private static double overlapPercentage(List<String> words, List<String> other) {
        if (words.isEmpty()) {
            return 0.0;
        }
        int hits = 0;
        for (String word : words) {
            if (other.contains(word)) {
                hits++;
            }
        }
        return (hits * 100.0) / words.size();
    }

    /**
     * Returns the stop-word list: the tokens of the built-in comma-separated string
     * (which also contains "."), without the comma tokens themselves.
     */
    private static List<String> getStopWords() {
        String stopWordsString = ".,a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your";
        List<String> stopWordsList = new ArrayList<String>();
        List<String> stopWordTokenList = new ArrayList<String>();
        List<String> whiteList = new ArrayList<String>();
        Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(
                stopWordsString.toCharArray(), 0, stopWordsString.length());
        tokenizer.tokenize(stopWordTokenList, whiteList);
        for (String token : stopWordTokenList) {
            if (!token.equals(",")) {
                stopWordsList.add(token);
            }
        }
        System.out.println("No.of stop words: " + stopWordsList.size());
        return stopWordsList;
    }

    /**
     * Returns the tokens of {@code tokenList} that are not stop words, in order and
     * with their original casing. The stop-word list is all lowercase, so tokens are
     * lowercased for the lookup; otherwise capitalized stop words such as "Some" or
     * "I" would slip through.
     */
    private static List<String> filterStopWords(List<String> tokenList,
            List<String> stopWordsList) {
        List<String> filteredSentenceWords = new ArrayList<String>();
        for (String sentenceToken : tokenList) {
            String lookupKey = sentenceToken.toLowerCase(java.util.Locale.ROOT);
            if (!stopWordsList.contains(lookupKey)) {
                filteredSentenceWords.add(sentenceToken);
            }
        }
        return filteredSentenceWords;
    }
}