Counting the frequency of n-grams in a sample text file - java

So, I'm implementing a Markov random text generator in Java, and I've gotten as far as plucking out the n-grams in the text file, but now I'm struggling to write a class that gives the number of occurrences of the n-grams in the text (and eventually the probability).
This is the code I have so far. It's a little messy but this is a rough draft.
//here's the main file, where I parse the text and create a new n-gram object with the given text
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
public class Markov {
public static String readCorpusToString(File fileName) {
String corpus = " ";
try {
corpus = new String(Files.readAllBytes(Paths.get(String.valueOf(fileName))));
}
catch (IOException e) {
e.printStackTrace();
}
return corpus;
}
public static void main(String[] args) {
File text = new File(args[0]);
String corpus = readCorpusToString(text);
//System.out.println(corpus);
Ngram test = new Ngram(3, corpus);
for ( int i = 0; i <= corpus.length(); i++) {
System.out.println(test.next());
}
}
}
and here's the class for my n-gram object
import java.util.Iterator;
public class Ngram implements Iterator<String> {
String[] words;
int pos = 0, n;
public Ngram(int n, String str) {
this.n = n;
words = str.split(" ");
}
public boolean hasNext() {
return pos < words.length - n + 1;
}
public String next() {
StringBuilder sb = new StringBuilder();
for (int i = pos; i < pos + n; i++) {
sb.append((i > pos ? " " : "") + words[i]);
}
pos++;
return sb.toString();
}
public void remove() {
throw new UnsupportedOperationException();
}
}

Related

How can you rearrange characters to make a word existing in the dictionary?

I've been trying to tweak little bits of code here and there to make my output correct. I am trying to have my code be able to rearrange the letters in a word to make other words that exist in words.txt, from https://github.com/dwyl/english-words. Any help would be appreciated. Thanks.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Scanner;
public class WordsInWords {
public static void main(String[] args) throws IOException {
String file = "/Users/laptop/Desktop/Test.txt";
BufferedReader r = new BufferedReader(new FileReader(file));
StringBuilder b = new StringBuilder();
String c = r.readLine();
while (c != null) {
b.append(c);
b.append(" ");
c = r.readLine();
}
Scanner s = new Scanner(System.in);
String in = s.nextLine();
char[] input = new char[in.length()];
for (int i = 0; i < input.length; i++) {
input[i] = in.charAt(i);
}
char[] temp = null;
for (int i = 0; i < b.length(); i++) {
if (i < b.length() - 1 && b.charAt(i) == ' ' && b.charAt(i + 1) != ' ') {
boolean found = false;
int counter = 0;
while (!found) {
counter++;
if (b.charAt(i + counter) == ' ') {
found = true;
temp = new char[counter - 1];
for (int j = i + 1; j < i + counter; j++) {
temp[j] = b.charAt(j);
}
}
}
}
}
if (Arrays.asList(input).contains(temp)) {
System.out.println(temp);
}
}
}
Here is my tweaked code:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Scanner;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class WordsInWords {
public static void main(String[] args) throws IOException {
String file = "/Users/laptop/Desktop/words.txt";
BufferedReader r = new BufferedReader(new FileReader(file));
String[] words;
String c = r.readLine();
int a=0;
while (c != null) {
c = r.readLine();
a++;
}
words=new String[a];
a=0;
r = new BufferedReader(new FileReader(file));
String temp=r.readLine();
while (temp != null) {
words[a]=r.readLine();
temp=words[a];
a++;
}
for (int i = 0; i < words.length; i++) {
System.out.println(words[i]);
}
Scanner s = new Scanner(System.in);
String input = s.nextLine();
List<String> found = findRearranged(input, words);
System.out.println("For '" + input + "' found: " + Arrays.toString(found.toArray()));
}
public static List<String> findRearranged(String input, String[] words) {
List<String> found = new ArrayList<>();
for (String w : words) {
if (hasSameLetters(w, input)) {
found.add(w);
}
}
return found;
}
public static boolean hasSameLetters(String a, String b) {
if (a.length() != b.length()) {
return false;
}
while (a.length() > 0) {
for (char c : b.toCharArray()) {
int index = a.indexOf(c);
if (index >= 0) {
a = a.replace(String.valueOf(c), "");
} else {
return false;
}
}
}
return true;
}
}
Is rearranging the characters necessary?
Rearranging the input and searching through dictionary to find equal word could take lot's of computing time, single few letter word can have many permutations.
For me, it looks like you want to find the words in dictionary for input word containing same letters (in other words, if the input word would be rearranged, it would give you the existing word in dictionary). Probably checking if both words are having exactly same letters, regardless their position in both strings should satisfy the requirement.
Here's the sample for that approach:
public class Sample {
public static void main(String[] args) {
//the words in dictionary
String[] words = {"words", "sword", "nord", "chord", "score", "cores", "mors", "xyz", "scores", "ordsw"};
String[] input = {"sword", "score", "tores", "nores"};
for (String i : input) {
List<String> found = findRearranged(i, words);
System.out.println("For '" + i + "' found: " + Arrays.toString(found.toArray()));
}
}
public static List<String> findRearranged(String input, String[] words) {
List<String> found = new ArrayList<>();
for (String w : words) {
if (hasSameLetters(w, input)) {
found.add(w);
}
}
return found;
}
public static boolean hasSameLetters(String a, String b) {
if (a.length() != b.length()) {
return false;
}
while (a.length() > 0) {
for (char c : b.toCharArray()) {
int index = a.indexOf(c);
if (index >= 0) {
a = a.replace(String.valueOf(c), "");
} else {
return false;
}
}
}
return true;
}
}
And this outputs in:
For 'sword' found: [words, sword, ordsw]
For 'score' found: [score, cores]
For 'tores' found: []
For 'nores' found: []
Edit:
I see the assumption is that every word is in its own line.
I saw that you already came up with counting the words in the file, but still in that case it's better to use Collections which are having dynamic size.
Here's fixed sample:
public class WordsInWords {
public static void main(String[] args) throws IOException {
String file = "C:\\Users\\masta\\IdeaProjects\\podstawka-spring-java\\words.txt";
BufferedReader r = new BufferedReader(new FileReader(file));
List<String> words = new ArrayList<>();
String c = r.readLine();
while (c != null) {
words.add(c);
c = r.readLine();
}
for (int i = 0; i < words.size(); i++) {
System.out.println("Words: " + words.get(i));
}
Scanner s = new Scanner(System.in);
String input = s.nextLine();
List<String> found = findRearranged(input, words);
System.out.println("For '" + input + "' found: " + Arrays.toString(found.toArray()));
}
public static List<String> findRearranged(String input, List<String> words) {
List<String> found = new ArrayList<>();
for (String w : words) {
if (hasSameLetters(w, input)) {
found.add(w);
}
}
return found;
}
public static boolean hasSameLetters(String a, String b) {
if (a.length() != b.length()) {
return false;
}
while (a.length() > 0) {
for (char c : b.toCharArray()) {
int index = a.indexOf(c);
if (index >= 0) {
a = a.replace(String.valueOf(c), "");
} else {
return false;
}
}
}
return true;
}
}

print specific characters in an array of strings

I need to print specific indexes of strings in an array, for example
String[] words = {car, bike, truck};
print words[0][0] and the result would be c and print words[0][1] = a.
Also i have to read the array from a text file. What i have so far will print the first word of the array.
import java.util.Scanner;
import java.io.File;
import java.io.IOException;
import java.io.FileNotFoundException;
public class DemoReadingFiles
{
public static void main (String[] args)
{
String[] words = readArray("words.txt");
System.out.println(words[0]);//i can get it to print specific elements
}
public static String[] readArray(String file)
{
int ctr = 0;
try
{
Scanner s1 = new Scanner(new File(file));
while (s1.hasNextLine())
{
ctr = ctr + 1;
s1.next();
}
String[] words = new String[ctr];
Scanner s2 = new Scanner(new File(file));
for (int i = 0; i < ctr; i = i + 1)
{
words[i] = s2.next();
}
return words;
}
catch (FileNotFoundException e)
{
}
return null;
}
}
public static void main(String[] args) {
String[] words = {"cars", "bike", "truck"};
System.out.println("Specific character print:" + words[0].charAt(0));
System.out.println("Multi character selection printed as follows:" + words[0].substring(1, words[0].length() - 1));
}
Output:
Specific character print:c
Multi character selection printed as follows:ar

Palindromes and Arrays, an effort in frustration

I am currently attempting to make a Palindrome Checker using a series of Clean and dirty arrays and while I've gotten it to compile but I'm having issues getting the code to detect the actual palindromes
Below is the code in question, any and all help would be most appreciated.
import java.io.*;
import java.util.Scanner;
public class palindrome
{
public static void main (String[] args) throws IOException
{
File inputFile = new File ("Palindromes.txt");
Scanner inputScan = new Scanner (inputFile);
String [] dirty = new String [20];
int i = 0;
while (inputScan.hasNext())
{
dirty[i] = inputScan.nextLine();
System.out.println(dirty[i]);
i++;
}
inputScan.close();
String [] clean = new String [i];
String reverse ="";
for (int x = 0; x < clean.length; x++)
{
clean[x] = "";
for (int z = length; z < dirty[x].length(); z--)
{
char test = dirty[x].charAt(z);
if (Character.isLetterOrDigit(test))
{
test = Character.toLowerCase(test);
clean [x] += test;
if (clean[x].equals(clean[z]))
{
System.out.println(clean[z] +" is a palindrome");
} else
{
System.out.println(clean[z] +" is NOT a palindrome");
}
}
}
}
for (int j = 0; j < clean.length; j++)
{
System.out.println(clean[j]);
}
}
Are you trying to do something like this?
import java.io.File;
import java.io.IOException;
import java.util.Scanner;
public class Palindrome {
public static boolean isPalindrome(String s) {
s = s.toLowerCase().replaceAll("[\\s\\p{Punct}]", "");
int len = s.length(), mid = len / 2;
for (int x = 0; x < mid; x++)
if (s.charAt(x) != s.charAt(len - x - 1))
return false;
return true;
}
public static void main(String[] args) throws IOException {
File inputFile = new File("Palindromes.txt");
try (Scanner scanner = new Scanner(inputFile)) {
while (scanner.hasNextLine()) {
String s = scanner.nextLine();
if (isPalindrome(s)) {
System.out.println(s + " is a palindrome");
} else {
System.out.println(s + " is NOT a palindrome");
}
}
}
}
}

Java - Hashmapping a text file

and please excuse my ignorance, I have been puzzling on this for a while.
I have a huge .txt file containing mostly letters. I need to create HashMaps to store word length, Word characters and Word count...i have to print out the longest word occurred more than three times and show how many times it occurred.
Im thinking something like that
private void readWords(){
BufferedReader in = new BufferedReader(new FileReader("text.txt"));
Map<Integer, Map<String, Integer>>
}
The problem is that i dont quite know how to save to HashMap, can anybody help please?
Thank you!
import java.io.File;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
public class HashMapExample {
static String fileName = "text.txt";
private static Scanner input;
public static void main(String[] args) throws FileNotFoundException {
input = new Scanner(new File(fileName));
Map<String, Integer> map = new HashMap<String, Integer>();
while (input.hasNext()) {
String word = input.next();
if (map.containsKey(word)) {
int temp = map.get(word) + 1;
map.put(word, temp);
} else {
map.put(word, 1);
}
}
System.out.println("printing longest word(s) with word count < 3");
System.out.println("");
// iterate through the key set and display word, word length and values
System.out.printf("%-25s\t%-25s\t%s\n", "Word", "Word Length", "Count");
String longest = getLongest(map);
int valueOfLongest = 0;
if (!longest.equals("")) {
valueOfLongest = longest.length();
System.out.printf("%-25s\t%-25s\t%s\n", longest, longest.length(), map.get(longest));
map.remove(longest);
}
boolean isAllRemoved = false;
while (!isAllRemoved) {
isAllRemoved = false;
longest = getLongest(map);
if (!longest.equals("") && longest.length() == valueOfLongest){
System.out.printf("%-25s\t%-25s\t%s\n", longest, longest.length(), map.get(longest));
map.remove(longest);
} else
isAllRemoved = true;
}
System.out.println("");
System.out.println("printing next longest word(s) with word count > = 3");
System.out.println("");
// iterate through the key set and display word, word length and values
System.out.printf("%-25s\t%-25s\t%s\n", "Word", "Word Length", "Count");
String nextLongest = getNextLongest(map, valueOfLongest);
int valueOfNextLongest = 0;
if (!longest.equals("")) {
valueOfNextLongest = nextLongest.length();
System.out.printf("%-25s\t%-25s\t%s\n", nextLongest, nextLongest.length(), map.get(nextLongest));
map.remove(nextLongest);
}
boolean isNextLongest = false;
while (!isNextLongest) {
isNextLongest = true;
nextLongest = getNextLongest(map, valueOfLongest);
if (!(nextLongest.equals("")) && nextLongest.length() == valueOfNextLongest) {
System.out.printf("%-25s\t%-25s\t%s\n", nextLongest, nextLongest.length(), map.get(nextLongest));
map.remove(nextLongest);
isNextLongest = false;
}
}
}
public static String getLongest(Map<String, Integer> map) {
String longest = "";
for (Map.Entry<String, Integer> entry : map.entrySet()) {
String key = (String) entry.getKey();
if (longest.length() < key.length() && map.get(key) < 3) {
longest = key;
}
}
return longest;
}
public static String getNextLongest(Map<String, Integer> map,
int valueOfLongest) {
String nextLongest = "";
for (Map.Entry<String, Integer> entry : map.entrySet()) {
String key = (String) entry.getKey();
if (valueOfLongest > key.length() && nextLongest.length() < key.length() && map.get(key) >= 3) {
nextLongest = key;
}
}
return nextLongest;
}
}
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
public class CountWord {
public static void main(String args[]) throws IOException {
FileReader fr = new FileReader("c:/a.txt");
BufferedReader br = new BufferedReader(fr);
// init the longest size 0
int longestSize = 0;
String s = null;
// may be some word have the same length
Set<String> finalAnswerSet = new HashSet<String>();
Multiset<String> everyWordSet = HashMultiset.create();
while (br != null && (s = br.readLine()) != null) {
// put every word into the everyWordSet
everyWordSet.add(s);
// we care about the word appear 3+ times
if (everyWordSet.count(s) > 3) {
if (s.length() > longestSize) {
//if s'length is the longest,clear the finalAnswerSet and put s into it
longestSize = s.length();
finalAnswerSet.clear();
finalAnswerSet.add(s);
} else if (s.length() == longestSize) {
// finalAnswerSet may contains multi values
finalAnswerSet.add(s);
}
}
}
// and now we have the longestSize,and finalAnswerSet contains the answers,let's check it
System.out.println("The longest size is:" + longestSize);
for (String answer : finalAnswerSet) {
System.out.println("The word is :" + answer);
System.out.println("The word appears time is:" + everyWordSet.count(answer));
}
//don't forget to close the resource
br.close();
fr.close();
}
}

Splitting a sentence into words using for loop or while loop in java

i have to do a program which takes a sentence and reverses it word by word in java. for eg:
India is my country
output:aidnI si ym yrtnuoc
ive figured out all of it but i just cant split a sentence into separate words.im not allowed to use split function but im meant to use either substring or indexof();while loop and for loop are allowed.
this is what ive got so far:
import java.io.*;
public class Rereprogram10
{
public void d()throws IOException
{
BufferedReader br=new BufferedReader(new InputStreamReader(System.in));
String str;
System.out.println("input a string");
str=br.readLine();
String rev="";
int length=str.length();
int counter=length;
for(int i=0;i<length;i++)
{
rev=rev+str.charAt(counter-1);
counter--;
}
System.out.println("the result is: "+rev);
}
}
its wrong though,the output keeps on coming:
yrtnuoc ym si aidnI
i havent learnt arrays yet...
I'm going to assume that advanced datastructures are out, and efficiency is not an issue.
Where you are going wrong is that you are reversing the entire string, you need to only reverse the words. So you really need to check to find where a word ends, then either reverse it then, or be reversing it as you go along.
Here is an example of reversing as you go along.
int length=str.length();
String sentence="";
String word = "";
for(int i=0;i<length;i++) {
if (str.charAt(i) != ' '){
word = str.charAt(i) + word;
} else {
sentence += word +" ";
word = "";
}
}
sentence += word;
System.out.println("the result is: "+sentence);
This passes the test:
package com.sandbox;
import com.google.common.base.Joiner;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import static org.junit.Assert.assertEquals;
public class SandboxTest {
#Test
public void testQuestionInput() {
String input = "India is my country";
assertEquals("country my is India", reverseWords(input));
}
private String reverseWords(String input) {
List<String> words = putWordsInList(input);
Collections.reverse(words);
return Joiner.on(" ").join(words);
}
private List<String> putWordsInList(String input) {
List<String> words = new ArrayList<String>();
String word = "";
for (int i = 0; i < input.length(); i++) {
char c = input.charAt(i);
if (c == ' ') {
words.add(word);
word = "";
} else {
word += c;
}
}
words.add(word);
return words;
}
}
Here is my code without split() for you.
Input:
India is my country
Output:
country my is India
aidnI si ym yrtnuoc
You can choose the output you need.
public class Reverse {
public static class Stack {
private Node[] slot = new Node[1000];
private int pos = 0;
private class Node{
private char[] n = new char[30];
private int pos = 0;
public void push(char c) {
n[pos++] = c;
}
public String toString() {
return new String(n).trim() + " "; // TODO Fix
}
}
public void push(char c) {
if(slot[pos] == null)
slot[pos] = new Node();
if(c != ' ') {
slot[pos].push(c);
} else {
slot[pos++].push(c);
}
}
public String toString() {
StringBuilder sb = new StringBuilder();
for(int i = pos; i >=0; i --)
sb.append(slot[i]);
return sb.toString();
}
private String reverseWord(String word) {
StringBuilder sb = new StringBuilder();
int len = word.length();
for(int i = len - 1; i >= 0; i--)
sb.append(word.charAt(i));
return sb.toString();
}
public String foryou() {
StringBuilder sb = new StringBuilder();
for(int i = 0; i < pos + 1; i ++)
sb.append(this.reverseWord(slot[i].toString()));
return sb.toString();
}
}
/**
* #param args
*/
public static void main(String[] args) {
Stack stack = new Stack();
String sentence = "India is my country";
System.out.println(sentence);
for(int i = 0; i < sentence.length(); i ++) {
stack.push(sentence.charAt(i));
}
System.out.println(stack);
System.out.println(stack.foryou());
}
}
try this
String x="India is my country";
StringBuilder b=new StringBuilder();
int i=0;
do{
i=x.indexOf(" ", 0);
String z;
if(i>0){
z=x.substring(0,i);
}
else{
z=x;
}
x=x.substring(i+1);
StringBuilder v=new StringBuilder(z);
b.append(v.reverse());
if(i!=-1)
b.append(" ");
System.out.println(b.toString());
}
while(i!=-1);

Categories

Resources