Encryption/Decryption in java - java

Hi i found some code after do some Google and i am using this code to Encrypt the string (witch i set as parameter in web-service)
and it's working fine, it's to hard for me to understand this code so put hole class.
public class RSA {
Vector<Object> vectEnc;
Object enc[];
private long P, Q;
private long N, M, E = 11;
private long D;
public RSA() {
P = 6151;
Q = 8807;
N = P * Q;
M = (P - 1) * (Q - 1);
E = 11;
D = 44310191;
vectEnc = new Vector<Object>();
}
public String doEncryption(String message) {
try {
String str = new BASE64Encoder().encode(message.getBytes("UTF-8"));
String encString = "";
for (int i = 0; i < str.length(); i += 3) {
String tempAsci = "1";
String tempStr;
for (int h = 0; h < 3; h++) {
int total = i + h;
if (total < str.length()) {
tempStr = String.valueOf((int) (str.subSequence(total,
total + 1).charAt(0)) - 30);
if (tempStr.length() < 2) {
tempStr = "0" + tempStr;
}
} else {
break;
}
tempAsci = tempAsci + tempStr;
}
vectEnc.add(tempAsci + "1");
}
enc = vectEnc.toArray();
vectEnc.removeAllElements();
for (int i = 0; i < enc.length; i++) {
long base = Long.parseLong(enc[i].toString());
long powMod = powMod(base, E, N);
encString = encString + String.valueOf(powMod) + " ";
}
return encString;
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
public String doDecryption(String codeMsg) {
String[] decryptArray = codeMsg.split(" ");
String decryptStr = "";
String originalStr = "";
for (int i = 0; i < decryptArray.length; i++) {
long base = Long.parseLong(decryptArray[i]);
long powMod = powMod(base, D, N);
String powModString = String.valueOf(powMod);
decryptStr = decryptStr
+ powModString.subSequence(1, powModString.length() - 1);
}
for (int i = 0; i < decryptStr.length(); i += 2) {
char ch = (char) (Integer.parseInt(decryptStr.subSequence(i, i + 2)
.toString()) + 30);
originalStr = originalStr + ch;
}
BASE64Decoder decoder = new BASE64Decoder();
byte[] decBytes = null;
try {
decBytes = decoder.decodeBuffer(originalStr);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
String decodeStr = new String(decBytes);
return decodeStr;
}
public long powMod(long base, long exp, long modula) {
long accum = 1;
int i = 0;
long base2 = base;
while ((exp >> i) > 0) {
if (((exp >> i) & 1) == 1) {
accum = mo((accum * base2), modula);
}
base2 = mo((base2 * base2), modula);
i++;
}
return accum;
}
public long mo(long g, long l) {
return (long) (g - (l * Math.floor(g / l)));
}
}
But the problem is when the String Length is more the 56 it throw the Exception Like
java.lang.NumberFormatException: For input string: "174-17-201"
at java.lang.NumberFormatException.forInputString(Unknown Source)
at java.lang.Long.parseLong(Unknown Source)
at java.lang.Long.parseLong(Unknown Source)
at com.info.test.RSA.doEncryption(RSA.java:49)
at com.info.test.Test.main(Test.java:56)
i even no what is the algorithm is use by this code ,i do some Google and i found simple solution is make a part of string and do Encryption it Like this.
int MAX_LAN = 55;
List<String> splitEqually = splitEqually(string,MAX_LAN);
String encodeString = "";
for (int i = 0; i < splitEqually.size(); i++) {
encodeString +=rsa.doEncryption(splitEqually.get(i));
}
System.out.println(encodeString);
public static List<String> splitEqually(String text, int size) {
List<String> ret = new ArrayList<String>((text.length() + size - 1) / size);
for (int start = 0; start < text.length(); start += size) {
ret.add(text.substring(start, Math.min(text.length(), start + size)));
}
return ret;
}
and it working fine , so is it proper method or not ??

I would strongly suggest using Java's built-in cryptographic libraries for this. Follow this series of articles on how to perform RSA encryption/decryption in Java:
http://www.javamex.com/tutorials/cryptography/rsa_encryption.shtml

Related

why doesn't this java method work? it should eliminate empty spaces in Strings

here's the method :
public static String normalizza(String x) {
for (int i = x.length(); i > 0; i--) {
if (x.substring(i, x.length()).equalsIgnoreCase(" ")) {
x = x.substring(0, i);
}
}
return x;
}
i should read a String from a random access file and eliminate empty spaces to find the object position
these are the attributes of the class :
public class Iscritto {
private int id;
private String nome;
private String cognome;
private String dataNascita;
this is the search method for the attribute "nome" ( the method "normalizza" works with this one ) :
private static void cercaNome() {
Iscritto a = new Iscritto();
try {
File file = new File("C:\\temp\\iscritti.dat");
RandomAccessFile ra = new RandomAccessFile(file, "r");
String nome = JOptionPane.showInputDialog("Inserisci nome da cercare:");
ra.seek(0);
a.leggi(ra);
String nomeControllato = a.getNome();
nomeControllato = normalizza(nomeControllato);
int conta = 0;
if (nomeControllato.equalsIgnoreCase(nome)) {
int b = (int) ra.getFilePointer() - 2;
int position = b / 153;
ra.seek(position * 153);
a.leggi(ra);
System.out.println("iscritto: " + a);
} else {
while (!nomeControllato.equalsIgnoreCase(nome)) {
conta++;
ra.seek(conta * 153);
a.leggi(ra);
nomeControllato = a.getNome();
nomeControllato = normalizza(nomeControllato);
if (nomeControllato.equalsIgnoreCase(nome)) {
int b = (int) ra.getFilePointer() - 2;
int position = b / 153;
ra.seek(position * 153);
a.leggi(ra);
System.out.println("iscritto: " + a);
}
}
}
ra.close();
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
this is the search method for the attribute "data" ( normalizza doesn't work for this one ) :
private static void cercaData() {
Iscritto a = new Iscritto();
try {
File file = new File("C:\\temp\\iscritti.dat");
RandomAccessFile ra = new RandomAccessFile(file, "r");
String data = JOptionPane.showInputDialog("Inserisci data da cercare (example : 12-MAG-2018):");
ra.seek(0);
a.leggi(ra);
String dataControllata = a.getDataNascita();
dataControllata = normalizza(dataControllata);
int conta = 0;
if (dataControllata.equalsIgnoreCase(data)) {
int b = (int) ra.getFilePointer() - 2;
int position = b / 153;
ra.seek(position * 153);
a.leggi(ra);
System.out.println("iscritto: " + a);
} else {
while (!dataControllata.equalsIgnoreCase(data)) {
conta++;
ra.seek(conta * 153);
a.leggi(ra);
dataControllata = a.getDataNascita();
dataControllata = normalizza(dataControllata);
if (dataControllata.equalsIgnoreCase(data)) {
int b = (int) ra.getFilePointer() - 2;
int position = b / 153;
ra.seek(position * 153);
a.leggi(ra);
System.out.println("iscritto: " + a);
}
}
}
ra.close();
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
if i search a "data" and it's not the last of the file it will give me an infinite "null" output
If you want to eliminate empty spaces at the end of your String, simply use the trim() method.
System.out.println(" my String ".trim()); // prints 'my String'
If you need to eliminate all whitespaces in your string, then use replace(String, String)
System.out.println(" my String ".replace(" ", "")); // prints 'myString'
Assuming you have a reason not to use String.trim(), you could do a much simpler implementation:
start i from the end of string, and look backward at each .charAt(i) until != ' '. Then substring just once, cutting after that non-space char.

Retrieving sidHistory from LDAP with Java

I can retrieve objectSID and many other attributes without error, but not sidHistory (I need sidHistory to see which account in domain A corresponds to an account in domain B).
Here's the code that works for most attributes, including objectSID:
void dumpCSV(Attributes attrs, String[] displayList, Logger lg) {
// Assume we're only dealing with single valued attributes (for now)
StringBuilder sb = new StringBuilder();
for (String attName : displayList) {
String name = attName.trim().toLowerCase();
Attribute att = attrs.get(name);
if (sb.length() > 0)
sb.append(",");
if (att != null) {
String v = "?";
try {
if ((name.equals("objectsid")) || (name.equals("sidhistory")))
v = binString(att);
else {
v = (String) att.get();
if (name.equals("pwdlastset") || name.equals("lastlogontimestamp") || name.equals("lastlogon") || name.equals("accountexpires"))
v = TickConverter.tickDate(v);
}
sb.append(Logger.tidyString(v));
} catch (NamingException e) {
System.err.println("NamingException, " + e);
return;
}
}
}
lg.logln(sb.toString());
}
}
static String binString(Attribute att) {
try {
byte bin[] = (byte[]) att.get();
return decodeSID(bin);
} catch (NamingException e) {
System.err.println("NamingException, " + e);
return "?";
}
}
// taken from http://www.adamretter.org.uk/blog/entries/LDAPTest.java, in turn borrowed from Oracle docs
public static String decodeSID(byte[] sid) {
final StringBuilder strSid = new StringBuilder("S-");
// get version
final int revision = sid[0];
strSid.append(Integer.toString(revision));
//next byte is the count of sub-authorities
final int countSubAuths = sid[1] & 0xFF;
//get the authority
long authority = 0;
//String rid = "";
for(int i = 2; i <= 7; i++) {
authority |= ((long)sid[i]) << (8 * (5 - (i - 2)));
}
strSid.append("-");
strSid.append(Long.toHexString(authority));
//iterate all the sub-auths
int offset = 8;
int size = 4; //4 bytes for each sub auth
for(int j = 0; j < countSubAuths; j++) {
long subAuthority = 0;
for(int k = 0; k < size; k++) {
subAuthority |= (long)(sid[offset + k] & 0xFF) << (8 * k);
}
strSid.append("-");
strSid.append(subAuthority);
offset += size;
}
return strSid.toString();
}
If I try to retrieve sidHistory using this, tyhe value I get is "?".
Even if I use a namingEnumeration, as I think I probably should, I get "Exception in thread "AWT-EventQueue-0" java.util.NoSuchElementException: Vector Enumeration", probably because I am trying to save it to the wrong typoe (and I've tried a few different types).
snippet is :
String v;
NamingEnumeration nenum = att.getAll();
while (nenum.hasMore()) {
v = "";
if (name.equals("objectsid")) {
v = binString(att);
nenum.next();
} else if (name.equals("sidhistory")) {
nenum.next();
String[] vv = ((String[]) nenum.next());
v = vv[0];
} else
v = (String) nenum.next();
if (name.equals("pwdlastset") || name.equals("lastlogontimestamp") || name.equals("lastlogon") || name.equals("accountexpires"))
v = TickConverter.tickDate(v);
lg.logln(name + "=" + Logger.tidyString(v));
}
We used some code similar to:
We note we saw it at http://tomcatspnegoad.sourceforge.net/xref/net/sf/michaelo/tomcat/realm/ActiveDirectoryRealm.html#L566
...
Attribute sidHistory = roleAttributes.get("sIDHistory;binary");
List<String> sidHistoryStrings = new LinkedList<String>();
if (sidHistory != null)
{
NamingEnumeration<?> sidHistoryEnum = sidHistory.getAll();
while (sidHistoryEnum.hasMore())
{
byte[] sidHistoryBytes = (byte[]) sidHistoryEnum.next();
sidHistoryStrings.add(new Sid(sidHistoryBytes).toString());
}
...
}
sidHistory is multi-valued and binary (octetString) is what cause most people headaches.
Hope this helps.

Tiny GP output in text file

I've recently stumbled upon Tiny GP (A Genetic Programming program), and I found it pretty useful, so I decided to change all System.out.println() in the program to a write to text file method.
Problem: In the text file, for some reason, only says "PROBLEM SOLVED", instead of printing out generations and other things that it is supposed to (see code).
Tiny GP modified class file:
package main;
/*
* Program: tiny_gp.java
*
* Author: Riccardo Poli (email: rpoli#essex.ac.uk)
*
* Modified by Preston Tang
*/
import java.util.*;
import java.io.*;
import java.text.DecimalFormat;
public class tiny_gp {
String Name;
double[] fitness;
char[][] pop;
static Random rd = new Random();
static final int ADD = 110,
SUB = 111,
MUL = 112,
DIV = 113,
FSET_START = ADD,
FSET_END = DIV;
static double[] x = new double[FSET_START];
static double minrandom, maxrandom;
static char[] program;
static int PC;
static int varnumber, fitnesscases, randomnumber;
static double fbestpop = 0.0, favgpop = 0.0;
static long seed;
static double avg_len;
static final int MAX_LEN = 10000,
POPSIZE = 100000,
DEPTH = 5,
GENERATIONS = 100,
TSIZE = 2;
public static final double PMUT_PER_NODE = 0.05,
CROSSOVER_PROB = 0.9;
public static double[][] targets;
public double run() {
/* Interpreter */
char primitive = program[PC++];
if (primitive < FSET_START) {
return (x[primitive]);
}
switch (primitive) {
case ADD:
return (run() + run());
case SUB:
return (run() - run());
case MUL:
return (run() * run());
case DIV: {
double num = run(), den = run();
if (Math.abs(den) <= 0.001) {
return (num);
} else {
return (num / den);
}
}
}
return (0.0); // should never get here
}
public int traverse(char[] buffer, int buffercount) {
if (buffer[buffercount] < FSET_START) {
return (++buffercount);
}
switch (buffer[buffercount]) {
case ADD:
case SUB:
case MUL:
case DIV:
return (traverse(buffer, traverse(buffer, ++buffercount)));
}
return (0); // should never get here
}
public void setup_fitness(String fname) {
try {
int i, j;
String line;
BufferedReader in
= new BufferedReader(
new FileReader(fname));
line = in.readLine();
StringTokenizer tokens = new StringTokenizer(line);
varnumber = Integer.parseInt(tokens.nextToken().trim());
randomnumber = Integer.parseInt(tokens.nextToken().trim());
minrandom = Double.parseDouble(tokens.nextToken().trim());
maxrandom = Double.parseDouble(tokens.nextToken().trim());
fitnesscases = Integer.parseInt(tokens.nextToken().trim());
targets = new double[fitnesscases][varnumber + 1];
if (varnumber + randomnumber >= FSET_START) {
Write("too many variables and constants");
//System.out.println("too many variables and constants");
}
for (i = 0; i < fitnesscases; i++) {
line = in.readLine();
tokens = new StringTokenizer(line);
for (j = 0; j <= varnumber; j++) {
targets[i][j] = Double.parseDouble(tokens.nextToken().trim());
}
}
in.close();
} catch (FileNotFoundException e) {
Write("ERROR: Please provide a data file");
//System.out.println("ERROR: Please provide a data file");
System.exit(0);
} catch (Exception e) {
Write("ERROR: Incorrect data format");
//System.out.println("ERROR: Incorrect data format");
System.exit(0);
}
}
public double fitness_function(char[] Prog) {
int i = 0, len;
double result, fit = 0.0;
len = traverse(Prog, 0);
for (i = 0; i < fitnesscases; i++) {
for (int j = 0; j < varnumber; j++) {
x[j] = targets[i][j];
}
program = Prog;
PC = 0;
result = run();
fit += Math.abs(result - targets[i][varnumber]);
}
return (-fit);
}
public int grow(char[] buffer, int pos, int max, int depth) {
char prim = (char) rd.nextInt(2);
int one_child;
if (pos >= max) {
return (-1);
}
if (pos == 0) {
prim = 1;
}
if (prim == 0 || depth == 0) {
prim = (char) rd.nextInt(varnumber + randomnumber);
buffer[pos] = prim;
return (pos + 1);
} else {
prim = (char) (rd.nextInt(FSET_END - FSET_START + 1) + FSET_START);
switch (prim) {
case ADD:
case SUB:
case MUL:
case DIV:
buffer[pos] = prim;
one_child = grow(buffer, pos + 1, max, depth - 1);
if (one_child < 0) {
return (-1);
}
return (grow(buffer, one_child, max, depth - 1));
}
}
return (0); // should never get here
}
public int print_indiv(char[] buffer, int buffercounter) {
int a1 = 0, a2;
if (buffer[buffercounter] < FSET_START) {
if (buffer[buffercounter] < varnumber) {
Write("X" + (buffer[buffercounter] + 1) + " ");
//System.out.print("X" + (buffer[buffercounter] + 1) + " ");
} else {
WriteDouble(x[buffer[buffercounter]]);
//System.out.print(x[buffer[buffercounter]]);
}
return (++buffercounter);
}
switch (buffer[buffercounter]) {
case ADD:
Write("(");
//System.out.print("(");
a1 = print_indiv(buffer, ++buffercounter);
Write(" + ");
//System.out.print(" + ");
break;
case SUB:
Write("(");
//System.out.print("(");
a1 = print_indiv(buffer, ++buffercounter);
Write(" - ");
//System.out.print(" - ");
break;
case MUL:
Write("(");
//System.out.print("(");
a1 = print_indiv(buffer, ++buffercounter);
Write(" * ");
//System.out.print(" * ");
break;
case DIV:
Write("(");
//System.out.print("(");
a1 = print_indiv(buffer, ++buffercounter);
Write(" / ");
//System.out.print(" / ");
break;
}
a2 = print_indiv(buffer, a1);
Write(")");
//System.out.print(")");
return (a2);
}
public static char[] buffer = new char[MAX_LEN];
public char[] create_random_indiv(int depth) {
char[] ind;
int len;
len = grow(buffer, 0, MAX_LEN, depth);
while (len < 0) {
len = grow(buffer, 0, MAX_LEN, depth);
}
ind = new char[len];
System.arraycopy(buffer, 0, ind, 0, len);
return (ind);
}
public char[][] create_random_pop(int n, int depth, double[] fitness) {
char[][] pop = new char[n][];
int i;
for (i = 0; i < n; i++) {
pop[i] = create_random_indiv(depth);
fitness[i] = fitness_function(pop[i]);
}
return (pop);
}
public void stats(double[] fitness, char[][] pop, int gen) {
int i, best = rd.nextInt(POPSIZE);
int node_count = 0;
fbestpop = fitness[best];
favgpop = 0.0;
for (i = 0; i < POPSIZE; i++) {
node_count += traverse(pop[i], 0);
favgpop += fitness[i];
if (fitness[i] > fbestpop) {
best = i;
fbestpop = fitness[i];
}
}
avg_len = (double) node_count / POPSIZE;
favgpop /= POPSIZE;
Write("Generation=" + gen + " Avg Fitness=" + (-favgpop)
+ " Best Fitness=" + (-fbestpop) + " Avg Size=" + avg_len
+ "\nBest Individual: ");
//System.out.print("Generation=" + gen + " Avg Fitness=" + (-favgpop)
// + " Best Fitness=" + (-fbestpop) + " Avg Size=" + avg_len
// + "\nBest Individual: ");
print_indiv(pop[best], 0);
Write("\n");
//System.out.print("\n");
//System.out.flush();
}
public int tournament(double[] fitness, int tsize) {
int best = rd.nextInt(POPSIZE), i, competitor;
double fbest = -1.0e34;
for (i = 0; i < tsize; i++) {
competitor = rd.nextInt(POPSIZE);
if (fitness[competitor] > fbest) {
fbest = fitness[competitor];
best = competitor;
}
}
return (best);
}
public int negative_tournament(double[] fitness, int tsize) {
int worst = rd.nextInt(POPSIZE), i, competitor;
double fworst = 1e34;
for (i = 0; i < tsize; i++) {
competitor = rd.nextInt(POPSIZE);
if (fitness[competitor] < fworst) {
fworst = fitness[competitor];
worst = competitor;
}
}
return (worst);
}
public char[] crossover(char[] parent1, char[] parent2) {
int xo1start, xo1end, xo2start, xo2end;
char[] offspring;
int len1 = traverse(parent1, 0);
int len2 = traverse(parent2, 0);
int lenoff;
xo1start = rd.nextInt(len1);
xo1end = traverse(parent1, xo1start);
xo2start = rd.nextInt(len2);
xo2end = traverse(parent2, xo2start);
lenoff = xo1start + (xo2end - xo2start) + (len1 - xo1end);
offspring = new char[lenoff];
System.arraycopy(parent1, 0, offspring, 0, xo1start);
System.arraycopy(parent2, xo2start, offspring, xo1start,
(xo2end - xo2start));
System.arraycopy(parent1, xo1end, offspring,
xo1start + (xo2end - xo2start),
(len1 - xo1end));
return (offspring);
}
public char[] mutation(char[] parent, double pmut) {
int len = traverse(parent, 0), i;
int mutsite;
char[] parentcopy = new char[len];
System.arraycopy(parent, 0, parentcopy, 0, len);
for (i = 0; i < len; i++) {
if (rd.nextDouble() < pmut) {
mutsite = i;
if (parentcopy[mutsite] < FSET_START) {
parentcopy[mutsite] = (char) rd.nextInt(varnumber + randomnumber);
} else {
switch (parentcopy[mutsite]) {
case ADD:
case SUB:
case MUL:
case DIV:
parentcopy[mutsite]
= (char) (rd.nextInt(FSET_END - FSET_START + 1)
+ FSET_START);
}
}
}
}
return (parentcopy);
}
public void print_parms() {
Write("-- TINY GP (Java version) --\n");
//System.out.print("-- TINY GP (Java version) --\n");
Write("SEED=" + seed + "\nMAX_LEN=" + MAX_LEN
+ "\nPOPSIZE=" + POPSIZE + "\nDEPTH=" + DEPTH
+ "\nCROSSOVER_PROB=" + CROSSOVER_PROB
+ "\nPMUT_PER_NODE=" + PMUT_PER_NODE
+ "\nMIN_RANDOM=" + minrandom
+ "\nMAX_RANDOM=" + maxrandom
+ "\nGENERATIONS=" + GENERATIONS
+ "\nTSIZE=" + TSIZE
+ "\n----------------------------------\n");
// System.out.print("SEED=" + seed + "\nMAX_LEN=" + MAX_LEN
// + "\nPOPSIZE=" + POPSIZE + "\nDEPTH=" + DEPTH
// + "\nCROSSOVER_PROB=" + CROSSOVER_PROB
// + "\nPMUT_PER_NODE=" + PMUT_PER_NODE
// + "\nMIN_RANDOM=" + minrandom
// + "\nMAX_RANDOM=" + maxrandom
// + "\nGENERATIONS=" + GENERATIONS
// + "\nTSIZE=" + TSIZE
// + "\n----------------------------------\n");
}
public tiny_gp(String fname, long s) {
fitness = new double[POPSIZE];
seed = s;
if (seed >= 0) {
rd.setSeed(seed);
}
setup_fitness(fname);
for (int i = 0; i < FSET_START; i++) {
x[i] = (maxrandom - minrandom) * rd.nextDouble() + minrandom;
}
pop = create_random_pop(POPSIZE, DEPTH, fitness);
}
public void evolve() {
int gen = 0, indivs, offspring, parent1, parent2, parent;
double newfit;
char[] newind;
print_parms();
stats(fitness, pop, 0);
for (gen = 1; gen < GENERATIONS; gen++) {
if (fbestpop > -1e-5) {
Write("PROBLEM SOLVED\n");
//System.out.print("PROBLEM SOLVED\n");
System.exit(0);
}
for (indivs = 0; indivs < POPSIZE; indivs++) {
if (rd.nextDouble() < CROSSOVER_PROB) {
parent1 = tournament(fitness, TSIZE);
parent2 = tournament(fitness, TSIZE);
newind = crossover(pop[parent1], pop[parent2]);
} else {
parent = tournament(fitness, TSIZE);
newind = mutation(pop[parent], PMUT_PER_NODE);
}
newfit = fitness_function(newind);
offspring = negative_tournament(fitness, TSIZE);
pop[offspring] = newind;
fitness[offspring] = newfit;
}
stats(fitness, pop, gen);
}
Write("PROBLEM *NOT* SOLVED\n");
//System.out.print("PROBLEM *NOT* SOLVED\n");
System.exit(1);
}
public void Write(String context) {
FileWriter fileWriter;
try {
fileWriter = new FileWriter("GP.txt");
try (BufferedWriter bufferedWriter = new BufferedWriter(fileWriter)) {
bufferedWriter.write(context);
}
} catch (IOException ex) {
}
}
public void WriteDouble(double context) {
FileWriter fileWriter;
try {
fileWriter = new FileWriter("GP.txt");
try (BufferedWriter bufferedWriter = new BufferedWriter(fileWriter)) {
String ncontext = Double.toString(context);
bufferedWriter.write(ncontext);
}
} catch (IOException ex) {
}
}
};
The Functions Mapper file that uses the Tiny GP class file:
package function_mapper;
import javax.swing.JOptionPane;
import main.*;
public class Function_Mapper {
public static void main(String[] args) {
String fname = JOptionPane.showInputDialog(null, "File Name", "Input Dialog", JOptionPane.INFORMATION_MESSAGE);
long s = -1;
if (args.length == 2) {
s = Integer.valueOf(args[0]).intValue();
fname = args[1];
}
if (args.length == 1) {
fname = args[0];
}
tiny_gp gp = new tiny_gp(fname, s);
gp.evolve();
}
}
Much help appreciated, thanks!
The Write method overwrites the contents of the file on each invocation. There are two ways to fix this.
An easier one, is to append file, instead of overwriting it. It could be achieved by passing append argument to the FileWriter (I simplified code a little bit along the way).
// true on the next line means "append"
try (Writer writer = new FileWriter("GP.txt", true)) {
writer.write(Double.toString(context));
} catch (IOException ex) {
}
A harder, but much fore efficient one is to openwriter in the constructor, use it in Write method, and close in the specially introduced close method of the tiny_gp.

SVM Predict reading datatest

I have such a big problem with implementation the svm_predict function. I have trained svm, and prepare datatest. Both files are in .txt. file.Datatest are from LBP( Local Binary patterns) and it looks like:
-0.6448744548418511
-0.7862774302452588
1.7746263060948377
I'm loading it to the svm_predict function and at my console after compiling my program there is:
Accuracy = 0.0% (0/800) (classification)
So it's look like it can't read datatest?
import libsvm.*;
import java.io.*;
import java.util.*;
class svm_predict {
private static double atof(String s)
{
return Double.valueOf(s).doubleValue();
}
private static int atoi(String s)
{
return Integer.parseInt(s);
}
private static void predict(BufferedReader input, DataOutputStream output, svm_model model, int predict_probability) throws IOException
{
int correct = 0;
int total = 0;
double error = 0;
double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
int svm_type=svm.svm_get_svm_type(model);
int nr_class=svm.svm_get_nr_class(model);
double[] prob_estimates=null;
if(predict_probability == 1)
{
if(svm_type == svm_parameter.EPSILON_SVR ||
svm_type == svm_parameter.NU_SVR)
{
System.out.print("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+svm.svm_get_svr_probability(model)+"\n");
}
else
{
int[] labels=new int[nr_class];
svm.svm_get_labels(model,labels);
prob_estimates = new double[nr_class];
output.writeBytes("labels");
for(int j=0;j<nr_class;j++)
output.writeBytes(" "+labels[j]);
output.writeBytes("\n");
}
}
while(true)
{
String line = input.readLine();
if(line == null) break;
StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
double target = atof(st.nextToken());
int m = st.countTokens()/2;
svm_node[] x = new svm_node[m];
for(int j=0;j<m;j++)
{
x[j] = new svm_node();
x[j].index = atoi(st.nextToken());
x[j].value = atof(st.nextToken());
}
double v;
if (predict_probability==1 && (svm_type==svm_parameter.C_SVC || svm_type==svm_parameter.NU_SVC))
{
v = svm.svm_predict_probability(model,x,prob_estimates);
output.writeBytes(v+" ");
for(int j=0;j<nr_class;j++)
output.writeBytes(prob_estimates[j]+" ");
output.writeBytes("\n");
}
else
{
v = svm.svm_predict(model,x);
output.writeBytes(v+"\n");
}
if(v == target)
++correct;
error += (v-target)*(v-target);
sumv += v;
sumy += target;
sumvv += v*v;
sumyy += target*target;
sumvy += v*target;
++total;
}
if(svm_type == svm_parameter.EPSILON_SVR ||
svm_type == svm_parameter.NU_SVR)
{
System.out.print("Mean squared error = "+error/total+" (regression)\n");
System.out.print("Squared correlation coefficient = "+
((total*sumvy-sumv*sumy)*(total*sumvy-sumv*sumy))/
((total*sumvv-sumv*sumv)*(total*sumyy-sumy*sumy))+
" (regression)\n");
}
else
System.out.print("Accuracy = "+(double)correct/total*100+
"% ("+correct+"/"+total+") (classification)\n");
}
private static void exit_with_help()
{
System.err.print("usage: svm_predict [options] test_file model_file output_file\n"
+"options:\n"
+"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); one-class SVM not supported yet\n");
System.exit(1);
}
public static void main(String argv[]) throws IOException
{
int i, predict_probability=0;
// parse options
for(i=0;i<argv.length;i++)
{
if(argv[i].charAt(0) != '-') break;
++i;
switch(argv[i-1].charAt(1))
{
case 'b':
predict_probability = atoi(argv[i]);
break;
default:
System.err.print("Unknown option: " + argv[i-1] + "\n");
exit_with_help();
}
}
if(i>=argv.length-2)
exit_with_help();
try
{
BufferedReader input = new BufferedReader(new FileReader(argv[i]));
DataOutputStream output = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(argv[i+2])));
svm_model model = svm.svm_load_model(argv[i+1]);
if(predict_probability == 1)
{
if(svm.svm_check_probability_model(model)==0)
{
System.err.print("Model does not support probabiliy estimates\n");
System.exit(1);
}
}
else
{
if(svm.svm_check_probability_model(model)!=0)
{
System.out.print("Model supports probability estimates, but disabled in prediction.\n");
}
}
predict(input,output,model,predict_probability);
input.close();
output.close();
}
catch(FileNotFoundException e)
{
exit_with_help();
}
catch(ArrayIndexOutOfBoundsException e)
{
exit_with_help();
}
}
}
It's difficult to know becasue its a big process
make sure you follow their classification guide
the data should be scaled it seems it goes above 1 right now

Sentence comparison with NLP

I used lingpipe for sentence detection but I don't have any idea if there is a better tool. As far as I have understood, there is no way to compare two sentences and see if they mean the same thing.
Is there anyother good source where I can have a pre-built method for comparing two sentences and see if they are similar?
My requirement is as below:
String sent1 = "Mary and Meera are my classmates.";
String sent2 = "Meera and Mary are my classmates.";
String sent3 = "I am in Meera and Mary's class.";
// several sentences will be formed and basically what I need to do is
// this
boolean bothAreEqual = compareOf(sent1, sent2);
sop(bothAreEqual); // should print true
boolean bothAreEqual = compareOf(sent2, sent3);
sop(bothAreEqual);// should print true
How to test if the meaning of two sentences are the same: this would be a too open-ended question.
However, there are methods for comparing two sentences and see if they are similar. There are many possible definition for similarity that can be tested with pre-built methods.
See for example http://en.wikipedia.org/wiki/Levenshtein_distance
Distance between
'Mary and Meera are my classmates.'
and 'Meera and Mary are my classmates.':
6
Distance between
'Mary and Meera are my classmates.'
and 'Alice and Bobe are not my classmates.':
14
Distance between
'Mary and Meera are my classmates.'
and 'Some totally different sentence.':
29
code:
public class LevenshteinDistance {
private static int minimum(int a, int b, int c) {
return Math.min(Math.min(a, b), c);
}
public static int computeDistance(CharSequence str1,
CharSequence str2) {
int[][] distance = new int[str1.length() + 1][str2.length() + 1];
for (int i = 0; i <= str1.length(); i++){
distance[i][0] = i;
}
for (int j = 0; j <= str2.length(); j++){
distance[0][j] = j;
}
for (int i = 1; i <= str1.length(); i++){
for (int j = 1; j <= str2.length(); j++){
distance[i][j] = minimum(
distance[i - 1][j] + 1,
distance[i][j - 1] + 1,
distance[i - 1][j - 1]
+ ((str1.charAt(i - 1) == str2.charAt(j - 1)) ? 0 : 1));
}
}
int result = distance[str1.length()][str2.length()];
//log.debug("distance:"+result);
return result;
}
public static void main(String[] args) {
String sent1="Mary and Meera are my classmates.";
String sent2="Meera and Mary are my classmates.";
String sent3="Alice and Bobe are not my classmates.";
String sent4="Some totally different sentence.";
System.out.println("Distance between \n'"+sent1+"' \nand '"+sent2+"': \n"+computeDistance(sent1, sent2));
System.out.println("Distance between \n'"+sent1+"' \nand '"+sent3+"': \n"+computeDistance(sent1, sent3));
System.out.println("Distance between \n'"+sent1+"' \nand '"+sent4+"': \n"+computeDistance(sent1, sent4));
}
}
Here is wat i have come up with. this is just a substitute till i get to the real thing but it might be of some help to people out there..
package com.examples;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import com.aliasi.sentences.MedlineSentenceModel;
import com.aliasi.sentences.SentenceModel;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.Files;
import com.sun.accessibility.internal.resources.accessibility;
public class SentenceWordAnalysisAndLevenshteinDistance {
private static int minimum(int a, int b, int c) {
return Math.min(Math.min(a, b), c);
}
public static int computeDistance(CharSequence str1, CharSequence str2) {
int[][] distance = new int[str1.length() + 1][str2.length() + 1];
for (int i = 0; i <= str1.length(); i++) {
distance[i][0] = i;
}
for (int j = 0; j <= str2.length(); j++) {
distance[0][j] = j;
}
for (int i = 1; i <= str1.length(); i++) {
for (int j = 1; j <= str2.length(); j++) {
distance[i][j] = minimum(
distance[i - 1][j] + 1,
distance[i][j - 1] + 1,
distance[i - 1][j - 1]
+ ((str1.charAt(i - 1) == str2.charAt(j - 1)) ? 0
: 1));
}
}
int result = distance[str1.length()][str2.length()];
return result;
}
static final TokenizerFactory TOKENIZER_FACTORY = IndoEuropeanTokenizerFactory.INSTANCE;
static final SentenceModel SENTENCE_MODEL = new MedlineSentenceModel();
public static void main(String[] args) {
try {
ArrayList<String> sentences = null;
sentences = new ArrayList<String>();
// Reading from text file
// sentences = readSentencesInFile("D:\\sam.txt");
// Giving sentences
// ArrayList<String> sentences = new ArrayList<String>();
sentences.add("Mary and Meera are my classmates.");
sentences.add("Mary and Meera are my classmates.");
sentences.add("Meera and Mary are my classmates.");
sentences.add("Alice and Bobe are not my classmates.");
sentences.add("Some totally different sentence.");
// Self-implemented
wordAnalyser(sentences);
// Internet referred
// levenshteinDistance(sentences);
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
private static ArrayList<String> readSentencesInFile(String path) {
ArrayList<String> sentencesList = new ArrayList<String>();
try {
System.out.println("Reading file from : " + path);
File file = new File(path);
String text = Files.readFromFile(file, "ISO-8859-1");
System.out.println("INPUT TEXT: ");
System.out.println(text);
List<String> tokenList = new ArrayList<String>();
List<String> whiteList = new ArrayList<String>();
Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(
text.toCharArray(), 0, text.length());
tokenizer.tokenize(tokenList, whiteList);
System.out.println(tokenList.size() + " TOKENS");
System.out.println(whiteList.size() + " WHITESPACES");
String[] tokens = new String[tokenList.size()];
String[] whites = new String[whiteList.size()];
tokenList.toArray(tokens);
whiteList.toArray(whites);
int[] sentenceBoundaries = SENTENCE_MODEL.boundaryIndices(tokens,
whites);
System.out.println(sentenceBoundaries.length
+ " SENTENCE END TOKEN OFFSETS");
if (sentenceBoundaries.length < 1) {
System.out.println("No sentence boundaries found.");
return new ArrayList<String>();
}
int sentStartTok = 0;
int sentEndTok = 0;
for (int i = 0; i < sentenceBoundaries.length; ++i) {
sentEndTok = sentenceBoundaries[i];
System.out.println("SENTENCE " + (i + 1) + ": ");
StringBuffer sentenceString = new StringBuffer();
for (int j = sentStartTok; j <= sentEndTok; j++) {
sentenceString.append(tokens[j] + whites[j + 1]);
}
System.out.println(sentenceString.toString());
sentencesList.add(sentenceString.toString());
sentStartTok = sentEndTok + 1;
}
} catch (IOException e) {
// TODO: handle exception
e.printStackTrace();
}
return sentencesList;
}
private static void levenshteinDistance(ArrayList<String> sentences) {
System.out.println("\nLevenshteinDistance");
for (int i = 0; i < sentences.size(); i++) {
System.out.println("Distance between \n'" + sentences.get(0)
+ "' \nand '" + sentences.get(i) + "': \n"
+ computeDistance(sentences.get(0),
sentences.get(i)));
}
}
private static void wordAnalyser(ArrayList<String> sentences) {
System.out.println("No.of Sentences : " + sentences.size());
List<String> stopWordsList = getStopWords();
List<String> tokenList = new ArrayList<String>();
ArrayList<List<String>> filteredSentences = new ArrayList<List<String>>();
for (int i = 0; i < sentences.size(); i++) {
tokenList = new ArrayList<String>();
List<String> whiteList = new ArrayList<String>();
Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(sentences.get(i)
.toCharArray(), 0, sentences.get(i).length());
tokenizer.tokenize(tokenList, whiteList);
System.out.print("Sentence " + (i + 1) + ": " + tokenList.size()
+ " TOKENS, ");
System.out.println(whiteList.size() + " WHITESPACES");
filteredSentences.add(filterStopWords(tokenList, stopWordsList));
}
for (int i = 0; i < sentences.size(); i++) {
System.out.println("\n" + (i + 1) + ". Comparing\n '"
+ sentences.get(0) + "' \nwith\n '" +
sentences.get(i)
+ "' : \n");
System.out.println(filteredSentences.get(0) + "\n and \n"
+ filteredSentences.get(i));
System.out.println("Percentage of similarity: "
+ calculateSimilarity(filteredSentences.get(0),
filteredSentences.get(i))
+ "%");
}
}
private static double calculateSimilarity(List<String> list1,
List<String> list2) {
int length1 = list1.size();
int length2 = list2.size();
int count1 = 0;
int count2 = 0;
double result1 = 0.0;
double result2 = 0.0;
int least, highest;
if (length2 > length1) {
least = length1;
highest = length2;
} else {
least = length2;
highest = length1;
}
// computing result1
for (String string1 : list1) {
if (list2.contains(string1))
count1++;
}
result1 = (count1 * 100) / length1;
// computing result2
for (String string2 : list2) {
if (list1.contains(string2))
count2++;
}
result2 = (count2 * 100) / length2;
double avg = (result1 + result2) / 2;
return avg;
}
private static List<String> getStopWords() {
String stopWordsString = ".,a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your";
List<String> stopWordsList = new ArrayList<String>();
List<String> stopWordTokenList = new ArrayList<String>();
List<String> whiteList = new ArrayList<String>();
Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(
stopWordsString.toCharArray(), 0, stopWordsString.length());
tokenizer.tokenize(stopWordTokenList, whiteList);
for (int i = 0; i < stopWordTokenList.size(); i++) {
// System.out.println((i + 1) + ":" + tokenList.get(i));
if (!stopWordTokenList.get(i).equals(",")) {
stopWordsList.add(stopWordTokenList.get(i));
}
}
System.out.println("No.of stop words: " + stopWordsList.size());
return stopWordsList;
}
private static List<String> filterStopWords(List<String> tokenList,
List<String> stopWordsList) {
List<String> filteredSentenceWords = new ArrayList<String>();
for (String sentenceToken : tokenList) {
if (!stopWordsList.contains(sentenceToken)) {
filteredSentenceWords.add(sentenceToken);
}
}
return filteredSentenceWords;
}
}

Categories

Resources