Neural Network: Backpropagation not working (Java)

I have to create an OCR program for a school project, so I started writing a backpropagation algorithm with the help of Wikipedia. To train my network I use the MNIST database, which I extracted a few days ago so that I have the actual image files. But the error is always about 237, and after training for a while the error and the weights become NaN. What is wrong with my code?
[A screenshot of my images folder]
Here is my Main class, which shall train my Network:
package de.Marcel.NeuralNetwork;
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
public class OCR {
public static void main(String[] args) throws IOException {
// create network
NeuralNetwork net = new NeuralNetwork(784, 450, 5, 0.2);
// load Images
File file = new File("images");
int images= 0;
double error = 0;
for (File f : file.listFiles()) {
BufferedImage image = ImageIO.read(f);
int t = -1;
double[] pixels = new double[784];
for (int x = 0; x < image.getWidth(); x++) {
for (int y = 0; y < image.getHeight(); y++) {
t++;
Color c = new Color(image.getRGB(x, y));
if (c.getRed() == 0 && c.getGreen() == 0 && c.getBlue() == 0) {
pixels[t] = 1;
} else if (c.getRed() == 255 && c.getGreen() == 255 && c.getBlue() == 255) {
pixels[t] = 0;
}
}
}
try {
if (f.getName().startsWith("1")) {
net.learn(pixels, new double[] { 1, 0, 0, 0, 0 });
error += net.getError();
images++;
} else if (f.getName().startsWith("2")) {
net.learn(pixels, new double[] { 0, 1, 0, 0, 0 });
error += net.getError();
images++;
} else if (f.getName().startsWith("3")) {
net.learn(pixels, new double[] { 0, 0, 1, 0, 0 });
error += net.getError();
images++;
} else if (f.getName().startsWith("4")) {
net.learn(pixels, new double[] { 0, 0, 0, 1, 0 });
error += net.getError();
images++;
} else if (f.getName().startsWith("5")) {
net.learn(pixels, new double[] { 0, 0, 0, 0, 1 });
error += net.getError();
images++;
} else if (f.getName().startsWith("6")) {
break;
}
} catch (Exception e) {
e.printStackTrace();
}
}
error = error / images; // average error over all trained images
System.out.println("Trained images: " + images);
System.out.println("Error: " + error);
//save
System.out.println("Save");
try {
net.saveNetwork("network.nnet");
} catch (Exception e) {
e.printStackTrace();
}
}
}
... this is my Neuron class:
package de.Marcel.NeuralNetwork;
public class Neuron {
private double input, output;
public Neuron () {
}
public void setInput(double input) {
this.input = input;
}
public void setOutput(double output) {
this.output = output;
}
public double getInput() {
return input;
}
public double getOutput() {
return output;
}
}
... and finally my NeuralNetwork
package de.Marcel.NeuralNetwork;
import java.io.File;
import java.io.FileWriter;
import java.util.Random;
public class NeuralNetwork {
private Neuron[] inputNeurons, hiddenNeurons, outputNeurons;
private double[] weightMatrix1, weightMatrix2;
private double learningRate, error;
public NeuralNetwork(int inputCount, int hiddenCount, int outputCount, double learningRate) {
this.learningRate = learningRate;
// create Neurons
// create Input
this.inputNeurons = new Neuron[inputCount];
for (int i = 0; i < inputCount; i++) {
this.inputNeurons[i] = new Neuron();
}
// createHidden
this.hiddenNeurons = new Neuron[hiddenCount];
for (int i = 0; i < hiddenCount; i++) {
this.hiddenNeurons[i] = new Neuron();
}
// createOutput
this.outputNeurons = new Neuron[outputCount];
for (int i = 0; i < outputCount; i++) {
this.outputNeurons[i] = new Neuron();
}
// create weights
Random random = new Random();
// weightMatrix1
this.weightMatrix1 = new double[inputCount * hiddenCount];
for (int i = 0; i < inputCount * hiddenCount; i++) {
this.weightMatrix1[i] = (random.nextDouble() * 2 - 1) / 0.25;
}
// weightMatrix2
this.weightMatrix2 = new double[hiddenCount * outputCount];
for (int i = 0; i < hiddenCount * outputCount; i++) {
this.weightMatrix2[i] = (random.nextDouble() * 2 - 1) / 0.25;
}
}
public void calculate(double[] input) throws Exception {
// verify input length
if (input.length == inputNeurons.length) {
// forwardPropagation
// set input array as input and output of input neurons
for (int i = 0; i < input.length; i++) {
inputNeurons[i].setInput(input[i]);
inputNeurons[i].setOutput(input[i]);
}
// calculate output of hiddenNeurons
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
double totalInput = 0;
// sum up totalInput of Neuron
for (int i = 0; i < inputNeurons.length; i++) {
Neuron iNeuron = inputNeurons[i];
totalInput += iNeuron.getOutput() * weightMatrix1[h * inputNeurons.length + i];
}
// set input
hNeuron.setInput(totalInput);
// calculate output by applying sigmoid
double calculatedOutput = sigmoid(totalInput);
// set output
hNeuron.setOutput(calculatedOutput);
}
// calculate output of outputNeurons
for (int o = 0; o < outputNeurons.length; o++) {
Neuron oNeuron = outputNeurons[o];
double totalInput = 0;
// sum up totalInput of Neuron
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
totalInput += hNeuron.getOutput() * weightMatrix2[o * hiddenNeurons.length + h];
}
// set input
oNeuron.setInput(totalInput);
// calculate output by applying sigmoid
double calculatedOutput = sigmoid(totalInput);
// set output
oNeuron.setOutput(calculatedOutput);
}
} else {
throw new Exception("[NeuralNetwork] input array is either too small or too big");
}
}
public void learn(double[] input, double[] output) throws Exception {
double partialOutput = 0;
// verify input length
if (input.length == inputNeurons.length) {
// forwardPropagation
// set input array as input and output of input neurons
for (int i = 0; i < input.length; i++) {
inputNeurons[i].setInput(input[i]);
inputNeurons[i].setOutput(input[i]);
}
// calculate output of hiddenNeurons
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
double totalInput = 0;
// sum up totalInput of Neuron
for (int i = 0; i < inputNeurons.length; i++) {
Neuron iNeuron = inputNeurons[i];
totalInput += iNeuron.getOutput() * weightMatrix1[h * inputNeurons.length + i];
}
// set input
hNeuron.setInput(totalInput);
// calculate output by applying sigmoid
double calculatedOutput = sigmoid(totalInput);
// set output
hNeuron.setOutput(calculatedOutput);
}
// calculate output of outputNeurons
for (int o = 0; o < outputNeurons.length; o++) {
Neuron oNeuron = outputNeurons[o];
double totalInput = 0;
// sum up totalInput of Neuron
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
totalInput += hNeuron.getOutput() * weightMatrix2[o * hiddenNeurons.length + h];
}
// set input
oNeuron.setInput(totalInput);
// calculate output by applying sigmoid
double calculatedOutput = sigmoid(totalInput);
// set output
oNeuron.setOutput(calculatedOutput);
}
// backPropagation
double totalError = 0;
// calculate weights in matrix2
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
for (int o = 0; o < outputNeurons.length; o++) {
Neuron oNeuron = outputNeurons[o];
// calculate weight
double delta = learningRate * derivativeSigmoid(oNeuron.getInput())
* (output[o] - oNeuron.getOutput()) * hNeuron.getOutput();
// set new weight
weightMatrix2[h + o * hiddenNeurons.length] = weightMatrix2[h + o * hiddenNeurons.length] + delta;
// update partial output
partialOutput += (derivativeSigmoid(oNeuron.getInput()) * (output[o] - oNeuron.getOutput())
* weightMatrix2[h + o * hiddenNeurons.length]);
//calculate error
totalError += Math.pow((output[o] - oNeuron.getOutput()), 2);
}
}
//set error
this.error = 0.5 * totalError;
// calculate weights in matrix1
for (int i = 0; i < inputNeurons.length; i++) {
Neuron iNeuron = inputNeurons[i];
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
// calculate weight
double delta = learningRate * derivativeSigmoid(hNeuron.getInput()) * partialOutput
* (iNeuron.getOutput());
// set new weight
weightMatrix1[i + h * inputNeurons.length] = weightMatrix1[i + h * inputNeurons.length] + delta;
}
}
} else {
throw new Exception("[NeuralNetwork] input array is either too small or too big");
}
}
// save Network
public void saveNetwork(String fileName) throws Exception {
File file = new File(fileName);
FileWriter writer = new FileWriter(file);
writer.write("weightmatrix1:");
writer.write(System.lineSeparator());
// write weightMatrix1
for (double d : weightMatrix1) {
writer.write(d + "-");
}
writer.write(System.lineSeparator());
writer.write("weightmatrix2:");
writer.write(System.lineSeparator());
// write weightMatrix2
for (double d : weightMatrix2) {
writer.write(d + "-");
}
// save
writer.close();
}
// sigmoid function
private double sigmoid(double input) {
return Math.exp(input * (-1));
}
private double derivativeSigmoid(double input) {
return sigmoid(input) * (1 - sigmoid(input));
}
public double getError() {
return error;
}
}

It looks like your sigmoid function is incorrect. It should be 1/(1+exp(-x)).
If you still run into NaN errors, it might be because evaluating the exponential directly can overflow for inputs of large magnitude (i.e., less than -10 or greater than 10), where the sigmoid is effectively saturated at 0 or 1 anyway.
Using an array of precalculated values of sigmoid(x) can prevent this problem for bigger datasets and will also help the program run more efficiently.
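For reference, a minimal sketch of the corrected sigmoid plus the suggested lookup table in Java; the table range and resolution here are illustrative choices, not something from your original code:
private double sigmoid(double input) {
    return 1.0 / (1.0 + Math.exp(-input));
}
// Precompute sigmoid on [-10, 10]; outside that range it is saturated anyway.
private static final int TABLE_SIZE = 2001;
private static final double[] SIGMOID_TABLE = new double[TABLE_SIZE];
static {
    for (int i = 0; i < TABLE_SIZE; i++) {
        double x = -10.0 + 20.0 * i / (TABLE_SIZE - 1);
        SIGMOID_TABLE[i] = 1.0 / (1.0 + Math.exp(-x));
    }
}
private static double fastSigmoid(double x) {
    if (x <= -10.0) return 0.0;
    if (x >= 10.0) return 1.0;
    int index = (int) Math.round((x + 10.0) / 20.0 * (TABLE_SIZE - 1));
    return SIGMOID_TABLE[index];
}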
Hope this helps!

Related

Backpropagation: network error of one test input rises, the others go down, what's wrong?

I am currently trying to program a neural network... for learning I want to use the backpropagation algorithm! My problem is that I don't know where my error is.
I am trying to train it on the logical AND.
My network errors after the first round are:
28.68880035284087 for INPUT 1|1
22.17048518538824 for INPUT 1|0
21.346787829014342 for INPUT 0|1
20.44791655274438 for INPUT 0|0
If I make a few iterations my errors are like this:
34.17584528001372 for INPUT 1|1
18.315643070675343 for INPUT 1|0
17.568891920535222 for INPUT 0|1
17.753497551261436 for INPUT 0|0
I have absolutely no idea why the error for INPUT 1|1 is growing, while the others get smaller...
Here's my code:
classes for the testdata:
public class Trainingset
{
private double[] input;
private double[] target;
public Trainingset(double[] input, double[] target)
{
this.input = input;
this.target = target;
}
public double[] getInput()
{
return input;
}
public double[] getTarget()
{
return target;
}
}
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
public class TrainingData
{
private List<Trainingset> trainingSets;
private Random random;
private int nextEntry;
public TrainingData()
{
random = new Random();
trainingSets = new ArrayList<Trainingset>();
nextEntry = 0;
}
public void addData(double[] input, double[] target)
{
Trainingset ts = new Trainingset(input.clone(), target.clone());
trainingSets.add(ts);
}
public Trainingset getRandomTrainingset()
{
return trainingSets.get(random.nextInt(trainingSets.size()));
}
public Trainingset getNext()
{
if(nextEntry == trainingSets.size())
nextEntry = 0;
return trainingSets.get(nextEntry++);
}
}
the networkclass:
import java.util.ArrayList;
import java.util.List;
public class FFN3
{
private List<FFNlayer3> layers;
private double learningrate = 0.45;
private double momentum = 0.9;
private double outputError;
private double networkError;
public FFN3()
{
layers = new ArrayList<>();
layers.add(new FFNlayer3(2));
layers.add(new FFNlayer3(1));
layers.get(0).setNextLayer(layers.get(1));
layers.get(1).setPrevLayer(layers.get(0));
double[][] ItoH = {
{ 0.4, 0.1 },
{ -0.1, -0.1 }
};
double[][] HtoO = {
{ 0.06, -0.4 }
};
layers.get(0).setWeights(ItoH);
layers.get(1).setWeights(HtoO);
networkError = Double.MAX_VALUE;
}
public void learn(TrainingData td)
{
Trainingset ts = td.getNext();
double[] results = compute(ts.getInput());
double error = 0;
for(int i = 0; i < results.length; i++)
{
error += Math.pow(ts.getTarget()[i] - results[i], 2);
}
networkError = error / results.length;
layers.get(layers.size()-1).updateWeights(learningrate, momentum, ts.getTarget());
layers.get(0).updateHiddenWeights(learningrate, momentum, ts.getInput());
}
public double getNetworkError()
{
return networkError;
}
public double[] compute(double[] input)
{
return layers.get(0).compute(input);
}
}
The layerclass:
public class FFNlayer3
{
private double[][] incomingWeights;
private double[][] prevWeightChanges;
private double[] neuronValues;
private double[] neuronSums;
private double[] errors;
private FFNlayer3 prevLayer;
private FFNlayer3 nextLayer;
public FFNlayer3(int neuroncount)
{
neuronValues = new double[neuroncount];
neuronSums = new double[neuroncount];
errors = new double[neuroncount];
nextLayer = null;
prevLayer = null;
}
public void setWeights(double[][] weights)
{
incomingWeights = weights;
prevWeightChanges = new double[incomingWeights.length][incomingWeights[0].length];
}
public void setPrevLayer(FFNlayer3 prevLayer)
{
this.prevLayer = prevLayer;
}
public void setNextLayer(FFNlayer3 nextLayer)
{
this.nextLayer = nextLayer;
}
public void updateWeights(double learningrate, double momentum, double[] targetValues)
{
for(int i = 0; i < errors.length; i++)
{
errors[i] = neuronValues[i] * (1 - neuronValues[i]) * (targetValues[i] - neuronValues[i]);
}
for(int i = 0; i < incomingWeights.length; i++)
{
for(int j = 0; j < incomingWeights[i].length; j++)
{
double delta = learningrate * errors[i] * prevLayer.getNeuronValues()[j];
incomingWeights[i][j] += delta + momentum * prevWeightChanges[i][j];
}
}
prevLayer.updateHiddenWeights(learningrate, momentum);
}
public void updateHiddenWeights(double learningrate, double momentum)
{
if(prevLayer==null)
return;
for(int i = 0; i < errors.length; i++)
{
for(int j = 0; j < nextLayer.getErrors().length; j++)
{
errors[i] += nextLayer.getErrors()[j] * nextLayer.getWeights()[j][i];
}
}
for(int i = 0; i < incomingWeights.length; i++)
{
for(int j = 0; j < incomingWeights[i].length; j++)
{
double delta = learningrate * errors[i] * prevLayer.getNeuronValues()[j];
incomingWeights[i][j] += delta + momentum * prevWeightChanges[i][j];
}
}
prevLayer.updateHiddenWeights(learningrate, momentum);
}
public void updateHiddenWeights(double learningrate, double momentum, double[] input)
{
for(int i = 0; i < errors.length; i++)
{
for(int j = 0; j < nextLayer.getErrors().length; j++)
{
errors[i] += nextLayer.getErrors()[j] * nextLayer.getWeights()[j][i];
}
}
for(int i = 0; i < incomingWeights.length; i++)
{
for(int j = 0; j < incomingWeights[i].length; j++)
{
double delta = learningrate * errors[i] * input[j];
incomingWeights[i][j] += delta + momentum * prevWeightChanges[i][j];
}
}
}
public double[][] getWeights()
{
return incomingWeights;
}
public double[] getErrors()
{
return errors;
}
public double[] getNeuronValues()
{
return neuronValues;
}
public double[] compute(double[] input)
{
for(int i = 0; i < neuronValues.length; i++)
{
for(int j = 0; j < incomingWeights[i].length; j++)
{
neuronSums[i] += input[j] * incomingWeights[i][j];
}
neuronValues[i] = SIGMOID(neuronSums[i]);
neuronSums = new double[neuronSums.length];
}
if(nextLayer==null)
return neuronValues;
return nextLayer.compute(neuronValues);
}
private double SIGMOID(double value)
{
return 1 / (1+ Math.exp(-value));
}
}
And the snippet from my main:
FFN3 network = new FFN3();
double[] input = new double[2];
double[] target = new double[1];
TrainingData td = new TrainingData();
input[0] = 1;
input[1] = 1;
target[0] = 1;
td.addData(input, target);
input[0] = 1;
input[1] = 0;
target[0] = 0;
//target[1] = 1;
td.addData(input, target);
input[0] = 0;
input[1] = 1;
target[0] = 0;
td.addData(input, target);
input[0] = 0;
input[1] = 0;
target[0] = 0;
td.addData(input, target);
while(Double.compare(network.getNetworkError(), 0.001)>0)
{
network.learn(td);
System.out.println(network.getNetworkError()*100);
}
I was using this document: http://www.dataminingmasters.com/uploads/studentProjects/NeuralNetworks.pdf
The values after the first epoch are similar to the values in the document... what is wrong? Is it the document, my code or both?
Hope you can help me!
You may try BigDecimal instead of double, as floating-point rounding with double could cause trouble.
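If you want to test that theory, here is a minimal sketch of accumulating the network error in BigDecimal, using the names from your learn method; whether precision is really the culprit here is an open assumption:
import java.math.BigDecimal;
import java.math.MathContext;

// Same computation as in learn(), but accumulated in BigDecimal.
BigDecimal error = BigDecimal.ZERO;
for (int i = 0; i < results.length; i++) {
    BigDecimal diff = BigDecimal.valueOf(ts.getTarget()[i] - results[i]);
    error = error.add(diff.multiply(diff));
}
networkError = error
        .divide(BigDecimal.valueOf(results.length), MathContext.DECIMAL64)
        .doubleValue();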

artificial intelligence (neural networks) - actual output never gets near the correct output

I am working on a program which should act like an XOR operator.
In order to adjust the weights I use backprop.
I have also included deep learning (which works nearly as it should, with the same struggle there), but this shouldn't be important here. (When there is an if clause like
if(hiddenNeurons.size() > 1)
{
.....
}
then the code inside that if only matters when using multiple hidden layers, which is not the case in this question.)
The problem: no matter what the input is, the output is nearly always the same (around 0.5).
The weights and biases do get adjusted.
Here is the code (there is more, but the rest isn't important):
public void learnFromData(int iterations) //this method learns from the ArrayList 'inputs' and 'outputs'
{
if(inputs.size() == outputs.size())
{
//Collections.shuffle(inputs);
for(int j = 0;j<iterations;j++)
{
for(int i = 0;i<inputs.size();i++)
{
double actualOutput = computeOutput(inputs.get(i))[0];
double expectedOutput = outputs.get(i)[0];
//System.out.println(String.format("Input: %.3f /\\ Ouput: %.4f Expected: %.4f",inputs.get(i)[0], actualOutput, expectedOutput));
double error = 0;
if (actualOutput > expectedOutput) {
error = actualOutput - expectedOutput;
} else {
error = expectedOutput - actualOutput;
}
if(i == 0){
System.out.println(String.format("Error: %.10f", error));}
learn(outputs.get(i));
}
}
}
else{
System.out.println("\nERROR: the number of inputs and outputs have to match!\n");
}
}
public double[] computeOutput(double[] inputValues)
{
for(int i = 0;i<inputValues.length;i++) //giving the inputNeurons a value
{
inputNeurons[i] = inputValues[i];
}
for(int i = 0;i<hiddenNeurons.get(0).length;i++)
{
hSums.get(0)[i] = 0.0;
}
for(int i = 0;i<aOutputNeurons.length;i++)
{
hoSums[i] = 0.0;
}
for(int i = 0;i<inputNeurons.length;i++) //calculating the sums of the hidden neurons (Input-function)
{
for(int b = 0;b<hiddenNeurons.get(0).length;b++)
{
hSums.get(0)[b] += inputNeurons[i] * ihWeights[i][b];
}
}
for(int i = 0;i<hiddenNeurons.get(0).length;i++) //Each bias-value has to be added to its associated sum
{
hSums.get(0)[i] += hBiases.get(0)[i];
}
for(int i = 0;i<hiddenNeurons.get(0).length;i++)
{
hiddenNeurons.get(0)[i] = Helper.sig(hSums.get(0)[i]); //output-function = sigmoid
}
//calculating the hSums
if(hiddenNeurons.size()>1)
{
for (int layer = 0;layer<hiddenNeurons.size()-1;layer++)
{
//calculating the sums of the layer
for(int neuron_nextLayer = 0; neuron_nextLayer < hiddenNeurons.get(layer+1).length;neuron_nextLayer++)
{
hSums.get(layer+1)[neuron_nextLayer] = 0;
for(int neuron_actualLayer = 0;neuron_actualLayer < hiddenNeurons.get(layer).length;neuron_actualLayer++)
{
hSums.get(layer+1)[neuron_nextLayer] += hiddenNeurons.get(layer)[neuron_actualLayer] * hhWeights.get(layer)[neuron_actualLayer][neuron_nextLayer];
}
}
}
}
// calculating the sums of the output neurons (Input-function)
int lastHiddenLayer = hiddenNeurons.size()-1;
for(int i = 0;i<aOutputNeurons.length;i++)
{
hoSums[i] = 0;
for(int b = 0;b<hiddenNeurons.get(lastHiddenLayer).length;b++)
{
hoSums[i] += hiddenNeurons.get(lastHiddenLayer)[b] * hoWeights[b][i];
}
hoSums[i] += hoBiases[i];
aOutputNeurons[i] = Helper.sig(hoSums[i]);
}
//weightToString();
return aOutputNeurons;
}
public void learn(double[] cValues) //correctValues
{
// calculating the output-gradients
for(int i = 0;i<aOutputNeurons.length;i++)
{
oGradients[i] = (cValues[i]-aOutputNeurons[i])*Helper.invSig(aOutputNeurons[i]);
}
//calculating the hidden-gradients
double sum; //sum of all multiplications between gradients of the output layer and the weights between the hidden neuron and each output neuron.
int lastHiddenLayer = hiddenNeurons.size()-1;
for(int i = 0;i<hiddenNeurons.get(lastHiddenLayer).length;i++)
{
sum = 0;
for(int b = 0;b<aOutputNeurons.length;b++)
{
sum += oGradients[b] * hoWeights[i][b];
}
hGradients.get(lastHiddenLayer)[i] = Helper.invSig(hiddenNeurons.get(lastHiddenLayer)[i]) * sum;
}
if(hiddenNeurons.size() > 1)
{
for(int layer = lastHiddenLayer;layer > 0;layer--)
{
for(int neuron_actualHiddenLayer = 0; neuron_actualHiddenLayer < hiddenNeurons.get(layer-1).length;neuron_actualHiddenLayer++) // neuron_actualHiddenLayer is more in the direction of the input neurons and neuron_nextHiddenLayer more in the direction of the output neurons
{
sum = 0;
for(int neuron_nextHiddenLayer = 0;neuron_nextHiddenLayer < hiddenNeurons.get(layer).length;neuron_nextHiddenLayer++)
{
sum += hGradients.get(layer)[neuron_nextHiddenLayer] * hhWeights.get(layer-1)[neuron_actualHiddenLayer][neuron_nextHiddenLayer];
}
hGradients.get(layer-1)[neuron_actualHiddenLayer] = Helper.invSig(hiddenNeurons.get(layer-1)[neuron_actualHiddenLayer]) * sum;
}
}
}
//calculating weight- and biasdeltas of input- to hidden neurons
for(int i = 0;i<inputNeurons.length;i++)
{
for(int b = 0;b<hiddenNeurons.get(0).length;b++)
{
ihPrevWeightsDeltas[i][b] = eta * hGradients.get(0)[b] * inputNeurons[i];
ihWeights[i][b] += ihPrevWeightsDeltas[i][b];
}
}
// calculating weight- and biasdeltas of hidden- to hidden neurons
if(hiddenNeurons.size() > 1)
{
for(int layer = 0;layer < hiddenNeurons.size()-1;layer++)
{
for(int neuron_actualHiddenLayer = 0; neuron_actualHiddenLayer < hiddenNeurons.get(layer).length;neuron_actualHiddenLayer++) // neuron_actualHiddenLayer is more in the direction of the input neurons and neuron_nextHiddenLayer more in the direction of the output neurons
{
for(int neuron_nextHiddenLayer = 0;neuron_nextHiddenLayer < hiddenNeurons.get(layer+1).length;neuron_nextHiddenLayer++)
{
hhPrevWeightDeltas.get(layer)[neuron_actualHiddenLayer][neuron_nextHiddenLayer] = eta * hGradients.get(layer+1)[neuron_nextHiddenLayer] * hiddenNeurons.get(layer)[neuron_actualHiddenLayer];
hhWeights.get(layer)[neuron_actualHiddenLayer][neuron_nextHiddenLayer] += hhPrevWeightDeltas.get(layer)[neuron_actualHiddenLayer][neuron_nextHiddenLayer];
hhPrevBiasDeltas.get(layer)[neuron_actualHiddenLayer] = eta*hGradients.get(layer)[neuron_actualHiddenLayer];
hBiases.get(layer)[neuron_actualHiddenLayer] += hhPrevBiasDeltas.get(layer)[neuron_actualHiddenLayer];
}
}
}
}
for(int i = 0;i<hiddenNeurons.get(0).length;i++)
{
ihPrevBiasDeltas[i] = eta*hGradients.get(0)[i];
hBiases.get(0)[i] += ihPrevBiasDeltas[i];
}
for(int i = 0;i<aOutputNeurons.length;i++)
{
hoPrevBiasDeltas[i] = eta*oGradients[i];
hoBiases[i] += hoPrevBiasDeltas[i];
}
for(int i = 0;i<hiddenNeurons.get(0).length;i++)
{
for(int b = 0;b<aOutputNeurons.length;b++)
{
hoPrevWeightsDeltas[i][b] = eta * oGradients[b] * hiddenNeurons.get(lastHiddenLayer)[i];
hoWeights[i][b] += hoPrevWeightsDeltas[i][b];
}
}
}
Because it is your own code, try building the same network with a mature project like the Neuroph library and compare the results; that should help you narrow down the problem.
Also remember that XOR is a non-linear classification problem: you need at least two layers (one input and one hidden) with a non-linear activation function. A linear classifier simply cannot be adjusted to do non-linear classification; you need a non-linear one.
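For the comparison, a minimal sketch of XOR with Neuroph; class and method names are as in the Neuroph 2.x tutorials, so verify them against the version you use:
import org.neuroph.core.data.DataSet;
import org.neuroph.core.data.DataSetRow;
import org.neuroph.nnet.MultiLayerPerceptron;
import org.neuroph.util.TransferFunctionType;

// 2 inputs, 3 hidden neurons, 1 output, sigmoid (non-linear) activation
MultiLayerPerceptron mlp =
        new MultiLayerPerceptron(TransferFunctionType.SIGMOID, 2, 3, 1);
DataSet xor = new DataSet(2, 1);
xor.add(new DataSetRow(new double[] { 0, 0 }, new double[] { 0 }));
xor.add(new DataSetRow(new double[] { 0, 1 }, new double[] { 1 }));
xor.add(new DataSetRow(new double[] { 1, 0 }, new double[] { 1 }));
xor.add(new DataSetRow(new double[] { 1, 1 }, new double[] { 0 }));
mlp.learn(xor); // then feed the same patterns to both networks and compare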

Java Backpropagation Algorithm is very slow

I have a big problem. I am trying to create a neural network and want to train it with a backpropagation algorithm. I found this tutorial: http://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/ and tried to recreate it in Java. When I use the training data he uses, I get the same results as him.
Without backpropagation my total error is nearly the same as his, and when I run backpropagation 10,000 times like him, I get nearly the same error. But he uses 2 input neurons, 2 hidden neurons and 2 outputs, while I'd like to use this neural network for OCR, so I definitely need more neurons. If I use, for example, 49 input neurons, 49 hidden neurons and 2 output neurons, it takes very long to change the weights enough to get a small error (I believe it takes forever...). I have a learning rate of 0.5. In the constructor of my network I generate the neurons and give them the same training data as in the tutorial, and for testing with more neurons I gave them random weights, inputs and targets. So can I not use this with many neurons, does it just take very long, or is something wrong with my code? Should I increase the learning rate, the bias or the start weights?
Hopefully you can help me.
package de.Marcel.NeuralNetwork;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Random;
public class Network {
private ArrayList<Neuron> inputUnit, hiddenUnit, outputUnit;
private double[] inHiWeigth, hiOutWeigth;
private double hiddenBias, outputBias;
private double learningRate;
public Network(double learningRate) {
this.inputUnit = new ArrayList<Neuron>();
this.hiddenUnit = new ArrayList<Neuron>();
this.outputUnit = new ArrayList<Neuron>();
this.learningRate = learningRate;
generateNeurons(2,2,2);
calculateTotalNetInputForHiddenUnit();
calculateTotalNetInputForOutputUnit();
}
public double calculateTotalError () {
double e = 0;
for(Neuron n : outputUnit) {
e += 0.5 * Math.pow(Math.max(n.getTarget(), n.getOutput()) - Math.min(n.getTarget(), n.getOutput()), 2.0);
}
return e;
}
private void generateNeurons(int input, int hidden, int output) {
// generate inputNeurons
for (int i = 0; i < input; i++) {
Neuron neuron = new Neuron();
// for testing give each neuron an input
if(i == 0) {
neuron.setInput(0.05d);
} else if(i == 1) {
neuron.setInput(0.10d);
}
inputUnit.add(neuron);
}
// generate hiddenNeurons
for (int i = 0; i < hidden; i++) {
Neuron neuron = new Neuron();
hiddenUnit.add(neuron);
}
// generate outputNeurons
for (int i = 0; i < output; i++) {
Neuron neuron = new Neuron();
if(i == 0) {
neuron.setTarget(0.01d);
} else if(i == 1) {
neuron.setTarget(0.99d);
}
outputUnit.add(neuron);
}
// generate Bias
hiddenBias = 0.35;
outputBias = 0.6;
// generate connections
double startWeigth = 0.15;
// generate inHiWeigths
inHiWeigth = new double[inputUnit.size() * hiddenUnit.size()];
for (int i = 0; i < inputUnit.size() * hiddenUnit.size(); i += hiddenUnit.size()) {
for (int x = 0; x < hiddenUnit.size(); x++) {
int z = i + x;
inHiWeigth[z] = round(startWeigth, 2, BigDecimal.ROUND_HALF_UP);
startWeigth += 0.05;
}
}
// generate hiOutWeigths
hiOutWeigth = new double[hiddenUnit.size() * outputUnit.size()];
startWeigth += 0.05;
for (int i = 0; i < hiddenUnit.size() * outputUnit.size(); i += outputUnit.size()) {
for (int x = 0; x < outputUnit.size(); x++) {
int z = i + x;
hiOutWeigth[z] = round(startWeigth, 2, BigDecimal.ROUND_HALF_UP);
startWeigth += 0.05;
}
}
}
private double round(double unrounded, int precision, int roundingMode)
{
BigDecimal bd = new BigDecimal(unrounded);
BigDecimal rounded = bd.setScale(precision, roundingMode);
return rounded.doubleValue();
}
private void calculateTotalNetInputForHiddenUnit() {
// calculate totalnetinput for each hidden neuron
for (int s = 0; s < hiddenUnit.size(); s++) {
double net = 0;
int x = (inHiWeigth.length / inputUnit.size());
// calculate toAdd
for (int i = 0; i < x; i++) {
int v = i + s * x;
double weigth = inHiWeigth[v];
double toAdd = weigth * inputUnit.get(i).getInput();
net += toAdd;
}
// add bias
net += hiddenBias * 1;
net = net *-1;
double output = (1.0 / (1.0 + (double)Math.exp(net)));
hiddenUnit.get(s).setOutput(output);
}
}
private void calculateTotalNetInputForOutputUnit() {
// calculate totalnetinput for each hidden neuron
for (int s = 0; s < outputUnit.size(); s++) {
double net = 0;
int x = (hiOutWeigth.length / hiddenUnit.size());
// calculate toAdd
for (int i = 0; i < x; i++) {
int v = i + s * x;
double weigth = hiOutWeigth[v];
double outputOfH = hiddenUnit.get(s).getOutput();
double toAdd = weigth * outputOfH;
net += toAdd;
}
// add bias
net += outputBias * 1;
net = net *-1;
double output = (double) (1.0 / (1.0 + Math.exp(net)));
outputUnit.get(s).setOutput(output);
}
}
private void backPropagate() {
// calculate ouputNeuron weigthChanges
double[] oldWeigthsHiOut = hiOutWeigth;
double[] newWeights = new double[hiOutWeigth.length];
for (int i = 0; i < hiddenUnit.size(); i += 1) {
double together = 0;
double[] newOuts = new double[hiddenUnit.size()];
for (int x = 0; x < outputUnit.size(); x++) {
int z = x * hiddenUnit.size() + i;
double weigth = oldWeigthsHiOut[z];
double target = outputUnit.get(x).getTarget();
double output = outputUnit.get(x).getOutput();
double totalErrorChangeRespectOutput = -(target - output);
double partialDerivativeLogisticFunction = output * (1 - output);
double totalNetInputChangeWithRespect = hiddenUnit.get(x).getOutput();
double puttedAllTogether = totalErrorChangeRespectOutput * partialDerivativeLogisticFunction
* totalNetInputChangeWithRespect;
double weigthChange = weigth - learningRate * puttedAllTogether;
// set new weigth
newWeights[z] = weigthChange;
together += (totalErrorChangeRespectOutput * partialDerivativeLogisticFunction * weigth);
double out = hiddenUnit.get(x).getOutput();
newOuts[x] = out * (1.0 - out);
}
for (int t = 0; t < newOuts.length; t++) {
inHiWeigth[t + i] = (double) (inHiWeigth[t + i] - learningRate * (newOuts[t] * together * inputUnit.get(t).getInput()));
}
hiOutWeigth = newWeights;
}
}
}
And my Neuron Class:
package de.Marcel.NeuralNetwork;
public class Neuron {
private double input, output;
private double target;
public Neuron () {
}
public void setTarget(double target) {
this.target = target;
}
public void setInput (double input) {
this.input = input;
}
public void setOutput(double output) {
this.output = output;
}
public double getInput() {
return input;
}
public double getOutput() {
return output;
}
public double getTarget() {
return target;
}
}
Think about it: you have 10,000 propagations through 49->49->2 neurons. Between the input layer and the hidden layer, you have 49 * 49 links to propagate through, so parts of your code are being executed about 24 million times (10,000 * 49 * 49). That is going to take time. You could try 100 propagations, and see how long it takes, just to give you an idea.
There are a few things that can be done to increase performance, like using a plain array instead of an ArrayList, but this is a better topic for the Code Review site. Also, don't expect this to give drastic improvements.
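A minimal sketch of that timing experiment; trainOnce() is a hypothetical stand-in for one full forward-plus-backpropagation pass over your training data:
// Time 100 passes to extrapolate the cost of 10,000.
long start = System.nanoTime();
for (int i = 0; i < 100; i++) {
    network.trainOnce(); // hypothetical: one forward + backpropagation pass
}
long elapsedMs = (System.nanoTime() - start) / 1_000_000;
System.out.println("100 passes took " + elapsedMs + " ms; expect roughly "
        + (elapsedMs * 100) + " ms for 10,000 passes.");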
Your backpropagation code has complexity O((h*o + h^2) * 10000), where h is the number of hidden neurons and o is the number of output neurons. Here's why.
You have a loop that executes for all of your hidden neurons...
for (int i = 0; i < hiddenUnit.size(); i += 1) {
... containing another loop that executes for all the output neurons...
for (int x = 0; x < outputUnit.size(); x++) {
... and an additional inner loop that executes again for all the hidden neurons...
double[] newOuts = new double[hiddenUnit.size()];
for (int t = 0; t < newOuts.length; t++) {
... and you execute all of that ten thousand times. Add on top of this O(i + h + o) [initial object creation] + O(i*h + o*h) [initial weights] + O(h*i) [calculate net inputs] + O(h*o) [calculate net outputs].
No wonder it's taking forever; your code is littered with nested loops. If you want it to go faster, factor these out - for example, combine object creation and initialization - or reduce the number of neurons. But significantly cutting the number of back propagation calls is the best way to make this run faster.
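As an illustration of the plain-array idea, here is a sketch of a forward pass over a flat weight array instead of per-neuron objects; the names are hypothetical, not taken from the question's code:
// weights[i * hiddenCount + h] connects input i to hidden neuron h;
// no Neuron objects or ArrayList lookups in the inner loop.
static double[] forwardHidden(double[] inputs, double[] weights, int hiddenCount) {
    double[] hidden = new double[hiddenCount];
    for (int h = 0; h < hiddenCount; h++) {
        double sum = 0.0;
        for (int i = 0; i < inputs.length; i++) {
            sum += inputs[i] * weights[i * hiddenCount + h];
        }
        hidden[h] = 1.0 / (1.0 + Math.exp(-sum)); // sigmoid activation
    }
    return hidden;
}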

Troubleshooting DIT FFT Radix-2 Algorithm

I have implemented a recursive radix-2 DIT FFT in Java, and a regular DFT to verify my results from the FFT, but the results from the two differ and I cannot seem to figure out why. Both are fed the entire array via the apply() methods, with start and stop indices of 0 and data.length respectively. The DFT version looks correct with a nice peak at bin 50, while the FFT one is full of garbage. What am I doing wrong?
This is the FFT implementation (adapted from http://www.engineeringproductivitytools.com/stuff/T0001/PT04.HTM "A Recursive DIT FFT Routine.", I verified by comparing to the pseudo code at https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Pseudocode):
public class DITFFT2 extends Transform {
public float[] apply(float[] data, int startIndex, int stopIndex) throws IllegalArgumentException {
int N;
float[] filteredData;
Complex[] complexData;
Complex[] filteredComplexData;
if (stopIndex < startIndex) {
throw new IllegalArgumentException("stopIndex cannot be lower than startIndex!");
}
if (stopIndex < 0 || startIndex < 0) {
throw new IllegalArgumentException("Index cannot be negative!");
}
N = stopIndex - startIndex;
filteredData = new float[N];
complexData = new Complex[N];
for (int i = startIndex; i < stopIndex; i++) {
complexData[i-startIndex] = new Complex(data[i], 0.0f);
}
filteredComplexData = transform(complexData, N);
for (int i = 0; i < N; i++) {
filteredData[i] = filteredComplexData[i].abs();
}
return filteredData;
}
public Complex[] transform(Complex[] data, int N) {
Complex x;
Complex[] result = new Complex[N];
if (N == 1) {
result[0] = data[0];
} else {
Complex[] fe = new Complex[N/2];
Complex[] fo = new Complex[N/2];
for (int i = 0; i < N/2; i++) {
fe[i] = data[2*i];
fo[i] = data[2*i+1];
}
Complex[] Fe = transform(fe, N / 2);
Complex[] Fo = transform(fo, N / 2);
for (int k = 0; k < N/2; k++) {
x = Fo[k].copy();
x.mul(getTwiddleFactor(k, N));
result[k] = Fe[k].copy();
result[k].add(x);
result[k+N/2] = Fe[k].copy();
result[k+N/2].sub(x);
}
}
return result;
}
private Complex getTwiddleFactor(int k, int N) {
return new Complex(1.0f, (float)(-2.0f * Math.PI * k / (float)N));
}
}
And this is the DFT implementation:
public class DFT extends Transform {
public float[] apply(float[] data, int startIndex, int stopIndex) throws IllegalArgumentException {
int N;
float[] filteredData;
Complex[] complexData;
Complex[] filteredComplexData;
if (stopIndex < startIndex) {
throw new IllegalArgumentException("stopIndex cannot be lower than startIndex!");
}
if (stopIndex < 0 || startIndex < 0) {
throw new IllegalArgumentException("Index cannot be negative!");
}
N = stopIndex - startIndex;
filteredData = new float[N];
complexData = new Complex[N];
filteredComplexData = new Complex[N];
for (int i = startIndex; i < stopIndex; i++) {
complexData[i-startIndex] = new Complex(data[i], 0.0f);
filteredComplexData[i-startIndex] = new Complex(0.0f, 0.0f);
}
for (int k = 0; k < N; k++) {
for (int n = 0; n < N; n++) {
Complex c = complexData[n].copy();
filteredComplexData[k].add(c.mul(new Complex(1.0f, (float)(-2*Math.PI*n*k/(float)N))));
}
}
for (int i = 0; i < N; i++) {
filteredData[i] = filteredComplexData[i].abs();
}
return filteredData;
}
}
Now, both seem to give the correct answer for [8.0, 4.0, 8.0, 0.0], which is [20.0, 4.0j, 12.0, -4.0j]. But if I feed them a sine produced by:
mBuffer = new float[1024];
float sampleRate = 1000.0f;
float frequency = 50.0f;
for (int i = 0; i < mBuffer.length; i++) {
mBuffer[i] = (float)(0.5*Math.sin(2*Math.PI*i*frequency/sampleRate));
}
then the DFT again shows the expected peak at bin 50, while the FFT output is full of garbage.
The implementation of Complex for reference:
public final class Complex {
public float mR, mTheta;
public Complex() {
mR = 0.0f;
mTheta = 0.0f;
}
public Complex(float r, float theta) {
mR = r;
mTheta = theta;
}
public Complex copy() {
return new Complex(mR, mTheta);
}
public Complex add(Complex c) {
float real, imag;
real = (float)(mR * Math.cos(mTheta) + c.mR * Math.cos(c.mTheta));
imag = (float)(mR * Math.sin(mTheta) + c.mR * Math.sin(c.mTheta));
mR = (float)Math.sqrt(Math.pow(real, 2) + Math.pow(imag, 2));
if (real != 0.0f) {
mTheta = (float)Math.atan(imag / real);
} else {
mTheta = (float)(imag > 0.0f ? Math.PI/2.0f : Math.PI*3.0f/2.0f);
}
return this;
}
public Complex sub(Complex c) {
float real, imag;
real = (float)(mR * Math.cos(mTheta) - c.mR * Math.cos(c.mTheta));
imag = (float)(mR * Math.sin(mTheta) - c.mR * Math.sin(c.mTheta));
mR = (float)Math.sqrt(Math.pow(real, 2) + Math.pow(imag, 2));
if (real != 0.0f) {
mTheta = (float)Math.atan(imag / real);
} else {
mTheta = (float)(imag > 0.0f ? Math.PI/2.0f : Math.PI*3.0f/2.0f);
}
return this;
}
public Complex mul(Complex c) {
mR = mR * c.mR;
mTheta = mTheta + c.mTheta;
return this;
}
public Complex div(Complex c) {
mR = mR / c.mR;
mTheta = mTheta - c.mTheta;
return this;
}
public Complex pow(float exp) {
mTheta = mTheta * exp;
mR = (float)Math.pow(mR, exp);
return this;
}
public float abs() {
return mR;
}
public float getRealPart() {
return (float)(mR * Math.cos(mTheta));
}
public float getImagPart() {
return (float)(mR * Math.sin(mTheta));
}
public String toStringRectangular() {
float real, imag;
StringBuilder sb = new StringBuilder();
real = (float)(mR * Math.cos(mTheta));
imag = (float)(mR * Math.sin(mTheta));
sb.append(real);
if (imag >= 0) {
sb.append(" + ");
} else {
sb.append(" - ");
}
sb.append(Math.abs(imag));
sb.append("i");
return sb.toString();
}
public String toStringExponential() {
StringBuilder sb = new StringBuilder();
sb.append(mR);
sb.append(" * e ^ ");
sb.append(mTheta);
sb.append("i");
return sb.toString();
}
public String toString() {
return toStringExponential() + " [ " + toStringRectangular() + " ] ";
}
public static Complex[] getInitializedArray(int size) {
Complex[] arr = new Complex[size];
for (int i = 0; i < arr.length; i++) {
arr[i] = new Complex(0.0f, 0.0f);
}
return arr;
}
}
Your FFT implementation seems reasonable. However, there is an issue with the use of Math.atan (which returns a value within [-pi/2, pi/2] instead of the whole [-pi, pi] range) in Complex's add and sub.
To resolve this issue you should be using:
mTheta = (float)Math.atan2(imag, real);
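Applied to the question's polar Complex class, add would then look like the sketch below (sub is analogous); Math.hypot is an equivalent replacement for the sqrt/pow combination:
public Complex add(Complex c) {
    // convert both operands to rectangular form, add, then convert back
    float real = (float) (mR * Math.cos(mTheta) + c.mR * Math.cos(c.mTheta));
    float imag = (float) (mR * Math.sin(mTheta) + c.mR * Math.sin(c.mTheta));
    mR = (float) Math.hypot(real, imag);
    mTheta = (float) Math.atan2(imag, real); // full [-pi, pi] range
    return this;
}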

Java version of the bat algorithm from Matlab

I have Matlab code for the bat algorithm and I wrote a Java version of it.
The bat algorithm is a simple optimization algorithm for finding the minimum of any function.
Here are the Matlab code and my Java version of it.
My Java version can't find the optimal result the way the Matlab version does,
and I can't find my mistake in converting the code from Matlab to Java.
Can anyone tell me where my mistake is?
import java.util.Random;
public class Bat
{
private int n;
private float A, r;
private float Qmin, Qmax;
private int d;
private int NofGen;
private float fmin;
private int fminIndex;
private float Fnew;
private int loopCounter;
private float Q[], V[][], Sol[][], UL_bound[][], fitness[], S[][], Best[];
private Random myRand;
public Bat(
int NBats,
float loudness,
float pulseRate,
float minFreq,
float maxFreq,
int NofGeneration,
int dimension
)
{
n = NBats;
A = loudness;
r = pulseRate;
Qmin = minFreq;
Qmax = maxFreq;
NofGen = NofGeneration;
d = dimension;
S = new float[n][d];
Best = new float[d];
UL_bound = new float[2][d];
//default bounds
for(int i = 0 ; i < d ; i++)
{
UL_bound[0][i] = -10000;
UL_bound[1][i] = 10000;
}
loopCounter = 0;
myRand = new Random();
Q = new float[n];
for(int i = 0 ; i < n ; i++)
Q[i] = 0;
V = new float[n][d];
for(int i = 0 ; i < n ; i++)
for(int j = 0 ; j < d ; j++)
V[i][j] = 0;
}
public void intial()
{
Sol = new float[n][d];
for(int i = 0 ; i < n ; i++)
for(int j = 0 ; j < d ; j++)
{
float t = myRand.nextFloat();
//(upper -lower)*rand + lower
Sol[i][j] = t * (UL_bound[1][j] - UL_bound[0][j]) + UL_bound[0][j];
}
fitness = new float[n];
for(int i = 0 ; i < n ; i++)
fitness[i] = function(Sol[i]);
//finding fmin
fmin = fitness[0];
fminIndex = 0;
for(int i = 0 ; i < n ; i++)
{
if (fitness[i] < fmin)
{
fmin = fitness[i];
fminIndex = i;
}
}
//setting best
for(int j = 0 ; j < d ; j++)
Best[j] = Sol[fminIndex][j];
}
public void start()
{
while(loopCounter < NofGen)
{
for(int i = 0 ; i < n ; i++)
{
Q[i] = Qmin + (Qmin - Qmax)* myRand.nextFloat();
for(int j = 0 ; j < d ; j++)
V[i][j] = V[i][j] + (Sol[i][j]-Best[j])*Q[i];
for(int j = 0 ; j < d ; j++)
S[i][j] = Sol[i][j] + V[i][j];
Sol[i] = simpleBounds(Sol[i]);
if(myRand.nextFloat() > r)
for(int j = 0 ; j < d ; j++)
S[i][j] = (float) (Best[j] + (.001 * myRand.nextFloat()) );
Fnew = function(S[i]);
if(Fnew <= fitness[i] && myRand.nextFloat() < A)
{
for(int j = 0 ; j < d ; j++)
Sol[i][j] = S[i][j];
fitness[i] = Fnew;
}
if(Fnew <= fmin)
{
fmin = Fnew;
for(int j = 0 ; j < d ; j++)
Best[j] = S[i][j];
}
}
loopCounter++;
}
}
public float[] simpleBounds(float p[])
{
for(int i = 0 ; i < d ; i++)
{
if(p[i] < UL_bound[0][i])
p[i] = UL_bound[0][i];
if(p[i] > UL_bound[1][i])
p[i] = UL_bound[1][i];
}
return p;
}
float function(float p[])
{
// Sphere function with fmin=0 at (0,0,...,0)
float sum = 0;
for(int i = 0 ; i < p.length ; i++)
sum = sum + p[i]*p[i];
return sum;
}
public float printResult()
{
System.out.println("After " + loopCounter + "Repeats :");
for(int i = 0 ; i < d ; i++)
System.out.print(Best[i] + ", ");
System.out.println ( "F(x) = " + fmin);
return fmin;
}
public void set_UL_Bound(int n, float L, float U)
{
if( n < d && n >= 0)
{
UL_bound[0][n] = L;
UL_bound[1][n] = U;
}
}
}
And this is the Matlab version:
function [best,fmin,N_iter]=bat_algorithm(para)
% Display help
help bat_algorithm.m
% Default parameters
if nargin<1, para=[20 1000 0.5 0.5]; end
n=para(1); % Population size, typically 10 to 40
N_gen=para(2); % Number of generations
A=para(3); % Loudness (constant or decreasing)
r=para(4); % Pulse rate (constant or decreasing)
% This frequency range determines the scalings
% You should change these values if necessary
Qmin=0; % Frequency minimum
Qmax=2; % Frequency maximum
% Iteration parameters
N_iter=0; % Total number of function evaluations
% Dimension of the search variables
d=5; % Number of dimensions
% Lower limit/bounds/ a vector
Lb=-3*ones(1,d);
% Upper limit/bounds/ a vector
Ub=6*ones(1,d);
% Initializing arrays
Q=zeros(n,1); % Frequency
v=zeros(n,d); % Velocities
% Initialize the population/solutions
for i=1:n,
Sol(i,:)=Lb+(Ub-Lb).*rand(1,d);
Fitness(i)=Fun(Sol(i,:));
end
% Find the initial best solution
[fmin,I]=min(Fitness);
best=Sol(I,:);
for t=1:N_gen,
% Loop over all bats/solutions
for i=1:n,
Q(i)=Qmin+(Qmin-Qmax)*rand;
v(i,:)=v(i,:)+(Sol(i,:)-best)*Q(i);
S(i,:)=Sol(i,:)+v(i,:);
% Apply simple bounds/limits
Sol(i,:)=simplebounds(Sol(i,:),Lb,Ub);
% Pulse rate
if rand>r
% The factor 0.001 limits the step sizes of random walks
S(i,:)=best+0.001*randn(1,d);
end
% Evaluate new solutions
Fnew=Fun(S(i,:));
% Update if the solution improves, or not too loud
if (Fnew<=Fitness(i)) & (rand<A) ,
Sol(i,:)=S(i,:);
Fitness(i)=Fnew;
end
% Update the current best solution
if Fnew<=fmin,
best=S(i,:);
fmin=Fnew;
end
end
N_iter=N_iter+n;
end
% Output/display
disp(['Number of evaluations: ',num2str(N_iter)]);
disp(['Best =',num2str(best),' fmin=',num2str(fmin)]);
% Application of simple limits/bounds
function s=simplebounds(s,Lb,Ub)
% Apply the lower bound vector
ns_tmp=s;
I=ns_tmp<Lb;
ns_tmp(I)=Lb(I);
% Apply the upper bound vector
J=ns_tmp>Ub;
ns_tmp(J)=Ub(J);
% Update this new move
s=ns_tmp;
function z=Fun(u)
% Sphere function with fmin=0 at (0,0,...,0)
z=sum(u.^2);
%%%%% ============ end ====================================
The difference between the two codes:
In the Matlab code:
S(i,:)=best+0.001*randn(1,d);
randn => standard normal distribution.
While in the Java code:
S[i][j] = (float) (Best[j] + (.001 * myRand.nextFloat()) );
java.util.Random.nextFloat() => a uniformly distributed float value between 0.0 and 1.0.
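So the corresponding line in the Java version should use nextGaussian(), which java.util.Random provides for exactly this (a standard normal draw):
// random walk around the best solution, matching Matlab's randn
S[i][j] = (float) (Best[j] + 0.001 * myRand.nextGaussian());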
I was looking for the solution in C# and stumbled upon this. It was enough to get the job done. Here is the solution in C#, translated from the Java with variables renamed and an additional fitness function for finding the solution of the two equations xy = 6 and x + y = 5. Also included is finding the square root of 0.3:
using System;
namespace BatAlgorithmC
{
class Program
{
static void Main(string[] args)
{
// Mybat x = new Mybat(100, 1000, 0.5, 0.5, 5, Mybat.sphere);
// Mybat x = new Mybat(1000, 1000, 0.5, 0.5, 1, Mybat.squareRoot);
Mybat x = new Mybat(1000, 1000, 0.5, 0.5, 2, Mybat.RootOfXYEquations);
Console.WriteLine("Hit any key to continue.");
Console.ReadLine();
}
}
public class Mybat
{
/**
* @param args the command line arguments
*/
public int _numberOfBats, _generations, Qmin, Qmax, N_iter, _dimension;
public double _volume, _pulseRate, min, max, fnew, fmin;
public double[][] _lowerBound, _upperBound, _velocity, _solution, S;
public double[] _fitness, _tempSolution, _bestSolution, Q;
public Random random;
//public static void main(String[] args) {
// Mybat x = new Mybat(20,1000,0.5,0.5,5, Mybat.sphere);
//}
public static void initJagged(double[][] array, int n, int d)
{
for (int i = 0; i < n; i++) array[i] = new double[d];
}
public Mybat(
int bats,
int generations,
double loud,
double pulse,
int dimension,
Func<double[], int, double> function
)
{
//initialization of variables
_numberOfBats = bats;
_generations = generations;
_volume = loud;
_pulseRate = pulse;
_dimension = dimension;
Random random = new Random();
//plan to change later and added as parameter
min = -15;
max = 15;
fmin = 0;
//decleration for the bounds
_lowerBound = new double[1][];
_upperBound = new double[1][];
Q = new double[_numberOfBats]; // frequency
_velocity = new double[_numberOfBats][]; //velocity
initJagged(_velocity, _numberOfBats, _dimension);
initJagged(_lowerBound, 1, _dimension);
initJagged(_upperBound, 1, _dimension);
//initialize solution array
_solution = new double[_numberOfBats][];
S = new double[_numberOfBats][];
_fitness = new double[_numberOfBats]; // fitness container
_bestSolution = new double[_dimension];
_tempSolution = new double[_dimension]; //temporary holder for a row in array _solution
initJagged(_solution, _numberOfBats, _dimension);
initJagged(S, _numberOfBats, _dimension);
for (int i = 0; i < _numberOfBats; i++)
{
// for minimal coding : added initialize Q[]array with '0' as element
Q[i] = 0;
for (int x = 0; x < _dimension; x++)
{
// for minimal coding : added initialize _velocity[][] array with '0' as element
_velocity[i][x] = 0;
//find random double values from LB to UB
_solution[i][x] = (random.NextDouble()*(max - min)) + min;
_tempSolution[x] = _solution[i][x];
//Console.WriteLine("sol["+i+"]["+x+"] = "+_solution[i][x]); //test line
//Console.WriteLine(rand.nextDouble()); //test line
}
_fitness[i] = function(_tempSolution, _dimension);
//initialize best and the fmin
if (i == 0 || fmin > _fitness[i])
{
fmin = _fitness[i];
for (int x = 0; x < _dimension; x++)
{
_bestSolution[x] = _solution[i][x];
}
}
Console.WriteLine("fitness[" + i + "]" + _fitness[i]); //test
}
Console.WriteLine("fmin = " + fmin); //test
// special note to these variables (below)
// change if required for maximum effectivity
Qmin = 0;
Qmax = 2;
N_iter = 1; //number of function evaluation
// bat proper
for (int loop = 0; loop < N_iter; loop++)
{
// loop over all bats/solutions
for (int nextBat = 0; nextBat < _numberOfBats; nextBat++)
{
Q[nextBat] = Qmin + ((Qmin - Qmax)*random.NextDouble());
// loop for velocity
for (int vel = 0; vel < _dimension; vel++)
{
_velocity[nextBat][vel] = _velocity[nextBat][vel] +
((_solution[nextBat][vel] - _bestSolution[vel])*Q[nextBat]);
}
//new solutions
for (int nextDimension = 0; nextDimension < _dimension; nextDimension++)
{
S[nextBat][nextDimension] = _solution[nextBat][nextDimension] +
_velocity[nextBat][nextDimension];
}
/**
* RESERVED SPOT for the QUESTIONABLE AREA ON THE
* MATLAB CODE (i think it is not needed for the java equivalent)
*/
// pulse rate
if (random.NextDouble() > _pulseRate)
{
for (int nextDimension = 0; nextDimension < _dimension; nextDimension++)
{
S[nextBat][nextDimension] = _bestSolution[nextDimension] + (0.001*random.NextGaussian());
}
}
//putting current row of _solution to a temp array
for (int nextDimension = 0; nextDimension < _dimension; nextDimension++)
{
_tempSolution[nextDimension] = S[nextBat][nextDimension];
}
fnew = function(_tempSolution, _dimension);
// update if solution is improved, and not too loud
if ((fnew <= _fitness[nextBat]) && (random.NextDouble() < _volume))
{
for (int x = 0; x < _dimension; x++)
{
_solution[nextBat][x] = S[nextBat][x];
_fitness[nextBat] = fnew;
}
}
//update current best solution
if (fnew <= fmin)
{
for (int nextDimension = 0; nextDimension < _dimension; nextDimension++)
{
_bestSolution[nextDimension] = S[nextBat][nextDimension];
fmin = fnew;
}
}
}
}
Console.WriteLine(" ");
Console.WriteLine("new fitness");
for (int i = 0; i < _numberOfBats; i++)
{
Console.WriteLine("fitness[" + i + "]" + _fitness[i]);
}
for (int nextDimension = 0; nextDimension < _dimension; nextDimension++)
{
Console.WriteLine("best[" + nextDimension + "]" + _bestSolution[nextDimension]);
}
Console.WriteLine("Fmin = " + fmin);
}
//possible that this function is not needed in java
public void set_bounds(int x, double L, double U)
{
//double temp_Lb[x];
//double temp_Ub[x];
for (int i = 0; i < x; i++)
{
_lowerBound[0][i] = L;
_upperBound[0][i] = U;
}
}
public static double sphere(double[] value, int d)
{
// sphere function where fmin is at 0
double result = 0;
for (int i = 0; i < d; i++)
{
result += (value[i]*value[i]);
}
return result;
}
public static double squareRoot(double[] value, int d)
{
// find the square root of .3
double result = 0;
for (int i = 0; i < d; i++)
{
result += Math.Abs(.3 - (value[i]*value[i]));
}
return result;
}
public static double RootOfXYEquations(double[] value, int d)
{
// solve for x and y xy = 6 and x+y = 5
double result = 0;
result += Math.Abs(5 - (value[0] + value[1]));
result += Math.Abs(6 - (value[0] * value[1]));
return result;
}
}
static class MathExtensions
{
public static double NextGaussian(this Random rand)
{
double u1 = rand.NextDouble(); //these are uniform(0,1) random doubles
double u2 = rand.NextDouble();
double mean = 0, stdDev = 1;
double randStdNormal = Math.Sqrt(-2.0 * Math.Log(u1)) *
Math.Sin(2.0 * Math.PI * u2); //random normal(0,1)
double randNormal =
mean + stdDev * randStdNormal; //random normal(mean,stdDev^2)
return randNormal;
}
}
}
This will be my first time here at Stack Overflow, so I will say sorry beforehand if my response is a bit ambiguous or has problems. I just hope this answer of mine will help future visitors to this thread who want to study the bat algorithm in Java.
Anyway, I did look at your code, since I am studying the bat algorithm at the moment.
I tried running it, and it does give results far off from the Matlab version.
What I noticed is that you just "literally" tried to convert the Matlab code without fully understanding each Matlab line. I wanted to point out all of the stuff you missed, but I am feeling lazy right now, so I will just leave my version of the bat algorithm in Java.
NOTE: I just made a running bat algorithm in Java, not an efficient, fully debugged, Java equivalent of Matlab's bat algorithm.
import java.util.Random;
public class Mybat {
/**
* @param args the command line arguments
*/
public int n, N_gen, Qmin, Qmax, N_iter, d;
public double A,r,min,max,fnew,fmin;
public double Lb[][],Ub[][],Q[],v[][],Sol[][],S[][],fitness[],temp[],best[];
public Random random;
public static void main(String[] args) {
Mybat x = new Mybat(20,1000,0.5,0.5,5);
}
public Mybat(
int bats,
int generations,
double loud,
double pulse,
int dimension
){
//initialization of variables
n=bats;
N_gen = generations;
A = loud;
r = pulse;
d = dimension;
Random rand = new Random();
//plan to change later and added as parameter
min = -15;
max = 15;
fmin = 0;
//decleration for the bounds
Lb = new double[1][d];
Ub = new double[1][d];
Q = new double[n]; // frequency
v = new double[n][d]; //velocity
//initialize solution array
Sol = new double[n][d];
S = new double[n][d];
fitness = new double[n]; // fitness container
best =new double[d];
temp = new double[d]; //temporary holder for a row in array Sol
for(int i=0;i<n;i++){
// for minimal coding : added initialize Q[]array with '0' as element
Q[i] = 0;
for(int x=0;x<d;x++){
// for minimal coding : added initialize v[][] array with '0' as element
v[i][x] = 0;
//find random double values from LB to UB
Sol[i][x]= (rand.nextDouble()*(max - min)) + min;
temp[x] = Sol[i][x];
//System.out.println("sol["+i+"]["+x+"] = "+Sol[i][x]); //test line
//System.out.println(rand.nextDouble()); //test line
}
fitness[i] = function(temp);
//initialize best and the fmin
if(i==0 || fmin > fitness[i]){
fmin = fitness[i];
for(int x=0;x<d;x++){
best[x] = Sol[i][x];
}
}
System.out.println("fitness["+i+"]"+fitness[i]); //test
}
System.out.println("fmin = "+fmin); //test
// special note to these variables (below)
// change if required for maximum effectivity
Qmin = 0;
Qmax = 2;
N_iter = 1; //number of function evaluation
// bat proper
for(int loop=0;loop<N_iter;loop++){
// loop over all bats/solutions
for(int i=0;i<n;i++){
Q[i] = Qmin+((Qmin-Qmax)*rand.nextDouble());
// loop for velocity
for(int vel=0;vel<d;vel++){
v[i][vel] = v[i][vel]+((Sol[i][vel]-best[vel])*Q[i]);
}
//new solutions
for(int x=0;x<d;x++){
S[i][x] = Sol[i][x] + v[i][x];
}
/**
* RESERVED SPOT for the QUESTIONABLE AREA ON THE
* MATLAB CODE (i think it is not needed for the java equivalent)
*/
// pulse rate
if(rand.nextDouble()>r){
for(int x=0;x<d;x++){
S[i][x] = best[x]+(0.001*rand.nextGaussian());
}
}
//putting current row of Sol to a temp array
for(int x=0;x<d;x++){
temp[x] = S[i][x];
}
fnew = function(temp);
// update if solution is improved, and not too loud
if((fnew<=fitness[i]) && (rand.nextDouble()<A)){
for(int x=0;x<d;x++){
Sol[i][x] = S[i][x];
fitness[i] = fnew;
}
}
//update current best solution
if(fnew<=fmin){
for(int x=0;x<d;x++){
best[x] = S[i][x];
fmin = fnew;
}
}
}
}
System.out.println(" ");
System.out.println("new fitness");
for(int i=0;i<n;i++){
System.out.println("fitness["+i+"]"+fitness[i]);
}
System.out.println("Fmin = "+fmin);
}
//possible that this function is not needed in java
public void set_bounds(int x, double L, double U){
//double temp_Lb[x];
//double temp_Ub[x];
for(int i=0; i<x; i++){
Lb[0][i] = L;
Ub[0][i] = U;
}
}
public double function(double value[]){
// sphere function where fmin is at 0
double result = 0;
for(int i=0;i<d;i++){
result += (value[i]*value[i]);
}
return result;
}
}
