Simple Neural Network with backpropagation in Swift

I'm trying to implement a really simple neural network with backpropagation. I'm trying to train the network on the logical AND operator, but the predictions aren't working out for me. :(
import Foundation

public class ActivationFunction {
class func sigmoid(x: Float) -> Float {
return 1.0 / (1.0 + exp(-x))
}
class func dSigmoid(x: Float) -> Float {
return x * (1 - x)
}
}
public class NeuralNetConstants {
public static let learningRate: Float = 0.3
public static let momentum: Float = 0.6
public static let iterations: Int = 100000
}
public class Layer {
private var output: [Float]
private var input: [Float]
private var weights: [Float]
private var dWeights: [Float]
init(inputSize: Int, outputSize: Int) {
self.output = [Float](repeating: 0, count: outputSize)
self.input = [Float](repeating: 0, count: inputSize + 1)
self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)
self.dWeights = [Float](repeating: 0, count: weights.count)
}
public func run(inputArray: [Float]) -> [Float] {
input = inputArray
input[input.count-1] = 1
var offSet = 0
for i in 0..<output.count {
for j in 0..<input.count {
output[i] += weights[offSet+j] * input[j]
}
output[i] = ActivationFunction.sigmoid(x: output[i])
offSet += input.count
}
return output
}
public func train(error: [Float], learningRate: Float, momentum: Float) -> [Float] {
var offset = 0
var nextError = [Float](repeating: 0, count: input.count)
for i in 0..<output.count {
let delta = error[i] * ActivationFunction.dSigmoid(x: output[i])
for j in 0..<input.count {
let weightIndex = offset + j
nextError[j] = nextError[j] + weights[weightIndex] * delta
let dw = input[j] * delta * learningRate
weights[weightIndex] += dWeights[weightIndex] * momentum + dw
dWeights[weightIndex] = dw
}
offset += input.count
}
return nextError
}
}
public class BackpropNeuralNetwork {
private var layers: [Layer] = []
public init(inputSize: Int, hiddenSize: Int, outputSize: Int) {
self.layers.append(Layer(inputSize: inputSize, outputSize: hiddenSize))
self.layers.append(Layer(inputSize: hiddenSize, outputSize: outputSize))
}
public func getLayer(index: Int) -> Layer {
return layers[index]
}
public func run(input: [Float]) -> [Float] {
var activations = input
for i in 0..<layers.count {
activations = layers[i].run(inputArray: activations)
}
return activations
}
public func train(input: [Float], targetOutput: [Float], learningRate: Float, momentum: Float) {
let calculatedOutput = run(input: input)
var error = [Float](repeating: 0, count: calculatedOutput.count)
for i in 0..<error.count {
error[i] = targetOutput[i] - calculatedOutput[i]
}
for i in (0...layers.count-1).reversed() {
error = layers[i].train(error: error, learningRate: learningRate, momentum: momentum)
}
}
}
extension ClosedRange where Bound: FloatingPoint {
public func random() -> Bound {
let range = self.upperBound - self.lowerBound
let randomValue = (Bound(arc4random_uniform(UINT32_MAX)) / Bound(UINT32_MAX)) * range + self.lowerBound
return randomValue
}
}
This is my training data; I just want my network to learn the simple logical AND operator.
My input data:
let trainingData: [[Float]] = [ [0,0], [0,1], [1,0], [1,1] ]
let trainingResults: [[Float]] = [ [0], [0], [0], [1] ]
let backProp = BackpropNeuralNetwork(inputSize: 2, hiddenSize: 3, outputSize: 1)
for _ in 0..<NeuralNetConstants.iterations {
for i in 0..<trainingResults.count {
backProp.train(input: trainingData[i], targetOutput: trainingResults[i], learningRate: NeuralNetConstants.learningRate, momentum: NeuralNetConstants.momentum)
}
for i in 0..<trainingResults.count {
let t = trainingData[i]
print("\(t[0]), \(t[1]) -- \(backProp.run(input: t)[0])")
}
}
This is my whole code for the neural network. The code is not really swifty yet, but I think it's more important to understand the theory behind neural networks first; then the code can be made more swifty.
The problem is that my results are completely wrong. This is what I get:
0.0, 0.0 -- 0.246135
0.0, 1.0 -- 0.251307
1.0, 0.0 -- 0.24325
1.0, 1.0 -- 0.240923
This is what I want to get:
0.0, 0.0 -- 0.000
0.0, 1.0 -- 0.005
1.0, 0.0 -- 0.005
1.0, 1.0 -- 0.992
Well, for comparison, the Java implementation works fine:
public class ActivationFunction {
public static float sigmoid(float x) {
return (float) (1 / (1 + Math.exp(-x)));
}
public static float dSigmoid(float x) {
return x*(1-x); // because the output is already sigmoid(x), we don't have to apply it twice
}
}
public class NeuralNetConstants {
private NeuralNetConstants() {
}
public static final float LEARNING_RATE = 0.3f;
public static final float MOMENTUM = 0.6f;
public static final int ITERATIONS = 100000;
}
import java.util.Arrays;
import java.util.Random;
public class Layer {
private float[] output;
private float[] input;
private float[] weights;
private float[] dWeights;
private Random random;
public Layer(int inputSize, int outputSize) {
output = new float[outputSize];
input = new float[inputSize + 1];
weights = new float[(1 + inputSize) * outputSize];
dWeights = new float[weights.length];
this.random = new Random();
initWeights();
}
public void initWeights() {
for (int i = 0; i < weights.length; i++) {
weights[i] = (random.nextFloat() - 0.5f) * 4f;
}
}
public float[] run(float[] inputArray) {
System.arraycopy(inputArray, 0, input, 0, inputArray.length);
input[input.length - 1] = 1; // bias
int offset = 0;
for (int i = 0; i < output.length; i++) {
for (int j = 0; j < input.length; j++) {
output[i] += weights[offset + j] * input[j];
}
output[i] = ActivationFunction.sigmoid(output[i]);
offset += input.length;
}
return Arrays.copyOf(output, output.length);
}
public float[] train(float[] error, float learningRate, float momentum) {
int offset = 0;
float[] nextError = new float[input.length];
for (int i = 0; i < output.length; i++) {
float delta = error[i] * ActivationFunction.dSigmoid(output[i]);
for (int j = 0; j < input.length; j++) {
int previousWeightIndex = offset + j;
nextError[j] = nextError[j] + weights[previousWeightIndex] * delta;
float dw = input[j] * delta * learningRate;
weights[previousWeightIndex] += dWeights[previousWeightIndex] * momentum + dw;
dWeights[previousWeightIndex] = dw;
}
offset += input.length;
}
return nextError;
}
}
public class BackpropNeuralNetwork {
private Layer[] layers;
public BackpropNeuralNetwork(int inputSize, int hiddenSize, int outputSize) {
layers = new Layer[2];
layers[0] = new Layer(inputSize, hiddenSize);
layers[1] = new Layer(hiddenSize, outputSize);
}
public Layer getLayer(int index) {
return layers[index];
}
public float[] run(float[] input) {
float[] inputActivation = input;
for (int i = 0; i < layers.length; i++) {
inputActivation = layers[i].run(inputActivation);
}
return inputActivation;
}
public void train(float[] input, float[] targetOutput, float learningRate, float momentum) {
float[] calculatedOutput = run(input);
float[] error = new float[calculatedOutput.length];
for (int i = 0; i < error.length; i++) {
error[i] = targetOutput[i] - calculatedOutput[i];
}
for (int i = layers.length - 1; i >= 0; i--) {
error = layers[i].train(error, learningRate, momentum);
}
}
}
public class NeuralNetwork {
/**
* @param args the command line arguments
*/
public static void main(String[] args) {
float[][] trainingData = new float[][] {
new float[] { 0, 0 },
new float[] { 0, 1 },
new float[] { 1, 0 },
new float[] { 1, 1 }
};
float[][] trainingResults = new float[][] {
new float[] { 0 },
new float[] { 0 },
new float[] { 0 },
new float[] { 1 }
};
BackpropNeuralNetwork backpropagationNeuralNetworks = new BackpropNeuralNetwork(2, 3,1);
for (int iterations = 0; iterations < NeuralNetConstants.ITERATIONS; iterations++) {
for (int i = 0; i < trainingResults.length; i++) {
backpropagationNeuralNetworks.train(trainingData[i], trainingResults[i],
NeuralNetConstants.LEARNING_RATE, NeuralNetConstants.MOMENTUM);
}
System.out.println();
for (int i = 0; i < trainingResults.length; i++) {
float[] t = trainingData[i];
System.out.printf("%d epoch\n", iterations + 1);
System.out.printf("%.1f, %.1f --> %.3f\n", t[0], t[1], backpropagationNeuralNetworks.run(t)[0]);
}
}
}
}

You are initializing your weights differently. Array(repeating:count:) evaluates its argument once, so you are creating a single random value and using it for every weight. What you want is a fresh random value for each weight in the array:
Replace
self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)
with
self.weights = (0..<(1 + inputSize) * outputSize).map { _ in
return (-2.0...2.0).random()
}
Besides that, please consider overwriting only the first elements of input in the Layer.run method: the plain assignment replaces the whole array (dropping the extra bias slot at the end), and input[input.count-1] = 1 then overwrites the last real input value. So instead of
input = inputArray
you should do this:
for (i, e) in inputArray.enumerated() {
self.input[i] = e
}

Related

Neural Network: Backpropagation not working (Java)

I have to create an OCR program for a school project, so I started writing a backpropagation algorithm with the help of Wikipedia. To train my network, I use the MNIST database, which I extracted a few days ago so that I have the real image files. But now the error is always about 237, and after training for a while the error and the weights become NaN. What is wrong with my code?
(A screenshot of my images folder was attached here.)
Here is my Main class, which is supposed to train my network:
package de.Marcel.NeuralNetwork;
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
public class OCR {
public static void main(String[] args) throws IOException {
// create network
NeuralNetwork net = new NeuralNetwork(784, 450, 5, 0.2);
// load Images
File file = new File("images");
int images = 0;
double error = 0;
for (File f : file.listFiles()) {
BufferedImage image = ImageIO.read(f);
int t = -1;
double[] pixels = new double[784];
for (int x = 0; x < image.getWidth(); x++) {
for (int y = 0; y < image.getHeight(); y++) {
t++;
Color c = new Color(image.getRGB(x, y));
if (c.getRed() == 0 && c.getGreen() == 0 && c.getBlue() == 0) {
pixels[t] = 1;
} else if (c.getRed() == 255 && c.getGreen() == 255 && c.getBlue() == 255) {
pixels[t] = 0;
}
}
}
try {
if (f.getName().startsWith("1")) {
net.learn(pixels, new double[] { 1, 0, 0, 0, 0 });
error += net.getError();
images++;
} else if (f.getName().startsWith("2")) {
net.learn(pixels, new double[] { 0, 1, 0, 0, 0 });
error += net.getError();
images++;
} else if (f.getName().startsWith("3")) {
net.learn(pixels, new double[] { 0, 0, 1, 0, 0 });
error += net.getError();
images++;
} else if (f.getName().startsWith("4")) {
net.learn(pixels, new double[] { 0, 0, 0, 1, 0 });
error += net.getError();
images++;
} else if (f.getName().startsWith("5")) {
net.learn(pixels, new double[] { 0, 0, 0, 0, 1 });
error += net.getError();
images++;
} else if (f.getName().startsWith("6")) {
break;
}
} catch (Exception e) {
e.printStackTrace();
}
}
error = error / images; // average error over the trained images
System.out.println("Trained images: " + images);
System.out.println("Error: " + error);
//save
System.out.println("Save");
try {
net.saveNetwork("network.nnet");
} catch (Exception e) {
e.printStackTrace();
}
}
}
... this is my Neuron class:
package de.Marcel.NeuralNetwork;
public class Neuron {
private double input, output;
public Neuron () {
}
public void setInput(double input) {
this.input = input;
}
public void setOutput(double output) {
this.output = output;
}
public double getInput() {
return input;
}
public double getOutput() {
return output;
}
}
... and finally my NeuralNetwork
package de.Marcel.NeuralNetwork;
import java.io.File;
import java.io.FileWriter;
import java.util.Random;
public class NeuralNetwork {
private Neuron[] inputNeurons, hiddenNeurons, outputNeurons;
private double[] weightMatrix1, weightMatrix2;
private double learningRate, error;
public NeuralNetwork(int inputCount, int hiddenCount, int outputCount, double learningRate) {
this.learningRate = learningRate;
// create Neurons
// create Input
this.inputNeurons = new Neuron[inputCount];
for (int i = 0; i < inputCount; i++) {
this.inputNeurons[i] = new Neuron();
}
// createHidden
this.hiddenNeurons = new Neuron[hiddenCount];
for (int i = 0; i < hiddenCount; i++) {
this.hiddenNeurons[i] = new Neuron();
}
// createOutput
this.outputNeurons = new Neuron[outputCount];
for (int i = 0; i < outputCount; i++) {
this.outputNeurons[i] = new Neuron();
}
// create weights
Random random = new Random();
// weightMatrix1
this.weightMatrix1 = new double[inputCount * hiddenCount];
for (int i = 0; i < inputCount * hiddenCount; i++) {
this.weightMatrix1[i] = (random.nextDouble() * 2 - 1) / 0.25;
}
// weightMatrix2
this.weightMatrix2 = new double[hiddenCount * outputCount];
for (int i = 0; i < hiddenCount * outputCount; i++) {
this.weightMatrix2[i] = (random.nextDouble() * 2 - 1) / 0.25;
}
}
public void calculate(double[] input) throws Exception {
// verify input length
if (input.length == inputNeurons.length) {
// forwardPropagation
// set input array as input and output of input neurons
for (int i = 0; i < input.length; i++) {
inputNeurons[i].setInput(input[i]);
inputNeurons[i].setOutput(input[i]);
}
// calculate output of hiddenNeurons
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
double totalInput = 0;
// sum up totalInput of Neuron
for (int i = 0; i < inputNeurons.length; i++) {
Neuron iNeuron = inputNeurons[i];
totalInput += iNeuron.getOutput() * weightMatrix1[h * inputNeurons.length + i];
}
// set input
hNeuron.setInput(totalInput);
// calculate output by applying sigmoid
double calculatedOutput = sigmoid(totalInput);
// set output
hNeuron.setOutput(calculatedOutput);
}
// calculate output of outputNeurons
for (int o = 0; o < outputNeurons.length; o++) {
Neuron oNeuron = outputNeurons[o];
double totalInput = 0;
// sum up totalInput of Neuron
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
totalInput += hNeuron.getOutput() * weightMatrix2[o * hiddenNeurons.length + h];
}
// set input
oNeuron.setInput(totalInput);
// calculate output by applying sigmoid
double calculatedOutput = sigmoid(totalInput);
// set output
oNeuron.setOutput(calculatedOutput);
}
} else {
throw new Exception("[NeuralNetwork] input array is either too small or to big");
}
}
public void learn(double[] input, double[] output) throws Exception {
double partialOutput = 0;
// verify input length
if (input.length == inputNeurons.length) {
// forwardPropagation
// set input array as input and output of input neurons
for (int i = 0; i < input.length; i++) {
inputNeurons[i].setInput(input[i]);
inputNeurons[i].setOutput(input[i]);
}
// calculate output of hiddenNeurons
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
double totalInput = 0;
// sum up totalInput of Neuron
for (int i = 0; i < inputNeurons.length; i++) {
Neuron iNeuron = inputNeurons[i];
totalInput += iNeuron.getOutput() * weightMatrix1[h * inputNeurons.length + i];
}
// set input
hNeuron.setInput(totalInput);
// calculate output by applying sigmoid
double calculatedOutput = sigmoid(totalInput);
// set output
hNeuron.setOutput(calculatedOutput);
}
// calculate output of outputNeurons
for (int o = 0; o < outputNeurons.length; o++) {
Neuron oNeuron = outputNeurons[o];
double totalInput = 0;
// sum up totalInput of Neuron
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
totalInput += hNeuron.getOutput() * weightMatrix2[o * hiddenNeurons.length + h];
}
// set input
oNeuron.setInput(totalInput);
// calculate output by applying sigmoid
double calculatedOutput = sigmoid(totalInput);
// set output
oNeuron.setOutput(calculatedOutput);
}
// backPropagation
double totalError = 0;
// calculate weights in matrix2
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
for (int o = 0; o < outputNeurons.length; o++) {
Neuron oNeuron = outputNeurons[o];
// calculate weight
double delta = learningRate * derivativeSigmoid(oNeuron.getInput())
* (output[o] - oNeuron.getOutput()) * hNeuron.getOutput();
// set new weight
weightMatrix2[h + o * hiddenNeurons.length] = weightMatrix2[h + o * hiddenNeurons.length] + delta;
// update partial output
partialOutput += (derivativeSigmoid(oNeuron.getInput()) * (output[o] - oNeuron.getOutput())
* weightMatrix2[h + o * hiddenNeurons.length]);
//calculate error
totalError += Math.pow((output[o] - oNeuron.getOutput()), 2);
}
}
//set error
this.error = 0.5 * totalError;
// calculate weights in matrix1
for (int i = 0; i < inputNeurons.length; i++) {
Neuron iNeuron = inputNeurons[i];
for (int h = 0; h < hiddenNeurons.length; h++) {
Neuron hNeuron = hiddenNeurons[h];
// calculate weight
double delta = learningRate * derivativeSigmoid(hNeuron.getInput()) * partialOutput
* (iNeuron.getOutput());
// set new weight
weightMatrix1[i + h * inputNeurons.length] = weightMatrix1[i + h * inputNeurons.length] + delta;
}
}
} else {
throw new Exception("[NeuralNetwork] input array is either too small or to big");
}
}
// save Network
public void saveNetwork(String fileName) throws Exception {
File file = new File(fileName);
FileWriter writer = new FileWriter(file);
writer.write("weightmatrix1:");
writer.write(System.lineSeparator());
// write weightMatrix1
for (double d : weightMatrix1) {
writer.write(d + "-");
}
writer.write(System.lineSeparator());
writer.write("weightmatrix2:");
writer.write(System.lineSeparator());
// write weightMatrix2
for (double d : weightMatrix2) {
writer.write(d + "-");
}
// save
writer.close();
}
// sigmoid function
private double sigmoid(double input) {
return Math.exp(input * (-1));
}
private double derivativeSigmoid(double input) {
return sigmoid(input) * (1 - sigmoid(input));
}
public double getError() {
return error;
}
}
It looks like your sigmoid function is incorrect. It should be 1/(1+exp(-x)).
If you still run into NaN errors, it might be because evaluating exp directly is numerically fragile for inputs of large magnitude (i.e., less than about -10 or greater than 10).
Using an array of precalculated values of sigmoid(x) can prevent this problem for bigger datasets and will also help the program run more efficiently.
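For illustration, here is a minimal sketch of such a lookup table; the class and constant names are placeholders of mine, not from the question:
public class SigmoidTable {
    private static final int RESOLUTION = 2000;   // number of table steps
    private static final double RANGE = 10.0;     // table covers [-10, 10]
    private static final double[] TABLE = new double[RESOLUTION + 1];

    static {
        // precompute sigmoid(x) on an evenly spaced grid over [-RANGE, RANGE]
        for (int i = 0; i <= RESOLUTION; i++) {
            double x = -RANGE + 2.0 * RANGE * i / RESOLUTION;
            TABLE[i] = 1.0 / (1.0 + Math.exp(-x));
        }
    }

    public static double sigmoid(double x) {
        // saturate outside the table instead of calling exp on huge values
        if (x <= -RANGE) return 0.0;
        if (x >= RANGE) return 1.0;
        int i = (int) Math.round((x + RANGE) / (2.0 * RANGE) * RESOLUTION);
        return TABLE[i];
    }
}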
Hope this helps!

Backpropagation: network error of one test input rises, the others go down, what's wrong?

I am currently trying to program a neural network, and for learning I want to use the backpropagation algorithm! My problem is that I don't know where my error is.
I am trying to train it on the logical AND.
My network errors after the first round are:
28.68880035284087 for INPUT 1|1
22.17048518538824 for INPUT 1|0
21.346787829014342 for INPUT 0|1
20.44791655274438 for INPUT 0|0
After a few iterations my errors look like this:
34.17584528001372 for INPUT 1|1
18.315643070675343 for INPUT 1|0
17.568891920535222 for INPUT 0|1
17.753497551261436 for INPUT 0|0
I have absolutely no idea why the error for INPUT 1|1 is growing, while the others get smaller...
Here's my code:
Classes for the test data:
public class Trainingset
{
private double[] input;
private double[] target;
public Trainingset(double[] input, double[] target)
{
this.input = input;
this.target = target;
}
public double[] getInput()
{
return input;
}
public double[] getTarget()
{
return target;
}
}
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
public class TrainingData
{
private List<Trainingset> trainingSets;
private Random random;
private int nextEntry;
public TrainingData()
{
random = new Random();
trainingSets = new ArrayList<Trainingset>();
nextEntry = 0;
}
public void addData(double[] input, double[] target)
{
Trainingset ts = new Trainingset(input.clone(), target.clone());
trainingSets.add(ts);
}
public Trainingset getRandomTrainingset()
{
return trainingSets.get(random.nextInt(trainingSets.size()));
}
public Trainingset getNext()
{
if(nextEntry == trainingSets.size())
nextEntry = 0;
return trainingSets.get(nextEntry++);
}
}
The network class:
import java.util.ArrayList;
import java.util.List;
public class FFN3
{
private List<FFNlayer3> layers;
private double learningrate = 0.45;
private double momentum = 0.9;
private double outputError;
private double networkError;
public FFN3()
{
layers = new ArrayList<>();
layers.add(new FFNlayer3(2));
layers.add(new FFNlayer3(1));
layers.get(0).setNextLayer(layers.get(1));
layers.get(1).setPrevLayer(layers.get(0));
double[][] ItoH = {
{ 0.4, 0.1 },
{ -0.1, -0.1 }
};
double[][] HtoO = {
{ 0.06, -0.4 }
};
layers.get(0).setWeights(ItoH);
layers.get(1).setWeights(HtoO);
networkError = Double.MAX_VALUE;
}
public void learn(TrainingData td)
{
Trainingset ts = td.getNext();
double[] results = compute(ts.getInput());
double error = 0;
for(int i = 0; i < results.length; i++)
{
error += Math.pow(ts.getTarget()[i] - results[i], 2);
}
networkError = error / results.length;
layers.get(layers.size()-1).updateWeights(learningrate, momentum, ts.getTarget());
layers.get(0).updateHiddenWeights(learningrate, momentum, ts.getInput());
}
public double getNetworkError()
{
return networkError;
}
public double[] compute(double[] input)
{
return layers.get(0).compute(input);
}
}
The layer class:
public class FFNlayer3
{
private double[][] incomingWeights;
private double[][] prevWeightChanges;
private double[] neuronValues;
private double[] neuronSums;
private double[] errors;
private FFNlayer3 prevLayer;
private FFNlayer3 nextLayer;
public FFNlayer3(int neuroncount)
{
neuronValues = new double[neuroncount];
neuronSums = new double[neuroncount];
errors = new double[neuroncount];
nextLayer = null;
prevLayer = null;
}
public void setWeights(double[][] weights)
{
incomingWeights = weights;
prevWeightChanges = new double[incomingWeights.length][incomingWeights[0].length];
}
public void setPrevLayer(FFNlayer3 prevLayer)
{
this.prevLayer = prevLayer;
}
public void setNextLayer(FFNlayer3 nextLayer)
{
this.nextLayer = nextLayer;
}
public void updateWeights(double learningrate, double momentum, double[] targetValues)
{
for(int i = 0; i < errors.length; i++)
{
errors[i] = neuronValues[i] * (1 - neuronValues[i]) * (targetValues[i] - neuronValues[i]);
}
for(int i = 0; i < incomingWeights.length; i++)
{
for(int j = 0; j < incomingWeights[i].length; j++)
{
double delta = learningrate * errors[i] * prevLayer.getNeuronValues()[j];
incomingWeights[i][j] += delta + momentum * prevWeightChanges[i][j];
}
}
prevLayer.updateHiddenWeights(learningrate, momentum);
}
public void updateHiddenWeights(double learningrate, double momentum)
{
if(prevLayer==null)
return;
for(int i = 0; i < errors.length; i++)
{
for(int j = 0; j < nextLayer.getErrors().length; j++)
{
errors[i] += nextLayer.getErrors()[j] * nextLayer.getWeights()[j][i];
}
}
for(int i = 0; i < incomingWeights.length; i++)
{
for(int j = 0; j < incomingWeights[i].length; j++)
{
double delta = learningrate * errors[i] * prevLayer.getNeuronValues()[j];
incomingWeights[i][j] += delta + momentum * prevWeightChanges[i][j];
}
}
prevLayer.updateHiddenWeights(learningrate, momentum);
}
public void updateHiddenWeights(double learningrate, double momentum, double[] input)
{
for(int i = 0; i < errors.length; i++)
{
for(int j = 0; j < nextLayer.getErrors().length; j++)
{
errors[i] += nextLayer.getErrors()[j] * nextLayer.getWeights()[j][i];
}
}
for(int i = 0; i < incomingWeights.length; i++)
{
for(int j = 0; j < incomingWeights[i].length; j++)
{
double delta = learningrate * errors[i] * input[j];
incomingWeights[i][j] += delta + momentum * prevWeightChanges[i][j];
}
}
}
public double[][] getWeights()
{
return incomingWeights;
}
public double[] getErrors()
{
return errors;
}
public double[] getNeuronValues()
{
return neuronValues;
}
public double[] compute(double[] input)
{
for(int i = 0; i < neuronValues.length; i++)
{
for(int j = 0; j < incomingWeights[i].length; j++)
{
neuronSums[i] += input[j] * incomingWeights[i][j];
}
neuronValues[i] = SIGMOID(neuronSums[i]);
neuronSums = new double[neuronSums.length];
}
if(nextLayer==null)
return neuronValues;
return nextLayer.compute(neuronValues);
}
private double SIGMOID(double value)
{
return 1 / (1+ Math.exp(-value));
}
}
And the snippet from my main:
FFN3 network = new FFN3();
double[] input = new double[2];
double[] target = new double[1];
TrainingData td = new TrainingData();
input[0] = 1;
input[1] = 1;
target[0] = 1;
td.addData(input, target);
input[0] = 1;
input[1] = 0;
target[0] = 0;
//target[1] = 1;
td.addData(input, target);
input[0] = 0;
input[1] = 1;
target[0] = 0;
td.addData(input, target);
input[0] = 0;
input[1] = 0;
target[0] = 0;
td.addData(input, target);
while(Double.compare(network.getNetworkError(), 0.001)>0)
{
network.learn(td);
System.out.println(network.getNetworkError()*100);
}
I was using this document: http://www.dataminingmasters.com/uploads/studentProjects/NeuralNetworks.pdf
The values after the first epoch are similar to the values in the document... what is wrong? Is it the document, my code or both?
Hope you can help me!
You may try BigDecimal instead of double, as doubles could cause trouble (look here for further information).

Algorithmic Puzzle About Using Two Rectangles To Enclose Points

I've been working on this problem for two days, and the best I can do is a brute force solution which is not efficient enough.
You are given a bunch of positive coordinate points ranging from (0, 0) to (1 billion, 1 billion). You must enclose all of the points with only two rectangles with the smallest possible total area. Rectangles must have sides parallel to the x-axis and y-axis. The rectangles cannot overlap, sharing the same boundary counts as overlapping. You **can** have a 0 by 0 rectangle of area zero. The sum of the areas of the two rectangles is **X**
You also have to find a single rectangle of the smallest possible area that encloses all of the points. This area is **Y**
You are trying to find **Y** - **X**.
For the following example, the answer **Y** - **X** = 107.
(4, 2), (8, 10), (1, 1), (9, 12), (14, 7), (2, 3)
Providing code would be much appreciated; if you do, please use Java or C++ if possible.
I do not want to spoil the game.
Start with the large rectangle. Then you can split on every x or y of a point.
Sort the points once by x, once by y.
Split vertically:
#######
#######
#######
#######
Split horizontally:
##
## ####
####
####
Splitting at a coordinate yields two sets of points, for which both bounding rectangles are easily computed.
Added a solution because of a comment.
As the Point class I actually use int[2], so the x/y choice can be made with a loop index. On the other hand, I had to write a small AreaCollector class where a simple Rectangle would suffice.
I also collect the points of each rectangle; without them the code would be a bit smaller.
static private class AreaCollector {
private final int[] lwb = new int[] { Integer.MAX_VALUE, Integer.MAX_VALUE };
private final int[] upb = new int[] { Integer.MIN_VALUE, Integer.MIN_VALUE };
public void add(int[] point) {
if (point[0] < lwb[0]) {
lwb[0] = point[0];
}
if (point[1] < lwb[1]) {
lwb[1] = point[1];
}
if (point[0] > upb[0]) {
upb[0] = point[0];
}
if (point[1] > upb[1]) {
upb[1] = point[1];
}
}
public int getArea() {
if (upb[0] == Integer.MIN_VALUE) { // zero points added
return 0;
}
return (upb[0] - lwb[0]) * (upb[1] - lwb[1]);
}
}
public int solve(int[][] points) {
AreaCollector ac = new AreaCollector();
for (int[] point : points) {
ac.add(point);
}
final int y = ac.getArea();
final int n = points.length;
// Best solution sofar:
int[][] ascPoints = Arrays.copyOf(points, n);
int[][] descPoints = new int[0][];
int bestX = y + 0;
for (int direction = 0; direction < 2; ++direction) {
final int dir = direction;
Arrays.sort(points, Comparator.comparingInt((pt) -> pt[dir]));
int[] ascAreas = new int[n];
AreaCollector ascAC = new AreaCollector();
for (int i = 0; i < n; ) {
int[] point = points[i];
int coord = point[direction];
for (int j = i; j < n && points[j][direction] == coord; ++j) {
ascAC.add(points[j]);
}
int area = ascAC.getArea();
for (int j = i; j < n && points[j][direction] == coord; ++j) {
ascAreas[j] = area;
++i;
}
}
int[] descAreas = new int[n];
AreaCollector descAC = new AreaCollector();
for (int i = n - 1; i >= 0; ) {
int[] point = points[i];
int coord = point[direction];
for (int j = i; j >= 0 && points[j][direction] == coord; --j) {
descAC.add(points[j]);
}
int area = descAC.getArea();
for (int j = i; j >= 0 && points[j][direction] == coord; --j) {
descAreas[j] = area;
--i;
}
}
int bestI = -1;
for (int i = 0; i < n - 1; ++i) {
if (points[i][direction] != points[i + 1][direction]) {
int x = ascAreas[i] + descAreas[i + 1];
if (x < bestX) {
bestX = x;
bestI = i;
}
}
}
if (bestI != -1) {
ascPoints = Arrays.copyOfRange(points, 0, bestI + 1);
descPoints = Arrays.copyOfRange(points, bestI + 1, n);
}
}
return y - bestX;
}
As Comparator I used Java 8's terse notation. As you can see, the complexity of the hand-coded part is O(N), superseded by the O(N log N) of Arrays.sort.
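For reference, a tiny driver for the example from the question might look like this, assuming solve and AreaCollector above live in a class hypothetically named TwoRectangles (the name is mine, not the poster's):
public class TwoRectanglesDemo {
    public static void main(String[] args) {
        // the example points from the question; the expected answer is Y - X = 107
        int[][] points = { { 4, 2 }, { 8, 10 }, { 1, 1 }, { 9, 12 }, { 14, 7 }, { 2, 3 } };
        System.out.println(new TwoRectangles().solve(points)); // prints 107
    }
}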
Here's a solution in Java. After calculating area Y, it sorts the points by x coordinate and computes the total area of the two rectangles obtained by splitting the array into two halves at each x coordinate (with special handling if two points share the same x value). Then it does the same for the y coordinates. The minimum over all splits is the area X.
import java.util.Arrays;
import java.util.Comparator;
public class Puzzle {
public static void main(String[] args) {
int[][] COORDINATES_1 = { { 4, 2 }, { 8, 10 }, { 1, 1 }, { 9, 12 }, { 14, 7 }, { 2, 3 } };
int[][] COORDINATES_2 = { { 2, 1 }, { 2, 2 }, { 3, 1 }, { 3, 3 }, { 4, 3 }, { 5, 3 }, { 5, 4 }, { 6, 4 } };
int[][] COORDINATES_3 = { { 4, 2 } };
solve(COORDINATES_1);
solve(COORDINATES_2);
solve(COORDINATES_3);
}
public static void solve(int[][] coordinates) {
int size = coordinates.length;
int y = calcMinRectArea(coordinates, 0, size);
// sort by x coordinates
Arrays.sort(coordinates, new Comparator<int[]>() {
@Override
public int compare(int[] o1, int[] o2) {
return o1[0] - o2[0];
}
});
int x = y;
for (int i = 1; i < size; i++) {
if (coordinates[i][0] == coordinates[i - 1][0])
continue; // several coordinates with the same x coordinates
x = Math.min(calcMinRectArea(coordinates, 0, i) + calcMinRectArea(coordinates, i, size - i), x);
}
// sort by y coordinates
Arrays.sort(coordinates, new Comparator<int[]>() {
@Override
public int compare(int[] o1, int[] o2) {
return o1[1] - o2[1];
}
});
for (int i = 1; i < size; i++) {
if (coordinates[i][1] == coordinates[i - 1][1])
continue; // several coordinates with the same y coordinates
x = Math.min(calcMinRectArea(coordinates, 0, i) + calcMinRectArea(coordinates, i, size - i), x);
}
System.out.printf("Y = %d, X = %d, Y - X = %d\n", y, x, y - x);
}
private static int calcMinRectArea(int[][] coords, int start, int length) {
if (length == 0)
return 0;
int minX = coords[start][0];
int maxX = minX;
int minY = coords[start][1];
int maxY = minY;
for (int i = start + 1; i < start + length; i++) {
int x = coords[i][0];
minX = Math.min(minX, x);
maxX = Math.max(maxX, x);
int y = coords[i][1];
minY = Math.min(minY, y);
maxY = Math.max(maxY, y);
}
return (maxX - minX) * (maxY - minY);
}
}

Troubleshooting DIT FFT Radix-2 Algorithm

I have implemented a recursive radix-2 DIT FFT in Java, and a regular DFT to verify my results from the FFT, but the results from the two differ and I cannot seem to figure out why. Both are fed the entire array via the apply() methods, with start and stop indices of 0 and data.length respectively. The DFT version looks correct, with a nice peak at bin 50, while the FFT one is full of garbage. What am I doing wrong?
This is the FFT implementation (adapted from http://www.engineeringproductivitytools.com/stuff/T0001/PT04.HTM "A Recursive DIT FFT Routine.", I verified by comparing to the pseudo code at https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Pseudocode):
public class DITFFT2 extends Transform {
public float[] apply(float[] data, int startIndex, int stopIndex) throws IllegalArgumentException {
int N;
float[] filteredData;
Complex[] complexData;
Complex[] filteredComplexData;
if (stopIndex < startIndex) {
throw new IllegalArgumentException("stopIndex cannot be lower than startIndex!");
}
if (stopIndex < 0 || startIndex < 0) {
throw new IllegalArgumentException("Index cannot be negative!");
}
N = stopIndex - startIndex;
filteredData = new float[N];
complexData = new Complex[N];
for (int i = startIndex; i < stopIndex; i++) {
complexData[i-startIndex] = new Complex(data[i], 0.0f);
}
filteredComplexData = transform(complexData, N);
for (int i = 0; i < N; i++) {
filteredData[i] = filteredComplexData[i].abs();
}
return filteredData;
}
public Complex[] transform(Complex[] data, int N) {
Complex x;
Complex[] result = new Complex[N];
if (N == 1) {
result[0] = data[0];
} else {
Complex[] fe = new Complex[N/2];
Complex[] fo = new Complex[N/2];
for (int i = 0; i < N/2; i++) {
fe[i] = data[2*i];
fo[i] = data[2*i+1];
}
Complex[] Fe = transform(fe, N / 2);
Complex[] Fo = transform(fo, N / 2);
for (int k = 0; k < N/2; k++) {
x = Fo[k].copy();
x.mul(getTwiddleFactor(k, N));
result[k] = Fe[k].copy();
result[k].add(x);
result[k+N/2] = Fe[k].copy();
result[k+N/2].sub(x);
}
}
return result;
}
private Complex getTwiddleFactor(int k, int N) {
return new Complex(1.0f, (float)(-2.0f * Math.PI * k / (float)N));
}
}
And this is the DFT implementation:
public class DFT extends Transform {
public float[] apply(float[] data, int startIndex, int stopIndex) throws IllegalArgumentException {
int N;
float[] filteredData;
Complex[] complexData;
Complex[] filteredComplexData;
if (stopIndex < startIndex) {
throw new IllegalArgumentException("stopIndex cannot be lower than startIndex!");
}
if (stopIndex < 0 || startIndex < 0) {
throw new IllegalArgumentException("Index cannot be negative!");
}
N = stopIndex - startIndex;
filteredData = new float[N];
complexData = new Complex[N];
filteredComplexData = new Complex[N];
for (int i = startIndex; i < stopIndex; i++) {
complexData[i-startIndex] = new Complex(data[i], 0.0f);
filteredComplexData[i-startIndex] = new Complex(0.0f, 0.0f);
}
for (int k = 0; k < N; k++) {
for (int n = 0; n < N; n++) {
Complex c = complexData[n].copy();
filteredComplexData[k].add(c.mul(new Complex(1.0f, (float)(-2*Math.PI*n*k/(float)N))));
}
}
for (int i = 0; i < N; i++) {
filteredData[i] = filteredComplexData[i].abs();
}
return filteredData;
}
}
Now, both seem to give the correct answer for [8.0, 4.0, 8.0, 0.0], which is [20.0, 4.0j, 12.0, -4.0j]. But if I feed them a sine produced by:
mBuffer = new float[1024];
float sampleRate = 1000.0f;
float frequency = 50.0f;
for (int i = 0; i < mBuffer.length; i++) {
mBuffer[i] = (float)(0.5*Math.sin(2*Math.PI*i*frequency/sampleRate));
}
the outputs disagree: the DFT shows the expected peak at bin 50, while the FFT output is full of garbage.
Here is the implementation of Complex, for reference:
public final class Complex {
public float mR, mTheta;
public Complex() {
mR = 0.0f;
mTheta = 0.0f;
}
public Complex(float r, float theta) {
mR = r;
mTheta = theta;
}
public Complex copy() {
return new Complex(mR, mTheta);
}
public Complex add(Complex c) {
float real, imag;
real = (float)(mR * Math.cos(mTheta) + c.mR * Math.cos(c.mTheta));
imag = (float)(mR * Math.sin(mTheta) + c.mR * Math.sin(c.mTheta));
mR = (float)Math.sqrt(Math.pow(real, 2) + Math.pow(imag, 2));
if (real != 0.0f) {
mTheta = (float)Math.atan(imag / real);
} else {
mTheta = (float)(imag > 0.0f ? Math.PI/2.0f : Math.PI*3.0f/2.0f);
}
return this;
}
public Complex sub(Complex c) {
float real, imag;
real = (float)(mR * Math.cos(mTheta) - c.mR * Math.cos(c.mTheta));
imag = (float)(mR * Math.sin(mTheta) - c.mR * Math.sin(c.mTheta));
mR = (float)Math.sqrt(Math.pow(real, 2) + Math.pow(imag, 2));
if (real != 0.0f) {
mTheta = (float)Math.atan(imag / real);
} else {
mTheta = (float)(imag > 0.0f ? Math.PI/2.0f : Math.PI*3.0f/2.0f);
}
return this;
}
public Complex mul(Complex c) {
mR = mR * c.mR;
mTheta = mTheta + c.mTheta;
return this;
}
public Complex div(Complex c) {
mR = mR / c.mR;
mTheta = mTheta - c.mTheta;
return this;
}
public Complex pow(float exp) {
mTheta = mTheta * exp;
mR = (float)Math.pow(mR, exp);
return this;
}
public float abs() {
return mR;
}
public float getRealPart() {
return (float)(mR * Math.cos(mTheta));
}
public float getImagPart() {
return (float)(mR * Math.sin(mTheta));
}
public String toStringRectangular() {
float real, imag;
StringBuilder sb = new StringBuilder();
real = (float)(mR * Math.cos(mTheta));
imag = (float)(mR * Math.sin(mTheta));
sb.append(real);
if (imag >= 0) {
sb.append(" + ");
} else {
sb.append(" - ");
}
sb.append(Math.abs(imag));
sb.append("i");
return sb.toString();
}
public String toStringExponential() {
StringBuilder sb = new StringBuilder();
sb.append(mR);
sb.append(" * e ^ ");
sb.append(mTheta);
sb.append("i");
return sb.toString();
}
public String toString() {
return toStringExponential() + " [ " + toStringRectangular() + " ] ";
}
public static Complex[] getInitializedArray(int size) {
Complex[] arr = new Complex[size];
for (int i = 0; i < arr.length; i++) {
arr[i] = new Complex(0.0f, 0.0f);
}
return arr;
}
}
Your FFT implementation seems reasonable. However, there is an issue with the use of Math.atan (which returns a value within [-pi/2, pi/2] instead of the whole [-pi, pi] range) in Complex's add and sub.
To resolve this issue you should be using:
mTheta = (float)Math.atan2(imag, real);
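Applied to the add method of the Complex class above (sub needs the same change), the corrected conversion back to polar form would look roughly like this:
public Complex add(Complex c) {
    // convert both operands to rectangular form and add componentwise
    float real = (float)(mR * Math.cos(mTheta) + c.mR * Math.cos(c.mTheta));
    float imag = (float)(mR * Math.sin(mTheta) + c.mR * Math.sin(c.mTheta));
    // back to polar form; atan2 recovers the angle over the full [-pi, pi] range
    mR = (float)Math.sqrt(real * real + imag * imag);
    mTheta = (float)Math.atan2(imag, real);
    return this;
}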

Parallel vs. serial implementation explanation

I have implemented serial and parallel algorithms for solving linear systems using the Jacobi method. Both implementations converge and give correct solutions.
I am having trouble understanding:
How can the parallel implementation converge after such a low number of iterations compared to the serial one (the same method is used in both)? Am I facing some concurrency issue that I am not aware of?
How can the number of iterations vary from run to run in the parallel implementation (6, 7)?
Thanks!
Program output:
Mathematica solution: {{-1.12756}, {4.70371}, {-1.89272}, {1.56218}}
Serial: iterations=7194 , error=false, solution=[-1.1270591, 4.7042074, -1.8922218, 1.5626835]
Parallel: iterations=6 , error=false, solution=[-1.1274619, 4.7035804, -1.8927546, 1.5621948]
Code:
Main
import java.util.Arrays;
public class Main {
public static void main(String[] args) {
Serial s = new Serial();
Parallel p = new Parallel(2);
s.solve();
p.solve();
System.out.println("Mathematica solution: {{-1.12756}, {4.70371}, {-1.89272}, {1.56218}}");
System.out.println(String.format("Serial: iterations=%d , error=%s, solution=%s", s.iter, s.errorFlag, Arrays.toString(s.data.solution)));
System.out.println(String.format("Parallel: iterations=%d , error=%s, solution=%s", p.iter, p.errorFlag, Arrays.toString(p.data.solution)));
}
}
Data
public class Data {
public float A[][] = {{2.886139567217389f, 0.9778259187352214f, 0.9432146432722157f, 0.9622157488990459f}
,{0.3023479007910952f,0.7503803506938734f,0.06163831478699766f,0.3856445043958068f}
,{0.4298384105199724f, 0.7787439716945019f, 1.838686110345417f, 0.6282668788698587f}
,{0.27798718418255075f, 0.09021764079496353f, 0.8765867330141233f, 1.246036349549629f}};
public float b[] = {1.0630309381779384f,3.674438173599066f,0.6796639099285651f,0.39831385324794155f};
public int size = A.length;
public float x[] = new float[size];
public float solution[] = new float[size];
}
Parallel
import java.util.Arrays;
public class Parallel {
private final int workers;
private float[] globalNorm;
public int iter;
public int maxIter = 1000000;
public double epsilon = 1.0e-3;
public boolean errorFlag = false;
public Data data = new Data();
public Parallel(int workers) {
this.workers = workers;
this.globalNorm = new float[workers];
Arrays.fill(globalNorm, 0);
}
public void solve() {
JacobiWorker[] threads = new JacobiWorker[workers];
int batchSize = data.size / workers;
float norm;
do {
for(int i=0;i<workers;i++) {
threads[i] = new JacobiWorker(i,batchSize);
threads[i].start();
}
for(int i=0;i<workers;i++)
try {
threads[i].join();
} catch (InterruptedException e) {
e.printStackTrace();
}
// At this point all worker calculations are done!
norm = 0;
for (float d : globalNorm) if (d > norm) norm = d;
if (norm < epsilon)
errorFlag = false; // Converged
else
errorFlag = true; // No desired convergence
} while (norm >= epsilon && ++iter <= maxIter);
}
class JacobiWorker extends Thread {
private final int idx;
private final int batchSize;
JacobiWorker(int idx, int batchSize) {
this.idx = idx;
this.batchSize = batchSize;
}
@Override
public void run() {
int upper = idx == workers - 1 ? data.size : (idx + 1) * batchSize;
float localNorm = 0, diff = 0;
for (int j = idx * batchSize; j < upper; j++) { // for every equation in the batch
float s = 0;
for (int i = 0; i < data.size; i++) { // for every variable in the equation
if (i != j)
s += data.A[j][i] * data.x[i];
data.solution[j] = (data.b[j] - s) / data.A[j][j];
}
diff = Math.abs(data.solution[j] - data.x[j]);
if (diff > localNorm) localNorm = diff;
data.x[j] = data.solution[j];
}
globalNorm[idx] = localNorm;
}
}
}
Serial
public class Serial {
public int iter;
public int maxIter = 1000000;
public double epsilon = 1.0e-3;
public boolean errorFlag = false;
public Data data = new Data();
public void solve() {
float norm,diff=0;
do {
for(int i=0;i<data.size;i++) {
float s=0;
for (int j = 0; j < data.size; j++) {
if (i != j)
s += data.A[i][j] * data.x[j];
data.solution[i] = (data.b[i] - s) / data.A[i][i];
}
}
norm = 0;
for (int i=0;i<data.size;i++) {
diff = Math.abs(data.solution[i]-data.x[i]); // Calculate convergence
if (diff > norm) norm = diff;
data.x[i] = data.solution[i];
}
if (norm < epsilon)
errorFlag = false; // Converged
else
errorFlag = true; // No desired convergence
} while (norm >= epsilon && ++iter <= maxIter);
}
}
I think it's a matter of implementation and not parallelization. Look at what happens with Parallel p = new Parallel(1):
Mathematica solution: {{-1.12756}, {4.70371}, {-1.89272}, {1.56218}}
Serial: iterations=7194 , error=false, solution=[-1.1270591, 4.7042074, -1.8922218, 1.5626835]
Parallel: iterations=6 , error=false, solution=[-1.1274619, 4.7035804, -1.8927546, 1.5621948]
As it turns out, your second implementation is not doing exactly the same thing as your first one: the serial version copies data.solution into data.x (and measures convergence) only after a full sweep over all equations, while the parallel worker writes data.x[j] back immediately inside the sweep, so later equations already see the updated values, which converges much faster on this system.
I added this into your parallel version and it ran in the same number of iterations.
for (int i = idx * batchSize; i < upper; i++) {
diff = Math.abs(data.solution[i] - data.x[i]); // calculate convergence
if (diff > localNorm)
localNorm = diff;
data.x[i] = data.solution[i];
}
}
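If you want the parallel version to be a faithful Jacobi iteration, another option is to let the workers only write data.solution and to publish x (and compute the norm) in solve() after all threads have joined, so every row of a sweep reads the same old x. A sketch of a reworked worker body under that assumption, with the field names from the code above:
// hypothetical rework: the worker computes its batch from the old x only
public void run() {
    int upper = idx == workers - 1 ? data.size : (idx + 1) * batchSize;
    for (int j = idx * batchSize; j < upper; j++) {
        float s = 0;
        for (int i = 0; i < data.size; i++) {
            if (i != j)
                s += data.A[j][i] * data.x[i]; // reads old values only
        }
        data.solution[j] = (data.b[j] - s) / data.A[j][j];
    }
}
In solve(), after the join loop, the norm would then be computed and data.x updated from data.solution in one place, exactly as the serial version does.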
