I can't seem to find what's wrong with my neural net, despite verifying my net based on this example, which suggests my backprop and forward prop is working fine. However, after training on XOR my net returns around 0.5 for the output regardless of the input. In other words, the net seems to be minimizing the error as best it can without seeing any correlation between the input and the output. Since a single iteration of back propagation seems to be working fine, my instinct would suggest the problem lies somehow in the iterations that follow. However, there isn't any obvious problem that would cause this, leaving me quite stumped.
I've looked at other threads where similar problems have arisen, but it seems most of the time their error is either extremely niche to the way they set up their net, or their parameters such as learning rate or epochs is really off. Is anyone familiar with a case like this?
public class Net
{
int[] sizes;
double LEARNING_RATE;
double[][][] weights;
double[][] bias;
Random rand = new Random(); //53489085
public Net(int[] sizes_, double LEARNING_RATE_)
{
LEARNING_RATE = LEARNING_RATE_;
sizes = sizes_;
int numInputs = sizes[0];
double range = 1.0 / Math.sqrt(numInputs);
bias = new double[sizes.length - 1][];
weights = new double[sizes.length - 1][][];
for(int w_layer = 0; w_layer < weights.length; w_layer++)
{
bias[w_layer] = new double[sizes[w_layer+1]];
weights[w_layer] = new double[sizes[w_layer+1]][sizes[w_layer]];
for(int j = 0; j < weights[w_layer].length; j++)
{
bias[w_layer][j] = 2*range*rand.nextDouble() - range;
for(int i = 0; i < weights[w_layer][0].length; i++)
{
weights[w_layer][j][i] = 2*range*rand.nextDouble() - range;
}
}
}
}
public double[] evaluate(double[] image_vector)
{
return forwardPass(image_vector)[sizes.length-1];
}
public double totalError(double[][] expec, double[][] actual)
{
double sum = 0;
for(int i = 0; i < expec.length; i++)
{
sum += error(expec[i], evaluate(actual[i]));
}
return sum / expec.length;
}
private double error(double[] expec, double[] actual)
{
double sum = 0;
for(int i = 0; i < expec.length; i++)
{
double del = expec[i] - actual[i];
sum += 0.5 * del * del;
}
return sum;
}
public void backpropagate(double[][] image_vector, double[][] outputs)
{
double[][][] deltaWeights = new double[weights.length][][];
double[][] deltaBias = new double[weights.length][];
for(int w = 0; w < weights.length; w++)
{
deltaBias[w] = new double[bias[w].length];
deltaWeights[w] = new double[weights[w].length][];
for(int j = 0; j < weights[w].length; j++)
{
deltaWeights[w][j] = new double[weights[w][j].length];
}
}
for(int batch = 0; batch < image_vector.length; batch++)
{
double[][] neuronVals = forwardPass(image_vector[batch]);
/* OUTPUT DELTAS */
int w_layer = weights.length-1;
double[] deltas = new double[weights[w_layer].length];
for(int j = 0; j < weights[w_layer].length; j++)
{
double actual = neuronVals[w_layer + 1][j];
double expec = outputs[batch][j];
double deltaErr = actual - expec;
double deltaSig = actual * (1 - actual);
double delta = deltaErr * deltaSig;
deltas[j] = delta;
deltaBias[w_layer][j] += delta;
for(int i = 0; i < weights[w_layer][0].length; i++)
{
deltaWeights[w_layer][j][i] += delta * neuronVals[w_layer][i];
}
}
w_layer--;
/* REST OF THE DELTAS */
while(w_layer >= 0)
{
double[] nextDeltas = new double[weights[w_layer].length];
for(int j = 0; j < weights[w_layer].length; j++)
{
double outNeur = neuronVals[w_layer+1][j];
double deltaSig = outNeur * (1 - outNeur);
double sum = 0;
for(int i = 0; i < weights[w_layer+1].length; i++)
{
sum += weights[w_layer+1][i][j] * deltas[i];
}
double delta = sum * deltaSig;
nextDeltas[j] = delta;
deltaBias[w_layer][j] += delta;
for(int i = 0; i < weights[w_layer][0].length; i++)
{
deltaWeights[w_layer][j][i] += delta * neuronVals[w_layer][i];
}
}
deltas = nextDeltas;
w_layer--;
}
}
for(int w_layer = 0; w_layer < weights.length; w_layer++)
{
for(int j = 0; j < weights[w_layer].length; j++)
{
deltaBias[w_layer][j] /= (double) image_vector.length;
bias[w_layer][j] -= LEARNING_RATE * deltaBias[w_layer][j];
for(int i = 0; i < weights[w_layer][j].length; i++)
{
deltaWeights[w_layer][j][i] /= (double) image_vector.length; // average of batches
weights[w_layer][j][i] -= LEARNING_RATE * deltaWeights[w_layer][j][i];
}
}
}
}
public double[][] forwardPass(double[] image_vector)
{
double[][] outputs = new double[sizes.length][];
double[] inputs = image_vector;
for(int w = 0; w < weights.length; w++)
{
outputs[w] = inputs;
double[] output = new double[weights[w].length];
for(int j = 0; j < weights[w].length; j++)
{
output[j] = bias[w][j];
for(int i = 0; i < weights[w][j].length; i++)
{
output[j] += weights[w][j][i] * inputs[i];
}
output[j] = sigmoid(output[j]);
}
inputs = output;
}
outputs[outputs.length-1] = inputs.clone();
return outputs;
}
static public double sigmoid(double val)
{
return 1.0 / (1.0 + Math.exp(-val));
}
}
And my XOR class looks like this. It's very unlikely that the error lies in this part given it's simplicity, but I figured it couldn't hurt to post just in case I have some fundamental misunderstanding to how XOR works. My net is set up to take examples in batches, but as you can see below for this particular example I send it batches of one, or effectively not using batches.
public class SingleLayer {
static int numEpochs = 10000;
static double LEARNING_RATE = 0.001;
static int[] sizes = new int[] {2, 2, 1};
public static void main(String[] args)
{
System.out.println("Initializing randomly generate neural net...");
Net n = new Net(sizes, LEARNING_RATE);
System.out.println("Complete!");
System.out.println("Loading dataset...");
double[][] inputs = new double[4][2];
double[][] outputs = new double[4][1];
inputs[0] = new double[] {1, 1};
outputs[0] = new double[] {0};
inputs[1] = new double[] {1, 0};
outputs[1] = new double[] {1};
inputs[2] = new double[] {0, 1};
outputs[2] = new double[] {1};
inputs[3] = new double[] {0, 0};
outputs[3] = new double[] {0};
System.out.println("Complete!");
System.out.println("STARTING ERROR: " + n.totalError(outputs, inputs));
for(int epoch = 0; epoch < numEpochs; epoch++)
{
double[][] in = new double[1][2];
double[][] out = new double[1][1];
int num = (int)(Math.random()*inputs.length);
in[0] = inputs[num];
out[0] = outputs[num];
n.backpropagate(inputs, outputs);
System.out.println("ERROR: " + n.totalError(out, in));
}
System.out.println("Prediction After Training: " + n.evaluate(inputs[0])[0] + " Expected: " + outputs[0][0]);
System.out.println("Prediction After Training: " + n.evaluate(inputs[1])[0] + " Expected: " + outputs[1][0]);
System.out.println("Prediction After Training: " + n.evaluate(inputs[2])[0] + " Expected: " + outputs[2][0]);
System.out.println("Prediction After Training: " + n.evaluate(inputs[3])[0] + " Expected: " + outputs[3][0]);
}
}
Can anyone provide some insight as to what may be wrong? My parameters are pretty well defined and I've followed all the suggestions for how the weights should be initialized and what the learning rate should be etc. Thanks!
You're only presenting the first 3 inputs to your neural network, because the following line is wrong:
int num = (int)(Math.random() * 3);
change that to
int num = (int)(Math.random() * inputs.length);
to use all 4 possible inputs.
I figured it out. I wasn't running enough epochs. That seems a little silly to me but this visualization revealed to me that the net lingers on answers ~0.5 for a long time before reducing the error to less than 0.00001.
So I'm having the following problem: I have a method that breaks a big matrix into smaller blocks of the same size. After I do some operations on the blocks, I want to reconstruct the big matrix in the right order, but I'm going wrong at it somehow.
The following code reconstructs correctly a 4x4 matrix that breaks into 2x2, but for any other dimensions, it's not working properly.
public long[][] blocksToMatrix(List<long[][]> blocks, int blockDimension, int width, int height ){
long[][] yuvMatrix = new long[height][width];
int heightPos = 0;
int widthPos = 0;
for (int i = 0; i < blocks.size(); i++) {
long[][] yuvBlock = blocks.get(i);
int heightPosTemp = heightPos;
for (int j = 0; j < blockDimension * blockDimension; j++) {
yuvMatrix[heightPos][widthPos] = yuvBlock[j / blockDimension][j % blockDimension];
widthPos++;
if (widthPos >= width){
widthPos = (i * blockDimension) % width;
heightPos++;
}
if (widthPos == ((i + 1) * blockDimension) % width){
widthPos = (i * blockDimension) % width;
heightPos++;
}
}
if (heightPos == height ){
heightPos = heightPosTemp;
}
else {
heightPos = (i * blockDimension) % height;
}
widthPos = ((i + 1) * blockDimension) % width;
}
return yuvMatrix;
}
The method I used to break the matrix:
public List<long[][]> matrixToBlocks(long[][] yuvMatrix, int blockDimension, int width, int height){
int blocksSize = width / blockDimension * (height / blockDimension);
List<long[][]> blocks = new ArrayList<long[][]>();
for (int i = 0; i < blocksSize; i++) {
long[][] subBlock = new long[blockDimension][blockDimension];
int heightPos = (blockDimension * (i / blockDimension)) % height;
int widthPos = (blockDimension * i) % width;
if (widthPos + blockDimension > width) {
widthPos = 0;
}
for (int row = 0; row < blockDimension; row++) {
for (int col = 0; col < blockDimension; col++) {
subBlock[row][col] = yuvMatrix[heightPos + row][col + widthPos];
}
}
blocks.add(subBlock);
}
return blocks;
}
The way I tested it:
public static void testareMatBlo(int height, int width, int blockdim){
long[][] test = new long[height][width];
int val = 1;
for (int i = 0; i < height; i++){
for (int j = 0; j < width; j++){
test[i][j] = val;
val++;
}
}
List<long[][]> blocks = matrixToBlocks(test, blockdim, width, height);
long[][] matrix = blocksToMatrix(blocks, blockdim, width, height);
if (Arrays.deepEquals(test, matrix)){
System.out.println("YES");
}
else {
System.out.println("NO");
}
}
This works:
testareMatBlo(4, 4, 2);
But anything else doesn't. Can anyone explain what I did wrong?
I didn't thoroughly read your code for matrixToBlocks(...) but all those calculations like int blocksSize = width / blockDimension * (height / blockDimension); are very likely to introduce hard to spot errors - and you actually don't need them:
public static List<long[][]> matrixToBlocks(long[][] yuvMatrix, int blockDimension){
//Check matrix and block dimension match
if( yuvMatrix.length == 0 || yuvMatrix.length % blockDimension != 0
|| yuvMatrix[0].length == 0 || yuvMatrix[0].length % blockDimension != 0 ) {
throw new IllegalArgumentException("whatever message you like");
}
List<long[][]> blocks = new ArrayList<long[][]>();
//Iterate over the blocks in row-major order (down first, then right)
for( int c = 0; c < yuvMatrix.length; c += blockDimension ) {
for( int r = 0; r < yuvMatrix[c].length; r += blockDimension ) {
long[][] subBlock = new long[blockDimension][blockDimension];
//Iterate over the block in row-major order
for(int bc = 0; bc < blockDimension; bc++ ) {
for(int br = 0; br < blockDimension; br++ ) {
subBlock[bc][br]=yuvMatrix[c+bc][r+br];
}
}
blocks.add(subBlock);
}
}
return blocks;
}
That method doesn't look shorter but it is: discounting the preliminary check yours is missing there are only 8 actual lines of code compared to 13 in your code. That's not the point however. What's more important is that the logic is easier since there are only a few calculations involved (like c+bc).
You might think this is inefficient but it isn't: you're accessing each element only once and thus even though there are 4 nested loops the overall complexity is still O(n) with n being the size of the matrix.
Constructing the matrix back is equally easy. The major thing you need to take care of is the ordering of the blocks: if you create them in row-major order (blocks below each other are next to each other in the list) you need to recreate the matrix in the same way:
public static long[][] blocksToMatrix( List<long[][]> blocks, int width, int height ) {
long[][] yuvMatrix = new long[width][height];
int c = 0;
int r = 0;
for( long[][] block : blocks ) {
int blockWidth = block.length;
int blockHeight = block[0].length;
for( int bc = 0; bc < block.length; bc++ ) {
for( int br = 0; br < block[bc].length; br++ ) {
yuvMatrix[c + bc][r + br] = block[bc][br];
}
}
//calculate the next offset into the matrix
//The blocks where created in row-major order so we need to advance the offset in the same way
r += blockHeight;
if( r >= height ) {
r = 0;
c += blockWidth;
}
}
return yuvMatrix;
}
I have a Matlab code of bat algorithm and I write java version of this algorithm
Bat algorithm is a simple optimization algorithm for finding the minimum of any function
here is the matlab code and my java version of this code
My java version of this algorithm can't find the optimum result like matlab version
and I can't find where is my mistake in converting the code from matlab to java
Can anyone help me where is my mistake?
import java.util.Random;
public class Bat
{
private int n;
private float A, r;
private float Qmin, Qmax;
private int d;
private int NofGen;
private float fmin;
private int fminIndex;
private float Fnew;
private int loopCounter;
private float Q[], V[][], Sol[][], UL_bound[][], fitness[], S[][], Best[];
private Random myRand;
public Bat(
int NBats,
float loudness,
float pulseRate,
float minFreq,
float maxFreq,
int NofGeneration,
int dimension
)
{
n = NBats;
A = loudness;
r = pulseRate;
Qmin = minFreq;
Qmax = maxFreq;
NofGen = NofGeneration;
d = dimension;
S = new float[n][d];
Best = new float[d];
UL_bound = new float[2][d];
//default bounds
for(int i = 0 ; i < d ; i++)
{
UL_bound[0][i] = -10000;
UL_bound[1][i] = 10000;
}
loopCounter = 0;
myRand = new Random();
Q = new float[n];
for(int i = 0 ; i < n ; i++)
Q[i] = 0;
V = new float[n][d];
for(int i = 0 ; i < n ; i++)
for(int j = 0 ; j < d ; j++)
V[i][j] = 0;
}
public void intial()
{
Sol = new float[n][d];
for(int i = 0 ; i < n ; i++)
for(int j = 0 ; j < d ; j++)
{
float t = myRand.nextFloat();
//(upper -lower)*rand + lower
Sol[i][j] = t * (UL_bound[1][j] - UL_bound[0][j]) + UL_bound[0][j];
}
fitness = new float[n];
for(int i = 0 ; i < n ; i++)
fitness[i] = function(Sol[i]);
//finding fmin
fmin = fitness[0];
fminIndex = 0;
for(int i = 0 ; i < n ; i++)
{
if (fitness[i] < fmin)
{
fmin = fitness[i];
fminIndex = i;
}
}
//setting best
for(int j = 0 ; j < d ; j++)
Best[j] = Sol[fminIndex][j];
}
public void start()
{
while(loopCounter < NofGen)
{
for(int i = 0 ; i < n ; i++)
{
Q[i] = Qmin + (Qmin - Qmax)* myRand.nextFloat();
for(int j = 0 ; j < d ; j++)
V[i][j] = V[i][j] + (Sol[i][j]-Best[j])*Q[i];
for(int j = 0 ; j < d ; j++)
S[i][j] = Sol[i][j] + V[i][j];
Sol[i] = simpleBounds(Sol[i]);
if(myRand.nextFloat() > r)
for(int j = 0 ; j < d ; j++)
S[i][j] = (float) (Best[j] + (.001 * myRand.nextFloat()) );
Fnew = function(S[i]);
if(Fnew <= fitness[i] && myRand.nextFloat() < A)
{
for(int j = 0 ; j < d ; j++)
Sol[i][j] = S[i][j];
fitness[i] = Fnew;
}
if(Fnew <= fmin)
{
fmin = Fnew;
for(int j = 0 ; j < d ; j++)
Best[j] = S[i][j];
}
}
loopCounter++;
}
}
public float[] simpleBounds(float p[])
{
for(int i = 0 ; i < d ; i++)
{
if(p[i] < UL_bound[0][i])
p[i] = UL_bound[0][i];
if(p[i] > UL_bound[1][i])
p[i] = UL_bound[1][i];
}
return p;
}
float function(float p[])
{
// Sphere function with fmin=0 at (0,0,...,0)
float sum = 0;
for(int i = 0 ; i < p.length ; i++)
sum = sum + p[i]*p[i];
return sum;
}
public float printResult()
{
System.out.println("After " + loopCounter + "Repeats :");
for(int i = 0 ; i < d ; i++)
System.out.print(Best[i] + ", ");
System.out.println ( "F(x) = " + fmin);
return fmin;
}
public void set_UL_Bound(int n, float L, float U)
{
if( n < d && n >= 0)
{
UL_bound[0][n] = L;
UL_bound[1][n] = U;
}
}
}
and this is the matlab versian
function [best,fmin,N_iter]=bat_algorithm(para)
% Display help
help bat_algorithm.m
% Default parameters
if nargin<1, para=[20 1000 0.5 0.5]; end
n=para(1); % Population size, typically 10 to 40
N_gen=para(2); % Number of generations
A=para(3); % Loudness (constant or decreasing)
r=para(4); % Pulse rate (constant or decreasing)
% This frequency range determines the scalings
% You should change these values if necessary
Qmin=0; % Frequency minimum
Qmax=2; % Frequency maximum
% Iteration parameters
N_iter=0; % Total number of function evaluations
% Dimension of the search variables
d=5; % Number of dimensions
% Lower limit/bounds/ a vector
Lb=-3*ones(1,d);
% Upper limit/bounds/ a vector
Ub=6*ones(1,d);
% Initializing arrays
Q=zeros(n,1); % Frequency
v=zeros(n,d); % Velocities
% Initialize the population/solutions
for i=1:n,
Sol(i,:)=Lb+(Ub-Lb).*rand(1,d);
Fitness(i)=Fun(Sol(i,:));
end
% Find the initial best solution
[fmin,I]=min(Fitness);
best=Sol(I,:);
for t=1:N_gen,
% Loop over all bats/solutions
for i=1:n,
Q(i)=Qmin+(Qmin-Qmax)*rand;
v(i,:)=v(i,:)+(Sol(i,:)-best)*Q(i);
S(i,:)=Sol(i,:)+v(i,:);
% Apply simple bounds/limits
Sol(i,:)=simplebounds(Sol(i,:),Lb,Ub);
% Pulse rate
if rand>r
% The factor 0.001 limits the step sizes of random walks
S(i,:)=best+0.001*randn(1,d);
end
% Evaluate new solutions
Fnew=Fun(S(i,:));
% Update if the solution improves, or not too loud
if (Fnew<=Fitness(i)) & (rand<A) ,
Sol(i,:)=S(i,:);
Fitness(i)=Fnew;
end
% Update the current best solution
if Fnew<=fmin,
best=S(i,:);
fmin=Fnew;
end
end
N_iter=N_iter+n;
end
% Output/display
disp(['Number of evaluations: ',num2str(N_iter)]);
disp(['Best =',num2str(best),' fmin=',num2str(fmin)]);
% Application of simple limits/bounds
function s=simplebounds(s,Lb,Ub)
% Apply the lower bound vector
ns_tmp=s;
I=ns_tmp<Lb;
ns_tmp(I)=Lb(I);
% Apply the upper bound vector
J=ns_tmp>Ub;
ns_tmp(J)=Ub(J);
% Update this new move
s=ns_tmp;
function z=Fun(u)
% Sphere function with fmin=0 at (0,0,...,0)
z=sum(u.^2);
%%%%% ============ end ====================================
The diff between two codes
In Matlab code:
S(i,:)=best+0.001*randn(1,d);
randn=>standard normal distribution.
While in Java code:
S[i][j] = (float) (Best[j] + (.001 * myRand.nextFloat()) );
java.util.Random.nextFloat()=>uniformly distributed float value between 0.0 and 1.0.
I was looking for the solution in C# and stumbled up on this. It was enough to get the job done. Here is the solution in C# translated from the java with variables renamed and an additional fitness function for finding the solution of two x,y equations xy=6 and x+y = 5. Also included is finding the square root of .3 :
using System;
namespace BatAlgorithmC
namespace BatAlgorithmC
{
class Program
{
static void Main(string[] args)
{
// Mybat x = new Mybat(100, 1000, 0.5, 0.5, 5, Mybat.sphere);
// Mybat x = new Mybat(1000, 1000, 0.5, 0.5, 1, Mybat.squareRoot);
Mybat x = new Mybat(1000, 1000, 0.5, 0.5, 2, Mybat.RootOfXYEquations);
Console.WriteLine("Hit any key to continue.");
Console.ReadLine();
}
}
public class Mybat
{
/**
* #param args the command line arguments
*/
public int _numberOfBats, _generations, Qmin, Qmax, N_iter, _dimension;
public double _volume, _pulseRate, min, max, fnew, fmin;
public double[][] _lowerBound, _upperBound, _velocity, _solution, S;
public double[] _fitness, _tempSolution, _bestSolution, Q;
public Random random;
//public static void main(String[] args) {
// Mybat x = new Mybat(20,1000,0.5,0.5,5, Mybat.sphere);
//}
public static void initJagged(double[][] array, int n, int d)
{
for (int i = 0; i < n; i++) array[i] = new double[d];
}
public Mybat(
int bats,
int generations,
double loud,
double pulse,
int dimension,
Func<double[], int, double> function
)
{
//initialization of variables
_numberOfBats = bats;
_generations = generations;
_volume = loud;
_pulseRate = pulse;
_dimension = dimension;
Random random = new Random();
//plan to change later and added as parameter
min = -15;
max = 15;
fmin = 0;
//decleration for the bounds
_lowerBound = new double[1][];
_upperBound = new double[1][];
Q = new double[_numberOfBats]; // frequency
_velocity = new double[_numberOfBats][]; //velocity
initJagged(_velocity, _numberOfBats, _dimension);
initJagged(_lowerBound, 1, _dimension);
initJagged(_upperBound, 1, _dimension);
//initialize solution array
_solution = new double[_numberOfBats][];
S = new double[_numberOfBats][];
_fitness = new double[_numberOfBats]; // fitness container
_bestSolution = new double[_dimension];
_tempSolution = new double[_dimension]; //temporary holder for a row in array _solution
initJagged(_solution, _numberOfBats, _dimension);
initJagged(S, _numberOfBats, _dimension);
for (int i = 0; i < _numberOfBats; i++)
{
// for minimal coding : added initialize Q[]array with '0' as element
Q[i] = 0;
for (int x = 0; x < _dimension; x++)
{
// for minimal coding : added initialize _velocity[][] array with '0' as element
_velocity[i][x] = 0;
//find random double values from LB to UB
_solution[i][x] = (random.NextDouble()*(max - min)) + min;
_tempSolution[x] = _solution[i][x];
//Console.WriteLine("sol["+i+"]["+x+"] = "+_solution[i][x]); //test line
//Console.WriteLine(rand.nextDouble()); //test line
}
_fitness[i] = function(_tempSolution, _dimension);
//initialize best and the fmin
if (i == 0 || fmin > _fitness[i])
{
fmin = _fitness[i];
for (int x = 0; x < _dimension; x++)
{
_bestSolution[x] = _solution[i][x];
}
}
Console.WriteLine("fitness[" + i + "]" + _fitness[i]); //test
}
Console.WriteLine("fmin = " + fmin); //test
// special note to these variables (below)
// change if required for maximum effectivity
Qmin = 0;
Qmax = 2;
N_iter = 1; //number of function evaluation
// bat proper
for (int loop = 0; loop < N_iter; loop++)
{
// loop over all bats/solutions
for (int nextBat = 0; nextBat < _numberOfBats; nextBat++)
{
Q[nextBat] = Qmin + ((Qmin - Qmax)*random.NextDouble());
// loop for velocity
for (int vel = 0; vel < _dimension; vel++)
{
_velocity[nextBat][vel] = _velocity[nextBat][vel] +
((_solution[nextBat][vel] - _bestSolution[vel])*Q[nextBat]);
}
//new solutions
for (int nextDimension = 0; nextDimension < _dimension; nextDimension++)
{
S[nextBat][nextDimension] = _solution[nextBat][nextDimension] +
_velocity[nextBat][nextDimension];
}
/**
* RESERVED SPOT for the QUESTIONABLE AREA ON THE
* MATLAB CODE (i think it is not needed for the java equivalent)
*/
// pulse rate
if (random.NextDouble() > _pulseRate)
{
for (int nextDimension = 0; nextDimension < _dimension; nextDimension++)
{
S[nextBat][nextDimension] = _bestSolution[nextDimension] + (0.001*random.NextGaussian());
}
}
//putting current row of _solution to a temp array
for (int nextDimension = 0; nextDimension < _dimension; nextDimension++)
{
_tempSolution[nextDimension] = S[nextBat][nextDimension];
}
fnew = function(_tempSolution, _dimension);
// update if solution is improved, and not too loud
if ((fnew <= _fitness[nextBat]) && (random.NextDouble() < _volume))
{
for (int x = 0; x < _dimension; x++)
{
_solution[nextBat][x] = S[nextBat][x];
_fitness[nextBat] = fnew;
}
}
//update current best solution
if (fnew <= fmin)
{
for (int nextDimension = 0; nextDimension < _dimension; nextDimension++)
{
_bestSolution[nextDimension] = S[nextBat][nextDimension];
fmin = fnew;
}
}
}
}
Console.WriteLine(" ");
Console.WriteLine("new fitness");
for (int i = 0; i < _numberOfBats; i++)
{
Console.WriteLine("fitness[" + i + "]" + _fitness[i]);
}
for (int nextDimension = 0; nextDimension < _dimension; nextDimension++)
{
Console.WriteLine("best[" + nextDimension + "]" + _bestSolution[nextDimension]);
}
Console.WriteLine("Fmin = " + fmin);
}
//possible that this function is not needed in java
public void set_bounds(int x, double L, double U)
{
//double temp_Lb[x];
//double temp_Ub[x];
for (int i = 0; i < x; i++)
{
_lowerBound[0][i] = L;
_upperBound[0][i] = U;
}
}
public static double sphere(double[] value, int d)
{
// sphere function where fmin is at 0
double result = 0;
for (int i = 0; i < d; i++)
{
result += (value[i]*value[i]);
}
return result;
}
public static double squareRoot(double[] value, int d)
{
// find the square root of .3
double result = 0;
for (int i = 0; i < d; i++)
{
result += Math.Abs(.3 - (value[i]*value[i]));
}
return result;
}
public static double RootOfXYEquations(double[] value, int d)
{
// solve for x and y xy = 6 and x+y = 5
double result = 0;
result += Math.Abs(5 - (value[0] + value[1]));
result += Math.Abs(6 - (value[0] * value[1]));
return result;
}
}
static class MathExtensiionns
{
public static double NextGaussian(this Random rand)
{
double u1 = rand.NextDouble(); //these are uniform(0,1) random doubles
double u2 = rand.NextDouble();
double mean = 0, stdDev = 1;
double randStdNormal = Math.Sqrt(-2.0 * Math.Log(u1)) *
Math.Sin(2.0 * Math.PI * u2); //random normal(0,1)
double randNormal =
mean + stdDev * randStdNormal; //random normal(mean,stdDev^2)
return randNormal;
}
}
}
this will be my first time here at stack overflow so i will say sorry beforehand if my response will be a bit ambiguous and has many problems. i just hope that this answer of mine will help future visitors on this thread who wants to study bat algo via java.
anyway, i did look at your code since i am studying bat algorithm at the moment.
tried running it and it does gives far off results compared to the matlab version.
what i noticed is that you just "literally" tried to convert the matlab code without fully understanding each matlab lines. i wanted to point out all of the stuff you missed but i am feeling lazy right now so i will just leave my version of bat algorithm in java.
NOTE: i just made a running bat algorithm in java. not an efficient, fully debugged, matlab's java-equivalent bat algorithm.
import java.util.Random;
public class Mybat {
/**
* #param args the command line arguments
*/
public int n, N_gen, Qmin, Qmax, N_iter, d;
public double A,r,min,max,fnew,fmin;
public double Lb[][],Ub[][],Q[],v[][],Sol[][],S[][],fitness[],temp[],best[];
public Random random;
public static void main(String[] args) {
Mybat x = new Mybat(20,1000,0.5,0.5,5);
}
public Mybat(
int bats,
int generations,
double loud,
double pulse,
int dimension
){
//initialization of variables
n=bats;
N_gen = generations;
A = loud;
r = pulse;
d = dimension;
Random rand = new Random();
//plan to change later and added as parameter
min = -15;
max = 15;
fmin = 0;
//decleration for the bounds
Lb = new double[1][d];
Ub = new double[1][d];
Q = new double[n]; // frequency
v = new double[n][d]; //velocity
//initialize solution array
Sol = new double[n][d];
S = new double[n][d];
fitness = new double[n]; // fitness container
best =new double[d];
temp = new double[d]; //temporary holder for a row in array Sol
for(int i=0;i<n;i++){
// for minimal coding : added initialize Q[]array with '0' as element
Q[i] = 0;
for(int x=0;x<d;x++){
// for minimal coding : added initialize v[][] array with '0' as element
v[i][x] = 0;
//find random double values from LB to UB
Sol[i][x]= (rand.nextDouble()*(max - min)) + min;
temp[x] = Sol[i][x];
//System.out.println("sol["+i+"]["+x+"] = "+Sol[i][x]); //test line
//System.out.println(rand.nextDouble()); //test line
}
fitness[i] = function(temp);
//initialize best and the fmin
if(i==0 || fmin > fitness[i]){
fmin = fitness[i];
for(int x=0;x<d;x++){
best[x] = Sol[i][x];
}
}
System.out.println("fitness["+i+"]"+fitness[i]); //test
}
System.out.println("fmin = "+fmin); //test
// special note to these variables (below)
// change if required for maximum effectivity
Qmin = 0;
Qmax = 2;
N_iter = 1; //number of function evaluation
// bat proper
for(int loop=0;loop<N_iter;loop++){
// loop over all bats/solutions
for(int i=0;i<n;i++){
Q[i] = Qmin+((Qmin-Qmax)*rand.nextDouble());
// loop for velocity
for(int vel=0;vel<d;vel++){
v[i][vel] = v[i][vel]+((Sol[i][vel]-best[vel])*Q[i]);
}
//new solutions
for(int x=0;x<d;x++){
S[i][x] = Sol[i][x] + v[i][x];
}
/**
* RESERVED SPOT for the QUESTIONABLE AREA ON THE
* MATLAB CODE (i think it is not needed for the java equivalent)
*/
// pulse rate
if(rand.nextDouble()>r){
for(int x=0;x<d;x++){
S[i][x] = best[x]+(0.001*rand.nextGaussian());
}
}
//putting current row of Sol to a temp array
for(int x=0;x<d;x++){
temp[x] = S[i][x];
}
fnew = function(temp);
// update if solution is improved, and not too loud
if((fnew<=fitness[i]) && (rand.nextDouble()<A)){
for(int x=0;x<d;x++){
Sol[i][x] = S[i][x];
fitness[i] = fnew;
}
}
//update current best solution
if(fnew<=fmin){
for(int x=0;x<d;x++){
best[x] = S[i][x];
fmin = fnew;
}
}
}
}
System.out.println(" ");
System.out.println("new fitness");
for(int i=0;i<n;i++){
System.out.println("fitness["+i+"]"+fitness[i]);
}
System.out.println("Fmin = "+fmin);
}
//possible that this function is not needed in java
public void set_bounds(int x, double L, double U){
//double temp_Lb[x];
//double temp_Ub[x];
for(int i=0; i<x; i++){
Lb[0][i] = L;
Ub[0][i] = U;
}
}
public double function(double value[]){
// sphere function where fmin is at 0
double result = 0;
for(int i=0;i<d;i++){
result += (value[i]*value[i]);
}
return result;
}
}