I have a problem with my Wiegand RFID reader (26-bit). I have written a simple Java app and everything seems fine at first, but after about 10 reads, for example, the bits start to shift. Is Raspbian on the RPi too slow for the Wiegand protocol timing?
Here is sample code and output
package classes;
import com.pi4j.io.gpio.GpioController;
import com.pi4j.io.gpio.GpioFactory;
import com.pi4j.io.gpio.GpioPinDigitalInput;
import com.pi4j.io.gpio.PinPullResistance;
import com.pi4j.io.gpio.RaspiPin;
public class Test {
public static char[] s = new char[10000];
static int bits = 0;
public static void main(String[] args) {
// create gpio controller
final GpioController gpio = GpioFactory.getInstance();
// provision gpio pin #02 as an input pin with its internal pull down
// resistor enabled
final GpioPinDigitalInput pin0 = gpio.provisionDigitalInputPin(RaspiPin.GPIO_00, PinPullResistance.PULL_UP);
final GpioPinDigitalInput pin1 = gpio.provisionDigitalInputPin(RaspiPin.GPIO_01, PinPullResistance.PULL_UP);
System.out.println("PINs ready");
Thread th = new Thread(new Runnable() {
#Override
public void run() {
while (true) {
if (pin0.isLow()) { // D1 on ground?
s[bits++] = '0';
while (pin0.isLow()) {
}
}
if (pin1.isLow()) { // D1 on ground?
s[bits++] = '1';
while (pin1.isLow()) {
}
}
if (bits == 26) {
bits=0;
Print();
}
}
}
});
th.setPriority(Thread.MAX_PRIORITY);
th.start();
System.out.println("Thread start");
for (;;) {
try {
Thread.sleep(500);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
protected static void Print() {
for (int i = 0; i < 26; i++) {
System.out.write(s[i]);
}
System.out.println();
bits = 0;
}
}
and output:
10100100111111110110011011
10100100111111110110011011
10100100111111110110011011
10100100111111110110011011
10100100111111110001101110
10010011111111011001101110
10010011111111011001101110
10010011111111011001101110
10010011111111011001101110
10010011111111011001101110
10010011111111011001101110
Your printf statements may be causing the problem. Try storing the data and printing it at the end. printf tends to be slow (it involves several context switches).
Also, it seems you have no way of detecting if you miss a bit. I would say try a timeout, so if you don't get 26 bits in time reset your counter. That way you're not looping around reading nothing, and eventually getting misaligned data.
I have done this in C and Python on a Pi, and also on an Arduino. From my experience, as @woodrow douglas says, you need to capture the bits in a loop or (better) use interrupts, and use a timeout which you reset each time you receive a bit; then print the result once you are sure you have all the bits (i.e. the timeout has expired).
This is how I do this on arduino using interrupts.
// Handler for a '0' bit (presumably attached to the DATA0 interrupt - confirm
// against the attachInterrupt call, which is not shown): counts the bit,
// shifts a 0 into the accumulator and re-arms the end-of-frame timeout.
void zero(){
  ++bit_count;
  bit_holder = bit_holder << 1; // shift left one; the new low bit is 0
  timeout = t;
}
// Handler for a '1' bit (presumably attached to the DATA1 interrupt - confirm
// against the attachInterrupt call, which is not shown): counts the bit,
// shifts a 1 into the accumulator and re-arms the end-of-frame timeout.
void one(){
  ++bit_count;
  bit_holder = (bit_holder << 1) | 1; // shift left one and set the new low bit
  timeout = t;
}
// Main loop: counts the timeout down once per pass. When it reaches zero while
// bits are pending, the frame is considered complete: it is shown on the LCD
// and the serial port, then the accumulator is cleared for the next card.
void loop() {
  timeout --;
  if (timeout != 0 || bit_count <= 0) {
    return; // still receiving, or nothing received at all
  }

  // LCD: decimal on the first row, hex on the second
  lcd.clear();
  lcd.print("Dec:");
  lcd.print(bit_holder);
  lcd.setCursor(0,1);
  lcd.print("Hex:");
  lcd.print(String(bit_holder,HEX));

  // Serial: bit count and raw bits
  Serial.print("bit count= ");
  Serial.println(bit_count);
  Serial.print("bits= ");
  Serial.println(bit_holder,BIN);

  oldbit = bit_holder; // remember this value as the previous read
  bit_count = 0;       // reset bit count
  bit_holder = 0;      // reset badge number
}
I never had any issues using C on the Pi, but I did have issues using Python as it is not as close to real time. The only way it would work in Python was by using interrupts; I managed to get the bad-read rate down to something like 1 in 200 but never removed it completely.
What I did in the end was use some C to collect the bits then call my python script with the bits for processing.
If you are interested this is the C code I use:
#include <wiringPi.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
/*
 * Global state shared between main(), the wiringPi interrupt handlers and the
 * python worker threads. Pin values are passed straight to the wiringPi calls.
 */
/* reader 1 pins */
int r1data0 = 6; //pin 22
int r1data1 = 7; // Pin 7
// green goes to same as relay
int r1beep = 0; //pin 11
/* reader 2 pins */
int r2data0 = 10; /* P1-24 */
int r2data1 = 11; /* P1-26 */
//green goes to same as relay
int r2beep = 0;
int i1 = 0; /* not referenced in the code shown */
int i2 = 0; /* not referenced in the code shown */
//generic variables
int t = 5; /* timeout reload value; main() decrements the timeouts once per delay(1) pass */
int blank; //never assigned in the code shown; copied into r1bits/r2bits to reset them
int rel1time = 0; /* set from the python output for reader 1; main() counts it down once per second */
int rel2time = 0; /* set from the python output for reader 2; not read in the code shown */
int rel1 = 5; /* pin switched between OUTPUT and INPUT by main() while rel1time > 0 */
int previoust; /* time() value of the last rel1time countdown step */
//reader 1 variables
int r1bits; // Collected bits storage area
int r1bit_count = 0; //number of bits collected so far
int oldr1bit_count = 0; //copy of the bit count, sent to python
int r1timeout; //timeout counter; the frame is complete when it reaches 0
char r1command[40]; /* command line built for popen() */
int r1ret; /* not referenced in the code shown */
//reader 2 variables
int r2bits; // Collected bits storage area
int r2bit_count = 0; //number of bits collected so far
int oldr2bit_count = 0; /* not referenced in the code shown */
int r2timeout; //timeout counter; the frame is complete when it reaches 0
char r2command[40]; /* command line built for popen() */
pthread_t threads; /* handle reused for every worker thread created by main() */
/*
 * Signals "access denied" on a reader's beep line: three 300 ms LOW pulses
 * separated by 200 ms HIGH gaps, leaving the line HIGH.
 *
 * red: reader number, 1 or 2. Any other value is ignored (the original code
 *      used "=" instead of "==", so it always drove reader 2's pin).
 */
void access_denied(int red){
    int pin;
    int pulse;

    if (red == 1) {
        pin = r1beep;
    } else if (red == 2) {
        pin = r2beep;
    } else {
        return; /* unknown reader: no pin to drive */
    }

    pinMode(pin, OUTPUT);
    for (pulse = 0; pulse < 3; pulse++) {
        digitalWrite(pin, LOW);
        delay(300);
        digitalWrite(pin, HIGH);
        if (pulse < 2) {
            delay(200); /* gap between pulses; none after the last one */
        }
    }
}
/*
 * Worker thread for reader 1: runs "python access.py r1 <bitcount> <bits>"
 * (both in hex), reads the first word of its output and stores it in rel1time.
 * A result of 0 (including any failure to run or read the script) triggers
 * the access-denied signal.
 *
 * val: the collected bits, passed by value inside the pointer argument.
 */
void *r1python_thread(void *val){
    /* sized for the "%100s" conversion below plus the terminating NUL;
       the original 5-byte buffer could be overrun */
    char buffer[101] = "";
    FILE* file;

    /* bounded formatting; the pointer is converted back to the integer it carries */
    snprintf(r1command, sizeof r1command, "python access.py r1 %X %X",
             (unsigned int) oldr1bit_count, (unsigned int) (unsigned long) val);
    file = popen(r1command, "r"); //execute command using popen
    if (file != NULL) {
        if (fscanf(file, "%100s", buffer) != 1) { //read command output
            buffer[0] = '\0'; /* no output: treated as 0 below */
        }
        pclose(file);
    }
    rel1time = atoi(buffer); //convert returned string to int
    if(rel1time == 0){access_denied(1);}
    pthread_exit(NULL);
}
/*
 * Worker thread for reader 2: runs "python access.py r2 <bits>" (hex), reads
 * the first word of its output and stores it in rel2time. Any failure to run
 * or read the script yields 0.
 *
 * val: the collected bits, passed by value inside the pointer argument.
 */
void *r2python_thread(void *val){
    /* sized for the "%100s" conversion below plus the terminating NUL;
       the original 5-byte buffer could be overrun */
    char buffer[101] = "";
    FILE* file;

    /* bounded formatting; the pointer is converted back to the integer it carries */
    snprintf(r2command, sizeof r2command, "python access.py r2 %X",
             (unsigned int) (unsigned long) val);
    file = popen(r2command, "r"); //execute command using popen
    if (file != NULL) {
        if (fscanf(file, "%100s", buffer) != 1) { //read command output
            buffer[0] = '\0'; /* no output: treated as 0 below */
        }
        pclose(file);
    }
    rel2time = atoi(buffer); //convert returned string to int
    pthread_exit(NULL);
}
//reader 1 bit functions
/* Reader 1, DATA0 falling edge: count the bit, shift in a 0, re-arm the timeout. */
void onebit0(){
    ++r1bit_count;
    r1bits = r1bits << 1; /* new low bit is 0 */
    r1timeout = t;
}
/* Reader 1, DATA1 falling edge: count the bit, shift in a 1, re-arm the timeout. */
void onebit1(){
    ++r1bit_count;
    r1bits = (r1bits << 1) | 1; /* new low bit is 1 */
    r1timeout = t;
}
//reader 2 bit functions
/* Reader 2, DATA0 falling edge: count the bit, shift in a 0, re-arm the timeout. */
void twobit0(){
    ++r2bit_count;
    r2bits = r2bits << 1; /* new low bit is 0 */
    r2timeout = t;
}
/* Reader 2, DATA1 falling edge: count the bit, shift in a 1, re-arm the timeout. */
void twobit1(){
    ++r2bit_count;
    r2bits = (r2bits << 1) | 1; /* new low bit is 1 */
    r2timeout = t;
}
/*
 * Entry point. Registers falling-edge interrupt handlers for both readers,
 * then loops forever (one pass per millisecond):
 *   - once a reader has bits pending and its timeout has counted down to 0,
 *     the collected bits are handed to a python worker thread and the
 *     reader's state is reset;
 *   - while rel1time > 0 the rel1 pin is held in OUTPUT mode and rel1time is
 *     decremented once per second; otherwise the pin is put back to INPUT.
 */
int main(){
    wiringPiSetup(); //initialise wiringPi
    pinMode (r1data0, INPUT); // set reader 1 data0 as input
    pinMode (r1data1, INPUT); // set reader 1 data1 as input
    pinMode (r2data0, INPUT); // set reader 2 data0 as input
    pinMode (r2data1, INPUT); // set reader 2 data1 as input
    //reader 1
    wiringPiISR(r1data0, INT_EDGE_FALLING, onebit0); // data 0 falling edge -> onebit0
    wiringPiISR(r1data1, INT_EDGE_FALLING, onebit1); // data 1 falling edge -> onebit1
    //reader 2
    wiringPiISR(r2data0, INT_EDGE_FALLING, twobit0); // data 0 falling edge -> twobit0
    wiringPiISR(r2data1, INT_EDGE_FALLING, twobit1); // data 1 falling edge -> twobit1
    while (1){ //loop
        if (r1bit_count > 0 ){ //if bits is not empty
            r1timeout--; // reduce timeout by 1
            if(r1timeout == 0){ //and it has timed out ie no more bits coming
                /* Publish the bit count BEFORE starting the thread that reads
                   it; the original stored it afterwards, so the worker could
                   see the previous card's count. */
                oldr1bit_count = r1bit_count;
                /* The bits travel by value inside the pointer argument. The
                   thread is never joined, so detach it to release its
                   resources when it exits. */
                if (pthread_create(&threads, NULL, r1python_thread, (void *) (long) r1bits) == 0) {
                    pthread_detach(threads);
                }
                r1bit_count = 0; //reset bit count
                r1bits = blank; //clear bits variable
                r1timeout = t; //reset timeout
            }
        }
        if (r2bit_count > 0 ){ //if bits is not empty
            r2timeout--; // reduce timeout by 1
            if(r2timeout == 0){ //and it has timed out ie no more bits coming
                if (pthread_create(&threads, NULL, r2python_thread, (void *) (long) r2bits) == 0) {
                    pthread_detach(threads);
                }
                r2bit_count = 0; //reset bit count
                r2bits = blank; //clear bits variable
                r2timeout = t; //reset timeout
            }
        }
        if (rel1time > 0){
            pinMode(rel1, OUTPUT);
            int diff = time(NULL) - previoust;
            if(diff >= 1){ /* one second elapsed: count the hold time down */
                previoust = time(NULL);
                rel1time--;
            }
        }
        else{
            pinMode(rel1, INPUT);
            previoust = time(NULL);
        }
        delay(1);
    }
    return 0;
}
Related
Hello all for the second time,
Initially I was looking for a broad answer, but this thread got blocked for being "too broad"... so I've got no choice, but to go into detail. My apologies if asking the question again is against the forum guidelines, I'm new to stackoverflow so please be kind.
I’ve got data coming into a serial port at 250Hz and I’d like to save it all to a .csv file. Of course draw() is not going to be able to keep up with that rate of data...
At the moment I am using the serialEvent(port) to collect and parse the data. Once parsed out, I'm calling a function in draw to add the data to a new line in a table and then saving that table every 5 seconds...
Yes, I see the obvious flaw that if I'm saving the current data in draw then of course it's not going to be able to save all the data coming in, but rather just the data that happens to be present when the data saving function is called... but I'm not sure of the best way to solve that. A buffer scheme? Or can I have a separate thread that just adds ALL data coming in to a table?
which lead to my initial (broad) question...
Is there a way to save all incoming data to a file without polling?
Thanks to all in advance.. code below:
Twain
import processing.serial.*;
import static javax.swing.JOptionPane.*;
// In-memory table that rows are appended to, and the CSV path it is saved to
Table table;
String Path = "PathProvidedHere.csv";
// Most recent line read from the serial port (null once the port has no complete line)
String message;
//Some time keeping variables
int hours, minutes, seconds, milliseconds;
// millis() value at the last saveTable() call
float SaveTime;
//Serial port selection
Serial myPort;
// User's answer from the port dialog, and the "a = ..., b = ..." list shown in it
String COMx, COMlist = "";
final boolean debug = true;
String portName;
// Data variables
// yaw/pitch/roll are filled from "i" messages, A1..A4 from "s" messages
float yaw = 0.0; float pitch = 0.0; float roll = 0.0;
float A1, A2, A3, A4;
float E1, E2, E3, E4;
// One-time initialisation: window, logging table columns and serial port.
void setup()
{
  // Set up GUI box
  size(1024, 768, P3D);
  frameRate(250);
  smooth();
  //Some other setups like fonts, graphs, etc.

  // Set up the logging table; columns are created in this exact order
  table = new Table();
  String[] columnNames = {
    "A1", "A2", "A3", "A4",
    "", "E1", "E3", "E4",
    " ", "min", "sec", "milli"
  };
  for (String columnName : columnNames)
  {
    table.addColumn(columnName);
  }

  portName = chooseCOM();
  delay(1000);
}
// Per-frame work: append the latest sample to the table, run the other
// per-frame functions, and write the table to disk every 5 seconds.
void draw()
{
  SavetoCSV();
  //serialEvent(myPort); // read and parse incoming serial message
  ACouple();
  Unrelated();
  FunctionsHere();

  final float sinceLastSave = millis() - SaveTime;
  if (sinceLastSave > 5000)
  {
    saveTable(table, Path);
    SaveTime = millis();
  }
}
// Picks and opens the serial port.
// With one port available it is used directly; with two or more the user is
// asked to choose by letter (a, b, ...). Returns the opened port's name, or
// "noPort" when no port could be opened (the sketch is then asked to exit).
String chooseCOM()
{
  setupP2 = true;
  try
  {
    String[] ports = Serial.list();
    if (debug) printArray(ports);
    int i = ports.length;
    if (i == 0)
    {
      showMessageDialog(frame, "Device is not connected to the PC");
      exit();
      return "noPort";
    }
    if (i >= 2)
    {
      // need to check which port the inst uses -
      // for now we'll just let the user decide
      COMlist = ""; // rebuild the list so repeated calls do not accumulate entries
      for (int j = 0; j < i; )
      {
        COMlist += char(j+'a') + " = " + ports[j];
        if (++j < i) COMlist += ", ";
      }
      COMx = showInputDialog("Which COM port is correct? (a,b,..):\n"+COMlist);
      // exit() does not stop execution immediately, so return explicitly;
      // otherwise a cancelled dialog (null) would be dereferenced below.
      if (COMx == null || COMx.isEmpty())
      {
        exit();
        return "noPort";
      }
      i = int(COMx.toLowerCase().charAt(0) - 'a') + 1;
      if (i < 1 || i > ports.length)
      {
        showMessageDialog(frame, "Invalid COM port selection");
        exit();
        return "noPort";
      }
    }
    String selected = ports[i-1];
    // The port is opened unconditionally. Previously a dangling "if (debug)"
    // (left behind by a commented-out println) guarded this line, so the port
    // was never opened when debug was false.
    myPort = new Serial(this, selected, 115200); // change baud rate to your liking
    myPort.bufferUntil(13); // buffer until CR/LF appears, but not required..
    return selected;
  }
  catch (Exception e)
  { //Report that the port could not be opened
    showMessageDialog(frame, "COM port is not available (may\nbe in use by another program)");
    //println("Error:", e);
    exit();
  }
  return "noPort";
}
// Serial callback: drains every complete line currently available on the port
// and updates the global readings according to the line's leading tag:
//   "i" yaw pitch roll | "s" A1 A2 A3 A4 | "b" battery | "m" MAC id
// Lines that match none of these are ignored.
void serialEvent(Serial myPort)
{
  final int lineEnd = 13; // carriage return in ASCII
  while ((message = myPort.readStringUntil(lineEnd)) != null)
  {
    String[] fields = split(trim(message), " ");
    int count = fields.length;
    if (count == 4 && fields[0].equals("i"))
    {
      yaw = float(fields[1]);
      pitch = float(fields[2]);
      roll = float(fields[3]);
    }
    else if (count == 5 && fields[0].equals("s"))
    {
      A1 = float(fields[1]);
      A2 = float(fields[2]);
      A3 = float(fields[3]);
      A4 = float(fields[4]);
    }
    else if (count >= 2 && fields[0].equals("b"))
    {
      Battery = int(fields[1]);
    }
    else if (count >= 2 && fields[0].equals("m"))
    {
      MACid = int(fields[1]);
    }
  }
}
// Appends the current A1..A4 readings as a new table row.
// Nothing is added while A1 is still 0.
void SavetoCSV()
{
  if (A1 == 0)
  {
    return;
  }
  TableRow row = table.addRow();
  row.setFloat("A1", A1);
  row.setFloat("A2", A2);
  row.setFloat("A3", A3);
  row.setFloat("A4", A4);
  //saveTable(table, Path);
}
Additional info:
- Processing P3
- For the record, with the rest of my script I can get draw up to 80hz or so
- I'd be okay with saving all the data and parsing it later
Went the buffer route.... I think I'm getting close now. Unsure if I'm saving the data in the right order or if the saving process will halt the rest of the processes...
Code:
import processing.serial.*;
import static javax.swing.JOptionPane.*;
//Buffers for the incoming data; serialEvent() pushes one value per list for each "s" message
LinkedList<Integer> A1c = new LinkedList<Integer>();
LinkedList<Integer> A2c = new LinkedList<Integer>();
LinkedList<Integer> A3c = new LinkedList<Integer>();
LinkedList<Integer> A4c = new LinkedList<Integer>();
// Number of samples to buffer before SavetoCSV() is called
int bufferLength = 500;
// Number of samples buffered since the last SavetoCSV() call
int bufflen = 0;
//Serial port selection
Serial myPort;
// User's answer from the port dialog, and the "a = ..., b = ..." list shown in it
String COMx, COMlist = "";
final boolean debug = true;
String portName;
// Data variables
// yaw/pitch/roll are filled from "i" messages, A1..A4 from "s" messages
float yaw = 0.0; float pitch = 0.0; float roll = 0.0;
float A1, A2, A3, A4;
//Data log variables
Table table;
String Path = "PathtoFile.csv";
// One-time initialisation: window, logging table columns and serial port.
void setup() {
  // Set up GUI box
  size(1024, 768, P3D);
  frameRate(250);
  strokeWeight(50);
  smooth();

  // Set up the logging table with one column per channel
  table = new Table();
  String[] channels = { "A1", "A2", "A3", "A4" };
  for (String channel : channels) {
    table.addColumn(channel);
  }

  portName = chooseCOM();
}
// Per-frame work. Logging no longer happens here: serialEvent() calls
// SavetoCSV() itself, so draw() only runs the remaining per-frame functions.
void draw() {
//SavetoCSV now called within SerialEvent()
//SavetoCSV();
//serialEvent(myPort); // read and parse incoming serial message
Some();
Unrelated();
FunctionsHere();
}
// Serial callback: drains every complete line currently available on the port
// and updates the global readings according to the line's leading tag:
//   "i" yaw pitch roll | "s" A1 A2 A3 A4 | "b" battery | "m" MAC id
// Every "s" sample is also pushed onto the A1c..A4c buffers; once bufferLength
// samples have been collected, SavetoCSV() is called and the count restarts.
void serialEvent(Serial myPort) {
  int newLine = 13; // carriage return in ASCII
  do {
    message = myPort.readStringUntil(newLine); // read from port until line end
    if (message != null) {
      String[] list = split(trim(message), " ");
      if (list.length == 4 && list[0].equals("i")) {
        yaw = float(list[1]); // convert to float yaw
        pitch = float(list[2]); // convert to float pitch
        roll = float(list[3]); // convert to float roll
      } else if (list.length == 5 && list[0].equals("s")) {
        A1 = float(list[1]);
        A2 = float(list[2]);
        A3 = float(list[3]);
        A4 = float(list[4]);
        // Always buffer the sample first. Previously the sample that arrived
        // when the buffer was full was discarded instead of stored.
        A1c.push(int(A1));
        A2c.push(int(A2));
        A3c.push(int(A3));
        A4c.push(int(A4));
        bufflen++;
        if (bufflen >= bufferLength) {
          bufflen = 0;
          SavetoCSV();
        }
      } else if (list.length >=2 && list[0].equals("b")) {
        Battery = int(list[1]);
      } else if (list.length >= 2 && list[0].equals("m")) {
        MACid = int(list[1]);
      } else {
        //anything else is ignored
        //println(message);
      }
    }
  } while (message != null);
}
// Moves every buffered sample into the table, oldest first, then writes the
// table to disk.
//
// The buffers are filled with push() (newest at the head), so taking elements
// from the tail with pollLast() yields them in arrival order and empties the
// buffers at the same time. Fixes over the previous version:
//   - saveTable() was unreachable (its branch required i >= bufferLength);
//   - the buffers were never cleared and grew without bound;
//   - get(i) on a LinkedList made the copy quadratic;
//   - a batch was skipped entirely whenever the latest A1 happened to be 0.
void SavetoCSV() {
  boolean addedRows = false;
  while (!A1c.isEmpty() && !A2c.isEmpty() && !A3c.isEmpty() && !A4c.isEmpty()) {
    TableRow newRow = table.addRow();
    newRow.setFloat("A1", A1c.pollLast());
    newRow.setFloat("A2", A2c.pollLast());
    newRow.setFloat("A3", A3c.pollLast());
    newRow.setFloat("A4", A4c.pollLast());
    addedRows = true;
  }
  // The lists are filled in lock-step; drop any leftovers so they stay aligned.
  A1c.clear();
  A2c.clear();
  A3c.clear();
  A4c.clear();
  if (addedRows) {
    saveTable(table, Path);
  }
}
// Picks and opens the serial port.
// With one port available it is used directly; with two or more the user is
// asked to choose by letter (a, b, ...). Returns the opened port's name, or
// "noPort" when no port could be opened (the sketch is then asked to exit).
String chooseCOM() {
  setupP2 = true;
  try {
    String[] ports = Serial.list();
    if (debug) printArray(ports);
    int i = ports.length;
    if (i == 0) {
      showMessageDialog(frame, "Device is not connected to the PC");
      exit();
      return "noPort";
    }
    if (i >= 2) {
      // need to check which port the inst uses -
      // for now we'll just let the user decide
      COMlist = ""; // rebuild the list so repeated calls do not accumulate entries
      for (int j = 0; j < i; ) {
        COMlist += char(j+'a') + " = " + ports[j];
        if (++j < i) COMlist += ", ";
      }
      COMx = showInputDialog("Which COM port is correct? (a,b,..):\n"+COMlist);
      // exit() does not stop execution immediately, so return explicitly;
      // otherwise a cancelled dialog (null) would be dereferenced below.
      if (COMx == null || COMx.isEmpty()) {
        exit();
        return "noPort";
      }
      i = int(COMx.toLowerCase().charAt(0) - 'a') + 1;
      if (i < 1 || i > ports.length) {
        showMessageDialog(frame, "Invalid COM port selection");
        exit();
        return "noPort";
      }
    }
    String selected = ports[i-1];
    // The port is opened unconditionally. Previously a dangling "if (debug)"
    // (left behind by a commented-out println) guarded this line, so the port
    // was never opened when debug was false.
    myPort = new Serial(this, selected, 115200); // change baud rate to your liking
    myPort.bufferUntil(13); // buffer until CR/LF appears, but not required..
    return selected;
  }
  catch (Exception e)
  { //Report that the port could not be opened
    showMessageDialog(frame, "COM port is not available (may\nbe in use by another program)");
    //println("Error:", e);
    exit();
  }
  return "noPort";
}
How can you play multiple (audio) byte arrays simultaneously? This "byte array" is recorded by TargetDataLine, transferred using a server.
What I've tried so far
Using SourceDataLine:
There is no way to play multiple streams using a single SourceDataLine, because the write method blocks until the buffer is written. This problem cannot be fixed using threads, because only one thread can write to a SourceDataLine at a time.
Using the AudioPlayer Class:
// Wrap the received bytes as an audio stream and hand it to the player.
ByteInputStream stream2 = new ByteInputStream(data, 0, data.length);
// The third AudioInputStream argument is a length in sample frames, not in
// bytes, so the byte count is divided by the format's frame size.
AudioInputStream stream = new AudioInputStream(stream2, VoiceChat.format, data.length / VoiceChat.format.getFrameSize());
AudioPlayer.player.start(stream);
This just plays noise on the clients.
EDIT
I don't receive the voice packets at the same time, it's not simultaneously, more "overlapping".
Apparently Java's Mixer interface was not designed for this.
http://docs.oracle.com/javase/7/docs/api/javax/sound/sampled/Mixer.html:
A mixer is an audio device with one or more lines. It need not be
designed for mixing audio signals.
And indeed, when I try to open multiple lines on the same mixer this fails with a LineUnavailableException. However if all your audio recordings have the same audio format it's quite easy to manually mix them together. For example if you have 2 inputs:
Convert both to the appropriate data type (for example byte[] for 8 bit audio, short[] for 16 bit, float[] for 32 bit floating point etc)
Sum them in another array. Make sure summed values do not exceed the range of the datatype.
Convert output back to bytes and write that to the SourceDataLine
See also How is audio represented with numbers?
Here's a sample mixing down 2 recordings and outputting as 1 signal, all in 16bit 48Khz stereo.
// Records from two user-selected input devices for 10 seconds, mixes the two
// signals sample by sample at half volume, and plays the result on a
// user-selected output device. All three lines use 48 kHz 16-bit stereo.
// print all devices (both input and output)
int i = 0;
Mixer.Info[] infos = AudioSystem.getMixerInfo();
for (Mixer.Info info : infos)
System.out.println(i++ + ": " + info.getName());
// select 2 inputs and 1 output (indexes into the list printed above)
// NOTE(review): System.console() returns null when no console is attached - confirm how this is launched
System.out.println("Select input 1: ");
int in1Index = Integer.parseInt(System.console().readLine());
System.out.println("Select input 2: ");
int in2Index = Integer.parseInt(System.console().readLine());
System.out.println("Select output: ");
int outIndex = Integer.parseInt(System.console().readLine());
// ugly java sound api stuff: the mixers and lines are closed by try-with-resources
try (Mixer in1Mixer = AudioSystem.getMixer(infos[in1Index]);
Mixer in2Mixer = AudioSystem.getMixer(infos[in2Index]);
Mixer outMixer = AudioSystem.getMixer(infos[outIndex])) {
in1Mixer.open();
in2Mixer.open();
outMixer.open();
// take the first target (capture) line of each input and the first source (playback) line of the output
try (TargetDataLine in1Line = (TargetDataLine) in1Mixer.getLine(in1Mixer.getTargetLineInfo()[0]);
TargetDataLine in2Line = (TargetDataLine) in2Mixer.getLine(in2Mixer.getTargetLineInfo()[0]);
SourceDataLine outLine = (SourceDataLine) outMixer.getLine(outMixer.getSourceLineInfo()[0])) {
// audio format 48khz 16 bit stereo (signed little endian)
AudioFormat format = new AudioFormat(48000.0f, 16, 2, true, false);
// 4 bytes per frame (16 bit samples stereo)
int frameSize = 4;
// buffer size in frames
int bufferSize = 4800;
int bufferBytes = frameSize * bufferSize;
// buffers for java audio
byte[] in1Bytes = new byte[bufferBytes];
byte[] in2Bytes = new byte[bufferBytes];
byte[] outBytes = new byte[bufferBytes];
// buffers for mixing (one short per 16-bit sample)
short[] in1Samples = new short[bufferBytes / 2];
short[] in2Samples = new short[bufferBytes / 2];
short[] outSamples = new short[bufferBytes / 2];
// how long to record & play
int framesProcessed = 0;
int durationSeconds = 10;
int durationFrames = (int) (durationSeconds * format.getSampleRate());
// open devices
in1Line.open(format, bufferBytes);
in2Line.open(format, bufferBytes);
outLine.open(format, bufferBytes);
in1Line.start();
in2Line.start();
outLine.start();
// start audio loop
while (framesProcessed < durationFrames) {
// record audio (the number of bytes actually read is not checked)
in1Line.read(in1Bytes, 0, bufferBytes);
in2Line.read(in2Bytes, 0, bufferBytes);
// convert input bytes to samples
ByteBuffer.wrap(in1Bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(in1Samples);
ByteBuffer.wrap(in2Bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(in2Samples);
// mix samples - lower volume by 50% since we're mixing 2 streams;
// the sum is computed as int, so halving keeps it within the short range
for (int s = 0; s < bufferBytes / 2; s++)
outSamples[s] = (short) ((in1Samples[s] + in2Samples[s]) * 0.5);
// convert output samples to bytes
ByteBuffer.wrap(outBytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().put(outSamples);
// play audio
outLine.write(outBytes, 0, bufferBytes);
framesProcessed += bufferBytes / frameSize;
}
in1Line.stop();
in2Line.stop();
outLine.stop();
}
}
Allright, I put something together which should get you started. I'll post the full code below but I'll first try and explain the steps involved.
The interesting part here is to create your own audio "mixer" class which allows consumers of that class to schedule audio blocks at specific points in the (near) future. The specific-point-in-time part is important here: I'm assuming you receive network voices in packets where each packet needs to start exactly at the end of the previous one in order to play back a continuous sound for a single voice. Also, since you say voices can overlap, I'm assuming (yes, lots of assumptions) a new one can come in over the network while one or more old ones are still playing. So it seems reasonable to allow audio blocks to be scheduled from any thread. Note that there's only one thread actually writing to the dataline; it's just that any thread can submit audio packets to the mixer.
So for the submit-audio-packet part we now have this:
private final ConcurrentLinkedQueue<QueuedBlock> scheduledBlocks;

/**
 * Schedules a copy of {@code block} for playback at stream position {@code when}.
 * The data is copied, so later changes to the caller's array do not affect
 * the queued audio.
 */
public void mix(long when, short[] block) {
    short[] snapshot = Arrays.copyOf(block, block.length);
    QueuedBlock queued = new QueuedBlock(when, snapshot);
    scheduledBlocks.add(queued);
}
The QueuedBlock class is just used to tag a byte array (the audio buffer) with the "when": the point in time where the block should be played.
Points in time are expressed relative to the current position of the audio stream. It is set to zero when the stream is created and updated with the buffer size each time an audio buffer is written to the dataline:
private final AtomicLong position = new AtomicLong();

/** Returns the current value of the stream position counter. */
public long position() {
    final long current = position.get();
    return current;
}
Apart from all the hassle to set up the data line, the interesting part of the mixer class is obviously where the mixdown happens. For each scheduled audio block, it's split up into 3 cases:
The block is already played in it's entirety. Remove from the scheduledBlocks list.
The block is scheduled to start at some point in time after the current buffer. Do nothing.
(Part of) the block should be mixed down into the current buffer. Note that the beginning of the block may (or may not) have already been played in previous buffer(s). Similarly, the end of the scheduled block may exceed the end of the current buffer, in which case we mix down the first part of it and leave the rest for the next round, until all of it has been played and the entire block is removed.
Also note that there's no reliable way to start playing audio data immediately, when you submit packets to the mixer be sure to always have them start at least the duration of 1 audio buffer from now otherwise you'll risk losing the beginning of your sound. Here's the mixdown code:
// Excerpt: the fields used by the mixdown, followed by the body that mixes one
// output buffer and writes it to the data line.
// Each block is scaled by this factor before being added to the mix
private static final double MIXDOWN_VOLUME = 1.0 / NUM_PRODUCERS;
// Blocks found to be fully played during the current pass; removed afterwards
private final List<QueuedBlock> finished = new ArrayList<>();
// One buffer of mixed samples, and the same buffer as little-endian bytes
private final short[] mixBuffer = new short[BUFFER_SIZE_FRAMES * CHANNELS];
private final byte[] audioBuffer = new byte[BUFFER_SIZE_FRAMES * CHANNELS * 2];
// Stream position in frames; advanced by one buffer per write
private final AtomicLong position = new AtomicLong();
// start from silence
Arrays.fill(mixBuffer, (short) 0);
long bufferStartAt = position.get();
for (QueuedBlock block : scheduledBlocks) {
int blockFrames = block.data.length / CHANNELS;
// block fully played - mark for deletion
if (block.when + blockFrames <= bufferStartAt) {
finished.add(block);
continue;
}
// block starts after end of current buffer
if (bufferStartAt + BUFFER_SIZE_FRAMES <= block.when)
continue;
// mix in part of the block which overlaps current buffer
// blockOffset: frames of the block already played in earlier buffers
int blockOffset = Math.max(0, (int) (bufferStartAt - block.when));
int blockMaxFrames = blockFrames - blockOffset;
// bufferOffset: frames into this buffer at which the block starts
int bufferOffset = Math.max(0, (int) (block.when - bufferStartAt));
int bufferMaxFrames = BUFFER_SIZE_FRAMES - bufferOffset;
for (int f = 0; f < blockMaxFrames && f < bufferMaxFrames; f++)
for (int c = 0; c < CHANNELS; c++) {
int bufferIndex = (bufferOffset + f) * CHANNELS + c;
int blockIndex = (blockOffset + f) * CHANNELS + c;
// NOTE(review): the short accumulator is not clamped; it stays in range only
// while no more than NUM_PRODUCERS full-scale blocks overlap
mixBuffer[bufferIndex] += (short)
(block.data[blockIndex]*MIXDOWN_VOLUME);
}
}
scheduledBlocks.removeAll(finished);
finished.clear();
// convert the mixed samples to little-endian bytes and play them
ByteBuffer
.wrap(audioBuffer)
.order(ByteOrder.LITTLE_ENDIAN)
.asShortBuffer()
.put(mixBuffer);
line.write(audioBuffer, 0, audioBuffer.length);
position.addAndGet(BUFFER_SIZE_FRAMES);
And finally a complete, self-contained sample which spawns a number of threads submitting audio blocks representing sinewaves of random duration and frequency to the mixer (called AudioConsumer in this sample). Replace sinewaves by incoming network packets and you should be halfway to a solution.
package test;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.Line;
import javax.sound.sampled.Mixer;
import javax.sound.sampled.SourceDataLine;
public class Test {
public static final int CHANNELS = 2;
public static final int SAMPLE_RATE = 48000;
public static final int NUM_PRODUCERS = 10;
public static final int BUFFER_SIZE_FRAMES = 4800;
// generates some random sine wave
public static class ToneGenerator {
private static final double[] NOTES = {261.63, 311.13, 392.00};
private static final double[] OCTAVES = {1.0, 2.0, 4.0, 8.0};
private static final double[] LENGTHS = {0.05, 0.25, 1.0, 2.5, 5.0};
private double phase;
private int framesProcessed;
private final double length;
private final double frequency;
public ToneGenerator() {
ThreadLocalRandom rand = ThreadLocalRandom.current();
length = LENGTHS[rand.nextInt(LENGTHS.length)];
frequency = NOTES[rand.nextInt(NOTES.length)] * OCTAVES[rand.nextInt(OCTAVES.length)];
}
// make sound
public void fill(short[] block) {
for (int f = 0; f < block.length / CHANNELS; f++) {
double sample = Math.sin(phase * 2.0 * Math.PI);
for (int c = 0; c < CHANNELS; c++)
block[f * CHANNELS + c] = (short) (sample * Short.MAX_VALUE);
phase += frequency / SAMPLE_RATE;
}
framesProcessed += block.length / CHANNELS;
}
// true if length of tone has been generated
public boolean done() {
return framesProcessed >= length * SAMPLE_RATE;
}
}
// dummy audio producer, based on sinewave generator
// above but could also be incoming network packets
public static class AudioProducer {
final Thread thread;
final AudioConsumer consumer;
final short[] buffer = new short[BUFFER_SIZE_FRAMES * CHANNELS];
public AudioProducer(AudioConsumer consumer) {
this.consumer = consumer;
thread = new Thread(() -> run());
thread.setDaemon(true);
}
public void start() {
thread.start();
}
// repeatedly play random sine and sleep for some time
void run() {
try {
ThreadLocalRandom rand = ThreadLocalRandom.current();
while (true) {
long pos = consumer.position();
ToneGenerator g = new ToneGenerator();
// if we schedule at current buffer position, first part of the tone will be
// missed so have tone start somewhere in the middle of the next buffer
pos += BUFFER_SIZE_FRAMES + rand.nextInt(BUFFER_SIZE_FRAMES);
while (!g.done()) {
g.fill(buffer);
consumer.mix(pos, buffer);
pos += BUFFER_SIZE_FRAMES;
// we can generate audio faster than it's played
// sleep a while to compensate - this more closely
// corresponds to playing audio coming in over the network
double bufferLengthMillis = BUFFER_SIZE_FRAMES * 1000.0 / SAMPLE_RATE;
Thread.sleep((int) (bufferLengthMillis * 0.9));
}
// sleep a while in between tones
Thread.sleep(1000 + rand.nextInt(2000));
}
} catch (Throwable t) {
System.out.println(t.getMessage());
t.printStackTrace();
}
}
}
// audio consumer - plays continuously on a background
// thread, allows audio to be mixed in from arbitrary threads
public static class AudioConsumer {
// audio block with "when to play" tag
private static class QueuedBlock {
final long when;
final short[] data;
public QueuedBlock(long when, short[] data) {
this.when = when;
this.data = data;
}
}
// need not normally be so low but in this example
// we're mixing down a bunch of full scale sinewaves
private static final double MIXDOWN_VOLUME = 1.0 / NUM_PRODUCERS;
private final List<QueuedBlock> finished = new ArrayList<>();
private final short[] mixBuffer = new short[BUFFER_SIZE_FRAMES * CHANNELS];
private final byte[] audioBuffer = new byte[BUFFER_SIZE_FRAMES * CHANNELS * 2];
private final Thread thread;
private final AtomicLong position = new AtomicLong();
private final AtomicBoolean running = new AtomicBoolean(true);
private final ConcurrentLinkedQueue<QueuedBlock> scheduledBlocks = new ConcurrentLinkedQueue<>();
public AudioConsumer() {
thread = new Thread(() -> run());
}
public void start() {
thread.start();
}
public void stop() {
running.set(false);
}
// gets the play cursor. note - this is not accurate and
// must only be used to schedule blocks relative to other blocks
// (e.g., for splitting up continuous sounds into multiple blocks)
public long position() {
return position.get();
}
// put copy of audio block into queue so we don't
// have to worry about caller messing with it afterwards
public void mix(long when, short[] block) {
scheduledBlocks.add(new QueuedBlock(when, Arrays.copyOf(block, block.length)));
}
// better hope mixer 0, line 0 is output
private void run() {
Mixer.Info[] mixerInfo = AudioSystem.getMixerInfo();
try (Mixer mixer = AudioSystem.getMixer(mixerInfo[0])) {
Line.Info[] lineInfo = mixer.getSourceLineInfo();
try (SourceDataLine line = (SourceDataLine) mixer.getLine(lineInfo[0])) {
line.open(new AudioFormat(SAMPLE_RATE, 16, CHANNELS, true, false), BUFFER_SIZE_FRAMES);
line.start();
while (running.get())
processSingleBuffer(line);
line.stop();
}
} catch (Throwable t) {
System.out.println(t.getMessage());
t.printStackTrace();
}
}
// mix down single buffer and offer to the audio device
private void processSingleBuffer(SourceDataLine line) {
Arrays.fill(mixBuffer, (short) 0);
long bufferStartAt = position.get();
// mixdown audio blocks
for (QueuedBlock block : scheduledBlocks) {
int blockFrames = block.data.length / CHANNELS;
// block fully played - mark for deletion
if (block.when + blockFrames <= bufferStartAt) {
finished.add(block);
continue;
}
// block starts after end of current buffer
if (bufferStartAt + BUFFER_SIZE_FRAMES <= block.when)
continue;
// mix in part of the block which overlaps current buffer
// note that block may have already started in the past
// but extends into the current buffer, or that it starts
// in the future but before the end of the current buffer
int blockOffset = Math.max(0, (int) (bufferStartAt - block.when));
int blockMaxFrames = blockFrames - blockOffset;
int bufferOffset = Math.max(0, (int) (block.when - bufferStartAt));
int bufferMaxFrames = BUFFER_SIZE_FRAMES - bufferOffset;
for (int f = 0; f < blockMaxFrames && f < bufferMaxFrames; f++)
for (int c = 0; c < CHANNELS; c++) {
int bufferIndex = (bufferOffset + f) * CHANNELS + c;
int blockIndex = (blockOffset + f) * CHANNELS + c;
mixBuffer[bufferIndex] += (short) (block.data[blockIndex] * MIXDOWN_VOLUME);
}
}
scheduledBlocks.removeAll(finished);
finished.clear();
ByteBuffer.wrap(audioBuffer).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().put(mixBuffer);
line.write(audioBuffer, 0, audioBuffer.length);
position.addAndGet(BUFFER_SIZE_FRAMES);
}
}
/**
 * Starts the consumer and NUM_PRODUCERS producers, waits for the user to
 * press return, then stops the consumer.
 */
public static void main(String[] args) {
    System.out.print("Press return to exit...");
    AudioConsumer consumer = new AudioConsumer();
    consumer.start();
    for (int i = 0; i < NUM_PRODUCERS; i++) {
        new AudioProducer(consumer).start();
    }
    try {
        // System.console() is null when no interactive console is attached
        // (IDE launch, redirected stdin); fall back to reading stdin directly
        // instead of failing with a NullPointerException.
        if (System.console() != null) {
            System.console().readLine();
        } else {
            System.in.read();
        }
    } catch (java.io.IOException e) {
        e.printStackTrace();
    } finally {
        // Always stop the consumer, even if waiting for input failed.
        consumer.stop();
    }
}
}
You can use the Tritonus library to do software audio mixing (it's old but still works quite well).
Add the dependency to your project:
<dependency>
<groupId>com.googlecode.soundlibs</groupId>
<artifactId>tritonus-all</artifactId>
<version>0.3.7.2</version>
</dependency>
Use the org.tritonus.share.sampled.FloatSampleBuffer. Both buffers must be of same AudioFormat before calling #mix.
// TODO instantiate these variables with real data
byte[] audio1, audio2;
AudioFormat af1, af2; // both buffers must use the same format before mixing
SourceDataLine sdl = AudioSystem.getSourceDataLine(af1);
// A SourceDataLine must be opened and started before it will play anything.
sdl.open(af1);
sdl.start();
// af1/af2 already are AudioFormat instances; AudioFormat has no getFormat() method.
FloatSampleBuffer fsb1 = new FloatSampleBuffer(audio1, 0, audio1.length, af1);
FloatSampleBuffer fsb2 = new FloatSampleBuffer(audio2, 0, audio2.length, af2);
fsb1.mix(fsb2); // adds fsb2's samples into fsb1
byte[] result = fsb1.convertToByteArray(af1);
sdl.write(result, 0, result.length); // play it
sdl.drain(); // wait until the queued audio has been played
sdl.close();
This question already has answers here:
wait until all threads finish their work in java
(17 answers)
Closed 8 years ago.
Hello in my exercise I need to show resource access problems with multi threads.
I need to increment indexes of shared (between threads) int table called histogramTable[]the size of table is known, and data is in file. Each thread got it own range, called interval. For example: I have 4 threads and every threads getting the following indexes:
Thread 1: 0 - 1_000_000
Thread 2: 1_000_000 - 2_000_000
Thread 3: 2_000_000 - 3_000_000
Thread 4: 3_000_000 - 4_000_000
And here is my problem: once you start the program with a given number of threads (the threadsNumber variable), it seems that only one thread is running, because the sum of bytes is always tabSize / threadsNumber. For the above example it's 1_000_000 bytes.
With problem of thread access it should be 3_800_000 - 4_000_000 bytes. Can you tell me what I'm doing wrong?
I'm giving you the whole code because in my opinion it's short. There is also a commented-out function called randomizeBytes() to quickly generate the byte file.
Ex.java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Splits the bytes loaded from "dane.txt" into contiguous index ranges and
 * starts one MyThread worker per range.
 */
public class Ex2 {
    private static int threadsNumber = 4, tabSize = 4000000;
    /** Number of workers still running; decremented through decrementThreads(). */
    public static int threadsCounter;
    public static byte[] dataTab = loadBytes();

    /**
     * Reads the whole input file.
     *
     * @return the file content, or a zero-filled array of tabSize bytes if the
     *         file could not be read
     */
    public static byte[] loadBytes() {
        byte data[] = new byte[tabSize];
        Path path = Paths.get("dane.txt");
        try {
            data = Files.readAllBytes(path);
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        return data;
    }

    /*private byte[] randomizeBytes() {
    Path path = Paths.get("binaryData.txt");
    byte bytes[] = new byte[tabSize];
    new Random().nextBytes(bytes);
    try {
    Files.write(path, bytes);
    } catch (IOException e) {
    e.printStackTrace();
    }
    return bytes;
    }*/

    /**
     * Starts threadsNumber workers, each covering [startIndex, endIndex).
     * The last worker also receives the remainder when the size is not evenly
     * divisible, and ranges never extend past the data actually loaded.
     */
    public static void runThreads(){
        threadsCounter = threadsNumber;
        // The file may be shorter than tabSize; never hand out indexes beyond it.
        int total = Math.min(tabSize, dataTab.length);
        int interval = total / threadsNumber;
        Thread[] threads = new Thread[threadsNumber];
        for (int i = 0; i < threadsNumber; i++) {
            int startIndex = interval * i;
            // Integer division drops the remainder; give it to the last worker
            // so that no trailing bytes are silently skipped.
            int endIndex = (i == threadsNumber - 1) ? total : startIndex + interval;
            MyThread w = new MyThread(startIndex, endIndex);
            threads[i] = new Thread(w);
            threads[i].start();
            if (threads[i].isAlive())
                System.out.println("Thread number: " + i + " started and alive, indexes: " + startIndex + " - " + endIndex );
        }
    }

    /**
     * Atomically decrements the running-worker counter.
     *
     * @return the counter value after the decrement
     */
    public synchronized static int decrementThreads(){
        return --threadsCounter;
    }

    public static void main(String args[]){
        runThreads();
    }
}
MyThread.java
public class MyThread implements Runnable{
private byte table[] = Ex2.dataTab;
int startIndex,endIndex,temp;
private int histogramTable[] = new int[256] ;
private long timeStart, timeStop;
public MyThread(int startIndex, int endIndex){
this.startIndex = startIndex;
this.endIndex = endIndex;
}
#Override
public void run() {
timeStart = System.currentTimeMillis();
for(int i = startIndex ; i < endIndex ; i ++) {
temp = Byte.toUnsignedInt(table[i]);
histogramTable[temp]++;
}
timeStop = System.currentTimeMillis();
System.out.println("Threads working: " + Ex2.threadsCounter);
if(Ex2.decrementThreads() == 0) printSummary();
}
public void printSummary() {
int sum = 0;
for(int i : histogramTable) System.out.print(i + " ");
System.out.println();
for(int i = 0 ; i < 256 ; i ++)
sum += histogramTable[i];
System.out.println("Bytes: " + sum);
System.out.println("Task complete in: " + (timeStop - timeStart) + "ms");
}
}
Check out Thread.join(). Look at the explanation in the answer at Java Multithreading concept and join() method.
I have a big text file (5Mb) that I use in my Android application. I create the file as a list of pre-sorted Strings, and the file doesn't change once it is created. How can I perform a binary search on the contents of this file, without reading line-by-line to find the matching String?
Since the content of the file does not change, you can break the file into multiple pieces, say A-G, H-N, O-T and U-Z. This allows you to check the first character and immediately cut the possible set to a fourth of the original size. Now a linear search will not take as long, or reading the whole piece into memory could be an option. This process could be extended if n/4 is still too large, but the idea is the same. Build the search breakdowns into the file structure instead of trying to do it all in memory.
A 5MB file isn't that big - you should be able to read each line into a String[] array, which you can then use java.util.Arrays.binarySearch() to find the line you want. This is my recommended approach.
If you don't want to read the whole file in to your app, then it gets more complicated. If each line of the file is the same length, and the file is already sorted, then you can open the file in RandomAccessFile and perform a binary search yourself by using seek() like this...
// open the file for reading
RandomAccessFile raf = new RandomAccessFile("myfile.txt","r");
// NOTE: every record is lineSize bytes long (padding and line terminator
// included), so searchValue must be padded the same way for an exact match.
String searchValue = "myline";
int lineSize = 50;
// raf.length() is a long; keep the arithmetic in long so large files work
long numberOfLines = raf.length() / lineSize;
// perform the binary search...
byte[] lineBuffer = new byte[lineSize];
long bottom = 0;
long top = numberOfLines - 1; // index of the last record, not one past it
long middle;
while (bottom <= top){
    middle = bottom + (top - bottom) / 2; // overflow-safe midpoint
    raf.seek(middle*lineSize); // jump to this line in the file
    raf.readFully(lineBuffer); // read() may return fewer bytes than requested
    String line = new String(lineBuffer); // convert the line to a String
    int comparison = line.compareTo(searchValue);
    if (comparison == 0){
        // found it
        break;
    }
    else if (comparison < 0){
        // line comes before searchValue
        bottom = middle + 1;
    }
    else {
        // line comes after searchValue
        top = middle - 1;
    }
}
raf.close(); // close the file when you're finished
However, if the file doesn't have fixed-width lines, then you can't easily perform a binary search without loading it into memory first, as you can't quickly jump to a specific line in the file like you can with fixed-width lines.
Here's something I quickly put together. It uses two files, one with the words, the other with the offsets. The format of the offset file is this: the first 10 bits contains the word size, the last 22 bits contains the offset (the word position, for example, aaah would be 0, abasementable would be 4, etc.). It's encoded in big endian (java standard). Hope it helps somebody.
word.dat:
aaahabasementableabnormalabnormalityabortionistabortion-rightsabracadabra
wordx.dat:
00 80 00 00 01 20 00 04 00 80 00 0D 01 00 00 11 _____ __________
01 60 00 19 01 60 00 24 01 E0 00 2F 01 60 00 3E _`___`_$___/_`_>
I created these files in C#, but here's the code for it (it uses a txt file with words separated by crlfs)
// Builds word.dat (all words concatenated) and wordx.dat (one big-endian
// 32-bit entry per word: upper 10 bits = word length, lower 22 bits = offset
// of the word inside word.dat) from a text file with one word per line.
static void Main(string[] args)
{
    // Verbatim string literals use the @ prefix, so backslashes need no escaping.
    const string fIn = @"C:\projects\droid\WriteFiles\input\allwords.txt";
    const string fwordxOut = @"C:\projects\droid\WriteFiles\output\wordx.dat";
    const string fWordOut = @"C:\projects\droid\WriteFiles\output\word.dat";
    int offset = 0;
    var lines = File.ReadLines(fIn);
    using (FileStream stream = new FileStream(fwordxOut, FileMode.Create, FileAccess.ReadWrite))
    using (EndianBinaryWriter wwordxOut = new EndianBinaryWriter(EndianBitConverter.Big, stream))
    using (StreamWriter wWordOut = new StreamWriter(File.Open(fWordOut, FileMode.Create)))
    {
        foreach (var line in lines)
        {
            wWordOut.Write(line);
            // NOTE: this packing only holds for words shorter than 1024
            // characters and offsets below 2^22; larger values overlap.
            int entry = offset | (line.Length << 22); // first 10 bits to the left is the word size
            offset += line.Length;
            wwordxOut.Write(entry);
        }
    }
}
And this is the Java code for the binary file search:
/**
 * Looks up a hard-coded target word using the index file wordx.dat and the
 * word file word.dat on external storage, logging each probe and the result.
 *
 * Each index entry is a big-endian int: the upper 10 bits hold the word
 * length and the lower 22 bits hold the word's offset inside word.dat.
 */
public static void binarySearch() {
    String TAG = "TEST";
    String wordFilePath = Environment.getExternalStorageDirectory().getAbsolutePath() + "/word.dat";
    String wordxFilePath = Environment.getExternalStorageDirectory().getAbsolutePath() + "/wordx.dat";
    String target = "abracadabra";
    boolean targetFound = false;
    int searchCount = 0;
    // try-with-resources closes both files even when a read fails mid-search.
    try (RandomAccessFile raf = new RandomAccessFile(wordxFilePath, "r");
         RandomAccessFile rafWord = new RandomAccessFile(wordFilePath, "r")) {
        long low = 0;
        long high = (raf.length() / 4) - 1;
        while (high >= low) {
            long mid = low + (high - low) / 2;
            raf.seek(mid * 4);
            int cur = raf.readInt();
            Log.v(TAG + "-cur", String.valueOf(cur));
            // Unsigned shift: with >> a length of 512 or more sets the sign
            // bit and would come out negative.
            int len = cur >>> 22; //word length
            cur = cur & 0x3FFFFF; //first 10 bits are 0
            rafWord.seek(cur);
            byte [] bytes = new byte[len];
            // readFully guarantees the whole word is read; read() may stop early.
            rafWord.readFully(bytes);
            Log.v(TAG + "-wordOffset", String.valueOf(len));
            searchCount++;
            String str = new String(bytes);
            Log.v(TAG, str);
            int comparison = target.compareTo(str);
            if (comparison < 0) {
                high = mid - 1;
            } else if (comparison == 0) {
                targetFound = true;
                break;
            } else {
                low = mid + 1;
            }
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    if (targetFound) {
        Log.v(TAG + "-found " , String.valueOf(searchCount));
    } else {
        Log.v(TAG + "-not found " , String.valueOf(searchCount));
    }
}
In a text file with a uniform character length you could seek, character-wise, to the middle of the interval in question, read characters until you hit your delimiter, then use the subsequent string as an approximation of the element-wise middle. The problem with doing this on Android, though, is that you apparently can't get random access to a resource (although I suppose you could just reopen it every time). Furthermore, this technique doesn't generalize to maps and sets of other types.
Another option would be to (using a RandomAccessFile) write an "array" of ints - one for each String - at the beginning of the file then go back and update them with the locations of their corresponding Strings. Again the search will require jumping around.
What I would do (and did do in my own app) is implement a hash set in a file. This one does separate chaining with trees.
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.Set;
/**
 * A read-only set of strings stored in a file as a hash table whose buckets
 * are balanced binary search trees.
 *
 * File layout: a comment written with writeUTF, an int bucket count, one int
 * per bucket pointing at that bucket's root node, then the nodes. A node is a
 * key (writeUTF) followed by an int left pointer and an int right pointer.
 * A pointer is the number of bytes between the end of the pointer itself and
 * the start of the target node, or -1 when there is no child. Distances are
 * used instead of absolute offsets so that lookups can use a forward-only stream.
 */
class StringFileSet {
    private static final double loadFactor = 0.75;

    /** A pending subtree: the key index range it covers and where its parent pointer lives. */
    private static final class NodeInfo {
        final int lower;
        final int upper;
        final long callingOffset;

        NodeInfo(int lower, int upper, long callingOffset) {
            this.lower = lower;
            this.upper = upper;
            this.callingOffset = callingOffset;
        }
    }

    /** Maps a key to its bucket; must stay in sync between makeFile and contains. */
    private static int bucketIndex(String key, int numBuckets) {
        return Math.abs(key.hashCode() % numBuckets);
    }

    /**
     * Writes the given set to fileName, replacing any existing file.
     *
     * @param fileName path of the file to create
     * @param comment  free text stored at the start of the file
     * @param set      the strings to store; may be empty
     * @throws IOException if the file cannot be written
     */
    public static void makeFile(String fileName, String comment, Set<String> set) throws IOException {
        new File(fileName).delete();
        try (RandomAccessFile fout = new RandomAccessFile(fileName, "rw")) {
            //Write comment
            fout.writeUTF(comment);

            //Make bucket array. Always keep at least one bucket: with zero
            //buckets a lookup would have to compute "hash % 0".
            int numBuckets = Math.max(1, (int) (set.size() / loadFactor));
            ArrayList<ArrayList<String>> bucketArray = new ArrayList<ArrayList<String>>(numBuckets);
            for (int ii = 0; ii < numBuckets; ii++) {
                bucketArray.add(new ArrayList<String>());
            }
            for (String key : set) {
                bucketArray.get(bucketIndex(key, numBuckets)).add(key);
            }
            //Sort key lists in preparation for creating trees
            for (ArrayList<String> keyList : bucketArray) {
                Collections.sort(keyList);
            }

            //Write bucket array, remembering where each root pointer is stored
            fout.writeInt(numBuckets);
            long[] rootPointerOffsets = new long[numBuckets];
            for (int index = 0; index < numBuckets; index++) {
                rootPointerOffsets[index] = fout.getFilePointer();
                fout.writeInt(-1);
            }

            //Write trees, one bucket after the other, breadth-first
            for (int bucket = 0; bucket < numBuckets; bucket++) {
                ArrayList<String> keys = bucketArray.get(bucket);
                LinkedList<NodeInfo> queue = new LinkedList<NodeInfo>();
                queue.add(new NodeInfo(0, keys.size() - 1, rootPointerOffsets[bucket]));
                while (!queue.isEmpty()) {
                    NodeInfo nodeInfo = queue.poll();
                    if (nodeInfo.lower > nodeInfo.upper) {
                        continue; // empty subtree: the parent pointer stays -1
                    }
                    //Set respective pointer in parent node. The new node is
                    //appended at the current end of the file.
                    fout.seek(nodeInfo.callingOffset);
                    fout.writeInt((int) (fout.length() - (nodeInfo.callingOffset + 4)));
                    fout.seek(fout.length());
                    int middle = (nodeInfo.lower + nodeInfo.upper) >>> 1;
                    //Key
                    fout.writeUTF(keys.get(middle));
                    //Left child
                    queue.add(new NodeInfo(nodeInfo.lower, middle - 1, fout.getFilePointer()));
                    fout.writeInt(-1);
                    //Right child
                    queue.add(new NodeInfo(middle + 1, nodeInfo.upper, fout.getFilePointer()));
                    fout.writeInt(-1);
                }
            }
        }
    }

    private final String fileName;
    private final int numBuckets;
    private final int bucketArrayOffset;

    /**
     * Opens a file written by makeFile and reads its header.
     *
     * @param fileName path of the set file
     * @throws IOException if the file cannot be read
     */
    public StringFileSet(String fileName) throws IOException {
        this.fileName = fileName;
        try (DataInputStream fin = new DataInputStream(new BufferedInputStream(new FileInputStream(fileName)))) {
            // writeUTF stores the length as an unsigned 16-bit value.
            int numBytes = fin.readUnsignedShort();
            fin.skipBytes(numBytes);
            this.numBuckets = fin.readInt();
            // 2 bytes length prefix + comment bytes + 4 bytes bucket count
            this.bucketArrayOffset = numBytes + 6;
        }
    }

    /**
     * Tests whether key is stored in the file. The file is reopened on every call.
     *
     * @param key the string to look up
     * @return true if the key is present
     * @throws IOException if the file cannot be read
     */
    public boolean contains(String key) throws IOException {
        if (this.numBuckets <= 0) {
            return false; // file without buckets (written from an empty set by older code)
        }
        try (DataInputStream fin = new DataInputStream(new BufferedInputStream(new FileInputStream(this.fileName)))) {
            fin.skipBytes(4 * bucketIndex(key, this.numBuckets) + this.bucketArrayOffset);
            int distance = fin.readInt();
            while (distance != -1) {
                fin.skipBytes(distance);
                String candidate = fin.readUTF();
                int comparison = key.compareTo(candidate);
                if (comparison == 0) {
                    return true;
                }
                if (comparison > 0) {
                    fin.skipBytes(4); // step over the left pointer to reach the right one
                }
                distance = fin.readInt();
            }
            return false;
        }
    }
}
A test program
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
/** Small demo: writes three colours to a file-backed set and queries four words. */
class Test {
    public static void main(String[] args) throws IOException {
        final String path = "stringSet";

        // Build the in-memory set that will be persisted.
        HashSet<String> stringMemorySet = new HashSet<String>();
        for (String colour : new String[] {"red", "yellow", "blue"}) {
            stringMemorySet.add(colour);
        }
        StringFileSet.makeFile(path, "Provided under ... included in all copies and derivatives ...", stringMemorySet);

        // Query the file; "orange" was never stored.
        StringFileSet stringFileSet = new StringFileSet(path);
        for (String query : new String[] {"orange", "red", "yellow", "blue"}) {
            System.out.println(query + " -> " + stringFileSet.contains(query));
        }

        new File(path).delete();
        System.out.println();
    }
}
You'll also need to pass a Context to it, if and when you modify it for android, so it can access the getResources() method.
You're also probably going to want to stop the android build tools from compressing the file, which can apparently only be done - if you're working with the GUI - by changing the file's extension to something such as jpg. This made the process about 100 to 300 times faster in my app.
You might also look into giving yourself more memory by using the NDK.
Though it might sound like overkill, don't store data you need to do this with as a flat file. Make a database and query the data in the database. This should be both effective and fast.
Here is a function that I think works (using this in practice). Lines can have any length. You have to supply a lambda called "nav" to do the actual line check so you are flexible in the file's order (case-sensitive, case-insensitive, ordered by a certain field etc.).
import java.io.File;
import java.io.RandomAccessFile;
/**
 * Binary search for a line in a sorted text file whose lines may have any
 * length, plus the small helpers it needs.
 */
class main {
    /**
     * Searches a sorted, '\n'-separated, UTF-8 encoded text file.
     *
     * @param file the file to search
     * @param nav  receives a line (without the trailing '\n', but with any
     *             '\r') and returns a negative value to move up (towards the
     *             start of the file), 0 when found, or a positive value to
     *             move down
     * @return the matching line with its byte range in the file; if there is
     *         no exact match, the closest line above the insertion point; or
     *         null when the file is empty or the key sorts before the first line
     */
    static Pair<LongRange, String> binarySearchForLineInTextFile(File file, IF1<String, Integer> nav) {
        long length = l(file);
        int bufSize = 1024;
        RandomAccessFile raf = randomAccessFileForReading(file);
        try {
            long min = 0, max = length;
            int direction = 0;
            Pair<LongRange, String> possibleResult = null;
            while (min < max) {
                ping();
                long middle = min + (max - min) / 2;
                long lineStart = raf_findBeginningOfLine(raf, middle, bufSize);
                long lineEnd = raf_findEndOfLine(raf, middle, bufSize);
                String line = readLineWithoutTerminator(raf, lineStart, lineEnd);
                direction = nav.get(line);
                possibleResult = new Pair<LongRange, String>(new LongRange(lineStart, lineEnd), line);
                if (direction == 0) return possibleResult;
                // asserts are to assure that loop terminates
                if (direction < 0) max = assertLessThan(max, lineStart);
                else min = assertBiggerThan(min, lineEnd);
            }
            if (direction >= 0) return possibleResult;
            // The key sorts before the last probed line, which starts at min.
            // If that is the first line there is no line above to return.
            if (min <= 0) return null;
            long lineStart = raf_findBeginningOfLine(raf, min - 1, bufSize);
            String line = fromUtf8(raf_readFilePart(raf, lineStart, (int) (min - 1 - lineStart)));
            return new Pair<LongRange, String>(new LongRange(lineStart, min), line);
        } finally {
            _close(raf);
        }
    }

    /**
     * Reads the bytes [lineStart, lineEnd) and decodes them as UTF-8, dropping
     * one trailing '\n' if present. The last line of a file may have no
     * terminator, in which case nothing is dropped.
     */
    private static String readLineWithoutTerminator(RandomAccessFile raf, long lineStart, long lineEnd) {
        byte[] bytes = raf_readFilePart(raf, lineStart, (int) (lineEnd - lineStart));
        int len = bytes.length;
        if (len > 0 && bytes[len - 1] == '\n') len--;
        try {
            return new String(bytes, 0, len, "UTF-8");
        } catch (Exception __e) {
            throw rethrow(__e);
        }
    }

    /** Length of a byte array; 0 for null. */
    static int l(byte[] a) {
        return a == null ? 0 : a.length;
    }

    /** Length of a file in bytes; 0 for null. */
    static long l(File f) {
        return f == null ? 0 : f.length();
    }

    /** Opens a file read-only, converting checked exceptions to unchecked ones. */
    static RandomAccessFile randomAccessFileForReading(File path) {
        try {
            return new RandomAccessFile(path, "r");
        } catch (Exception __e) {
            throw rethrow(__e);
        }
    }

    // you can change this function to allow interrupting long calculations from the outside. just throw a RuntimeException.
    static boolean ping() {
        return true;
    }

    /**
     * Scans backwards from pos (exclusive) in chunks of bufSize bytes.
     *
     * @return the offset just after the closest '\n' before pos, or 0 if there is none
     */
    static long raf_findBeginningOfLine(RandomAccessFile raf, long pos, int bufSize) {
        try {
            byte[] buf = new byte[bufSize];
            while (pos > 0) {
                long start = Math.max(pos - bufSize, 0);
                raf.seek(start);
                raf.readFully(buf, 0, (int) Math.min(pos - start, bufSize));
                int idx = lastIndexOf_byteArray(buf, (byte) '\n');
                if (idx >= 0) return start + idx + 1;
                pos = start;
            }
            return 0;
        } catch (Exception __e) {
            throw rethrow(__e);
        }
    }

    /**
     * Scans forwards from pos (inclusive) in chunks of bufSize bytes.
     *
     * @return the offset just after the first '\n' at or after pos, or the
     *         file length if there is none
     */
    static long raf_findEndOfLine(RandomAccessFile raf, long pos, int bufSize) {
        try {
            byte[] buf = new byte[bufSize];
            long length = raf.length();
            while (pos < length) {
                raf.seek(pos);
                raf.readFully(buf, 0, (int) Math.min(length - pos, bufSize));
                int idx = indexOf_byteArray(buf, (byte) '\n');
                if (idx >= 0) return pos + idx + 1;
                pos += bufSize;
            }
            return length;
        } catch (Exception __e) {
            throw rethrow(__e);
        }
    }

    /** Decodes bytes as UTF-8; null stays null. */
    static String fromUtf8(byte[] bytes) {
        try {
            return bytes == null ? null : new String(bytes, "UTF-8");
        } catch (Exception __e) {
            throw rethrow(__e);
        }
    }

    /** Reads exactly l bytes starting at offset start. */
    static byte[] raf_readFilePart(RandomAccessFile raf, long start, int l) {
        try {
            byte[] buf = new byte[l];
            raf.seek(start);
            raf.readFully(buf);
            return buf;
        } catch (Exception __e) {
            throw rethrow(__e);
        }
    }

    /** Returns b after asserting b < a. */
    static <A> A assertLessThan(A a, A b) {
        assertTrue(cmp(b, a) < 0);
        return b;
    }

    /** Returns b after asserting b > a. */
    static <A> A assertBiggerThan(A a, A b) {
        assertTrue(cmp(b, a) > 0);
        return b;
    }

    /** Closes c if it is not null, converting checked exceptions to unchecked ones. */
    static void _close(AutoCloseable c) {
        try {
            if (c != null)
                c.close();
        } catch (Throwable e) {
            throw rethrow(e);
        }
    }

    /**
     * Always throws: t itself if it is a RuntimeException, otherwise t wrapped
     * in one. The return type only exists so callers can write "throw rethrow(e)".
     */
    static RuntimeException rethrow(Throwable t) {
        throw t instanceof RuntimeException ? (RuntimeException) t : new RuntimeException(t);
    }

    /** Index of the last occurrence of b in a, or -1. */
    static int lastIndexOf_byteArray(byte[] a, byte b) {
        for (int i = l(a) - 1; i >= 0; i--)
            if (a[i] == b)
                return i;
        return -1;
    }

    /** Index of the first occurrence of b in a, or -1. */
    static int indexOf_byteArray(byte[] a, byte b) {
        int n = l(a);
        for (int i = 0; i < n; i++)
            if (a[i] == b)
                return i;
        return -1;
    }

    /** Throws a RuntimeException("oops") when b is false. */
    static boolean assertTrue(boolean b) {
        if (!b)
            throw fail("oops");
        return b;
    }

    /** Null-safe comparison; null sorts first. Non-null arguments must be mutually Comparable. */
    @SuppressWarnings({"unchecked", "rawtypes"})
    static int cmp(Object a, Object b) {
        if (a == null) return b == null ? 0 : -1;
        if (b == null) return 1;
        return ((Comparable) a).compareTo(b);
    }

    /** Always throws a RuntimeException carrying msg (empty if null). */
    static RuntimeException fail(String msg) {
        throw new RuntimeException(msg == null ? "" : msg);
    }

    /** A pair of long offsets. */
    final static class LongRange {
        long start, end;

        LongRange(long start, long end) {
            this.end = end;
            this.start = start;
        }

        public String toString() {
            return "[" + start + ";" + end + "]";
        }
    }

    /** One-argument function. */
    interface IF1<A, B> {
        B get(A a);
    }

    /** Simple mutable pair. */
    static class Pair<A, B> {
        A a;
        B b;

        Pair(A a, B b) {
            this.b = b;
            this.a = a;
        }

        public String toString() {
            return "<" + a + ", " + b + ">";
        }
    }
}
I am struggling to port a Perl program to Java, and learning Java as I go. A central component of the original program is a Perl module that does string prefix lookups in a +500 GB sorted text file using binary search
(essentially, "seek" to a byte offset in the middle of the file, backtrack to nearest newline, compare line prefix with the search string, "seek" to half/double that byte offset, repeat until found...)
I have experimented with several database solutions but found that nothing beats this in sheer lookup speed with data sets of this size. Do you know of any existing Java library that implements such functionality? Failing that, could you point me to some idiomatic example code that does random access reads in text files?
Alternatively, I am not familiar with the new (?) Java I/O libraries but would it be an option to memory-map the 500 GB text file (I'm on a 64-bit machine with memory to spare) and do binary search on the memory-mapped byte array? I would be very interested to hear any experiences you have to share about this and similar problems.
I am a big fan of Java's MappedByteBuffers for situations like this. It is blazing fast. Below is a snippet I put together for you that maps a buffer to the file, seeks to the middle, and then searches backwards to a newline character. This should be enough to get you going?
I have similar code (seek, read, repeat until done) in my own application, benchmarked
java.io streams against MappedByteBuffer in a production environment and posted the results on my blog (Geekomatic posts tagged 'java.nio' ) with raw data, graphs and all.
Two second summary? My MappedByteBuffer-based implementation was about 275% faster. YMMV.
To work for files larger than ~2GB, which is a problem because of the cast and .position(int pos), I've crafted paging algorithm backed by an array of MappedByteBuffers. You'll need to be working on a 64-bit system for this to work with files larger than 2-4GB because MBB's use the OS's virtual memory system to work their magic.
/**
 * Random byte access to a file of any size through a list of memory-mapped
 * pages, each at most PAGE_SIZE bytes long.
 */
public class StusMagicLargeFileReader {
    private static final long PAGE_SIZE = Integer.MAX_VALUE;
    private List<MappedByteBuffer> buffers = new ArrayList<MappedByteBuffer>();

    /** Demo: starts in the middle of the file and walks back to the previous newline. */
    public static void main(String[] args) throws IOException {
        File file = new File("/Users/stu/test.txt");
        try (FileInputStream in = new FileInputStream(file);
             FileChannel fc = in.getChannel()) {
            StusMagicLargeFileReader buffer = new StusMagicLargeFileReader(fc);
            // For an empty file this starts at -1 and the loop is skipped.
            long position = Math.min(file.length() / 2, file.length() - 1);
            // Compare String with String: String.equals(char) is always false.
            while (position >= 0 && !"\n".equals(buffer.getString(position))) {
                position--;
            }
            //position is the offset of the newline, or -1 at start of file...do other stuff
        }
    }

    /**
     * Maps the whole channel read-only, one page at a time.
     *
     * @param channel the file channel to map
     * @throws IOException if mapping fails
     */
    StusMagicLargeFileReader(FileChannel channel) throws IOException {
        long size = channel.size();
        for (long start = 0; start < size; start += PAGE_SIZE) {
            long length = Math.min(PAGE_SIZE, size - start); // the last page may be shorter
            buffers.add(channel.map(FileChannel.MapMode.READ_ONLY, start, length));
        }
    }

    /**
     * Returns the single byte at bytePosition as a one-character string.
     * The byte is decoded as ISO-8859-1, so the result does not depend on the
     * platform's default charset.
     *
     * @param bytePosition absolute offset in the file
     */
    public String getString(long bytePosition) {
        int page = (int) (bytePosition / PAGE_SIZE);
        int index = (int) (bytePosition % PAGE_SIZE);
        byte[] raw = { buffers.get(page).get(index) };
        return new String(raw, java.nio.charset.StandardCharsets.ISO_8859_1);
    }
}
I have the same problem. I am trying to find all lines that start with some prefix in a sorted file.
Here is a method I cooked up which is largely a port of Python code found here: http://www.logarithmic.net/pfh/blog/01186620415
I have tested it but not thoroughly just yet. It does not use memory mapping, though.
/**
 * Returns all lines of a sorted text file that start with the given prefix.
 *
 * The search bisects on byte offsets: each probe backs up to the start of the
 * line that follows the closest newline at or before the probed offset.
 *
 * @param filename path of the sorted file
 * @param string   the prefix to look for
 * @return the matching lines in file order; empty if none match or the file
 *         could not be read
 */
public static List<String> binarySearch(String filename, String string) {
    List<String> result = new ArrayList<String>();
    File file = new File(filename);
    // try-with-resources closes the file even if a read fails.
    try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
        long length = file.length();
        long low = 0;
        long high = length;
        while (low < high) {
            long mid = low + (high - low) / 2;
            seekToLineStart(raf, mid);
            String line = raf.readLine();
            // readLine() returns null when mid is the final newline of the
            // file: nothing follows, so treat it as "past the target".
            if (line != null && line.compareTo(string) < 0)
                low = mid + 1;
            else
                high = mid;
        }
        // low can equal the file length; reading a byte there would hit EOF.
        seekToLineStart(raf, Math.min(low, length - 1));
        while (true) {
            String line = raf.readLine();
            if (line == null || !line.startsWith(string))
                break;
            result.add(line);
        }
    } catch (IOException e) {
        System.out.println("IOException:");
        e.printStackTrace();
    }
    return result;
}

/**
 * Walks backwards from offset p to the closest '\n' at or before p and leaves
 * the file pointer just after it, or at offset 0 if there is no such newline.
 */
private static void seekToLineStart(RandomAccessFile raf, long p) throws IOException {
    while (p >= 0) {
        raf.seek(p);
        if (((char) raf.readByte()) == '\n')
            return; // the pointer now sits on the first byte after the newline
        p--;
    }
    raf.seek(0);
}
I am not aware of any library that has that functionality. However, a correct code for a external binary search in Java should be similar to this:
/**
 * Binary search over the lines of a sorted text file without loading it into
 * memory. Lines handed to the comparator and returned from search() include
 * their trailing '\n'. Single lines are assumed to be shorter than 64K, and
 * positions are advanced by line.length(), which equals the byte count only
 * for single-byte encodings.
 */
class ExternalBinarySearch implements AutoCloseable {
    final RandomAccessFile file;
    final Comparator<String> test; // tests the element given as search parameter with the line. Insert a PrefixComparator here

    public ExternalBinarySearch(File f, Comparator<String> test) throws FileNotFoundException {
        this.file = new RandomAccessFile(f, "r");
        this.test = test;
    }

    /**
     * Searches the whole file.
     *
     * @param element the value passed as second argument to the comparator
     * @return the first line for which the comparator returns 0, or null
     */
    public String search(String element) throws IOException {
        long l = file.length();
        return search(element, -1, l-1);
    }

    /** Closes the underlying file. */
    @Override
    public void close() throws IOException {
        file.close();
    }

    /**
     * Searches the given element in the range [low,high]. The low value of -1 is a special case to denote the beginning of a file.
     * In contrast to every other line, a line at the beginning of a file doesn't need a \n directly before the line
     */
    private String search(String element, long low, long high) throws IOException {
        if(high - low < 1024) {
            // search directly
            long p = low;
            while(p < high) {
                String line = nextLine(p);
                if(line == null) {
                    return null; // no further line in this range
                }
                int r = test.compare(line,element);
                if(r > 0) {
                    return null;
                } else if (r < 0) {
                    p += line.length();
                } else {
                    return line;
                }
            }
            return null;
        } else {
            long m = low + ((high - low) / 2);
            String line = nextLine(m);
            if(line == null) {
                // No line starts after m; whatever exists lies in the lower half.
                return search(element, low, m);
            }
            int r = test.compare(line, element);
            if(r > 0) {
                return search(element, low, m);
            } else if (r < 0) {
                return search(element, m, high);
            } else {
                return line;
            }
        }
    }

    /**
     * Returns the first line that starts after the first '\n' found at or
     * after offset low (or the first line of the file when low is -1),
     * including its trailing '\n'. Returns null if no line starts within the
     * next 1024 bytes or the end of the file is reached first.
     */
    private String nextLine(long low) throws IOException {
        if(low == -1) { // Beginning of file
            file.seek(0);
        } else {
            file.seek(low);
        }
        int bufferLength = 65 * 1024;
        byte[] buffer = new byte[bufferLength];
        int r = file.read(buffer); // -1 at end of file
        int lineBeginIndex = -1;
        // search beginning of line
        if(low == -1) { //beginning of file
            lineBeginIndex = 0;
        } else {
            //normal mode: only look at bytes that were actually read
            int scanLimit = Math.min(1024, r);
            for(int i = 0; i < scanLimit; i++) {
                if(buffer[i] == '\n') {
                    lineBeginIndex = i + 1;
                    break;
                }
            }
        }
        if(lineBeginIndex == -1 || lineBeginIndex >= r) {
            // no line begins within next 1024 bytes, or the newline found was
            // the last byte available
            return null;
        }
        for(int i = lineBeginIndex; i < r; i++) {
            if(buffer[i] == '\n') {
                // Found end of line
                return new String(buffer, lineBeginIndex, i - lineBeginIndex + 1);
            }
        }
        throw new IllegalArgumentException("Line to long");
    }
}
Please note: I made up this code ad-hoc: Corner cases are not tested nearly good enough, the code assumes that no single line is larger than 64K, etc.
I also think that building an index of the offsets where lines start might be a good idea. For a 500 GB file, that index should be stored in an index file. You should gain a not-so-small constant factor with that index because then there is no need to search for the next line in each step.
I know that was not the question, but building a prefix tree data structure such as a (Patricia) trie (on disk/SSD) might be a good idea for the prefix search.
This is a simple example of what you want to achieve. I would probably first index the file, keeping track of the file position for each string. I'm assuming the strings are separated by newlines (or carriage returns):
// Build a table with the byte offset at which every line of the file starts.
RandomAccessFile file = new RandomAccessFile("filename.txt", "r");
List<Long> indexList = new ArrayList<>();
long pos = 0;
while (file.readLine() != null)
{
    // pos still holds the offset where the line just read began
    indexList.add(pos);
    pos = file.getFilePointer();
}
int indexSize = indexList.size();
Long[] indexArray = new Long[indexSize];
indexList.toArray(indexArray);
The last step is to convert to an array for a slight speed improvement when doing lots of lookups. I would probably convert the Long[] to a long[] also, but I did not show that above. Finally the code to read the string from a given indexed position:
// Reads one line by jumping to its offset stored in indexArray.
int i; // Initialize this appropriately for your algorithm.
file.seek(indexArray[i]);
String line = file.readLine();
// At this point, line contains the string #i.
If you are dealing with a 500GB file, then you might want to use a faster lookup method than binary search - namely a radix sort which is essentially a variant of hashing. The best method for doing this really depends on your data distributions and types of lookup, but if you are looking for string prefixes there should be a good way to do this.
I posted an example of a radix sort solution for integers, but you can use the same idea - basically to cut down the sort time by dividing the data into buckets, then using O(1) lookup to retrieve the bucket of data that is relevant.
Option Strict On
Option Explicit On
' Demonstrates a bucket ("radix"-style) sort: every value is appended to the
' bucket whose index equals the value, so reading the buckets in index order
' yields the values in sorted order.
Module Module1
    Private Const MAX_SIZE As Integer = 100000
    Private m_input(MAX_SIZE) As Integer
    Private m_table(MAX_SIZE) As List(Of Integer)
    Private m_randomGen As New Random()
    Private m_operations As Integer = 0

    Private Sub generateData()
        ' fill with random numbers between 0 and MAX_SIZE - 1
        ' (the upper bound of Random.Next is exclusive)
        For i As Integer = 0 To MAX_SIZE - 1
            m_input(i) = m_randomGen.Next(0, MAX_SIZE)
        Next
    End Sub

    ' Distributes every input value into the bucket with the same index.
    Private Sub sortData()
        For i As Integer = 0 To MAX_SIZE - 1
            Dim x = m_input(i)
            If m_table(x) Is Nothing Then
                m_table(x) = New List(Of Integer)
            End If
            m_table(x).Add(x)
            ' one operation per input value, i.e. MAX_SIZE in total
            m_operations = m_operations + 1
        Next
    End Sub

    ' Prints, in ascending order, every stored value between start and finish (inclusive).
    Private Sub printData(ByVal start As Integer, ByVal finish As Integer)
        If start < 0 OrElse start > MAX_SIZE - 1 Then
            Throw New Exception("printData - start out of range")
        End If
        If finish < 0 OrElse finish > MAX_SIZE - 1 Then
            Throw New Exception("printData - finish out of range")
        End If
        For i As Integer = start To finish
            If m_table(i) IsNot Nothing Then
                For Each x In m_table(i)
                    Console.WriteLine(x)
                Next
            End If
        Next
    End Sub

    ' run the entire sort, but just print out the first 100 for verification purposes
    Private Sub test()
        m_operations = 0
        generateData()
        Console.WriteLine("Time started = " & Now.ToString())
        sortData()
        Console.WriteLine("Time finished = " & Now.ToString & " Number of operations = " & m_operations.ToString())
        ' print out a random 100 segment from the sorted array
        Dim start As Integer = m_randomGen.Next(0, MAX_SIZE - 101)
        printData(start, start + 100)
    End Sub

    Sub Main()
        test()
        Console.ReadLine()
    End Sub
End Module
I post a gist https://gist.github.com/mikee805/c6c2e6a35032a3ab74f643a1d0f8249c
that is rather complete example based on what I found on stack overflow and some blogs hopefully someone else can use it
import static java.nio.file.Files.isWritable;
import static java.nio.file.StandardOpenOption.READ;
import static org.apache.commons.io.FileUtils.forceMkdir;
import static org.apache.commons.io.IOUtils.closeQuietly;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.trimToNull;
import java.io.File;
import java.io.IOException;
import java.nio.Buffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
/**
 * Static helpers for prefix lookups in a sorted, line-feed-delimited text file that has been
 * memory-mapped, plus a small directory helper.
 *
 * <p>Lines are located by scanning for {@code '\n'} bytes and are decoded as UTF-8. For
 * {@link #binarySearch(String, MappedByteBuffer)} to work, the lines of the file must be sorted in
 * {@link String#compareTo(String)} order.
 */
public class FileUtils {

    /** Byte value that terminates a line in the mapped file. */
    private static final byte LINE_FEED = '\n';

    private FileUtils() {
    }

    /** Returns {@code true} when the probed line begins with the requested prefix. */
    private static boolean found(final String candidate, final String prefix) {
        return candidate.startsWith(prefix);
    }

    /**
     * Returns {@code true} when a line starting with {@code prefix} would sort before
     * {@code candidate}.
     */
    private static boolean before(final String candidate, final String prefix) {
        // Compare against at most prefix.length() leading characters. A candidate shorter than
        // the prefix is compared whole instead of overrunning substring().
        final int headLength = Math.min(prefix.length(), candidate.length());
        return prefix.compareTo(candidate.substring(0, headLength)) < 0;
    }

    /** Trims {@code line} and returns {@code null} when nothing is left. */
    private static String trimmedOrNull(final String line) {
        final String trimmed = line.trim();
        return trimmed.isEmpty() ? null : trimmed;
    }

    /**
     * Maps the whole file read-only into memory and loads it.
     *
     * @param path file to map
     * @return the loaded read-only mapping of the complete file
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be opened or
     *     mapped
     */
    public static MappedByteBuffer getMappedByteBuffer(final Path path) {
        // The channel is only needed to establish the mapping, so it is closed on the way out.
        try (FileChannel fileChannel = FileChannel.open(path, READ)) {
            return fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()).load();
        } catch (IOException e) {
            throw new RuntimeException("Unable to map file " + path, e);
        }
    }

    /**
     * Binary-searches the mapped file for a line that starts with {@code prefix}.
     *
     * <p>When several lines share the prefix, whichever of them is probed first is returned.
     *
     * @param prefix leading text of the wanted line; must not be {@code null}
     * @param buffer mapping of the sorted file, may be {@code null}
     * @return the matching line with surrounding whitespace trimmed, or {@code null} when
     *     {@code buffer} is {@code null}, no line starts with {@code prefix}, or the matching line
     *     is empty after trimming
     */
    public static String binarySearch(final String prefix, final MappedByteBuffer buffer) {
        if (buffer == null) {
            return null;
        }
        int low = 0;
        int high = buffer.limit();
        while (low < high) {
            // unsigned shift keeps the midpoint correct even if low + high overflows int
            final int mid = (low + high) >>> 1;
            final String candidate = getLine(mid, buffer);
            if (found(candidate, prefix)) {
                return trimmedOrNull(candidate);
            }
            if (before(candidate, prefix)) {
                high = mid;
            } else {
                low = mid + 1;
            }
        }
        return null;
    }

    /**
     * Returns the line that covers {@code position}, without its terminating line feed.
     *
     * <p>When {@code position} itself holds a line feed, the line that follows it is returned;
     * that line is empty when the line feed is the last byte of the buffer.
     */
    private static String getLine(final int position, final MappedByteBuffer buffer) {
        final int limit = buffer.limit();
        // Walk back to the closest line feed at or before position; -1 means start of file.
        int lineFeed = position;
        while (lineFeed >= 0 && buffer.get(lineFeed) != LINE_FEED) {
            lineFeed--;
        }
        final int start = lineFeed + 1;
        // Walk forward to the terminating line feed or the end of the buffer.
        int end = start;
        while (end < limit && buffer.get(end) != LINE_FEED) {
            end++;
        }
        final byte[] line = new byte[end - start];
        for (int i = 0; i < line.length; i++) {
            line[i] = buffer.get(start + i);
        }
        // Decode the whole line at once so multi-byte characters survive intact.
        return new String(line, StandardCharsets.UTF_8);
    }

    /**
     * Returns the directory named {@code dirName}, creating it and any missing parents first.
     *
     * @param dirName path of the directory
     * @return the existing or newly created directory
     * @throws RuntimeException if the directory cannot be created, the path exists but is not a
     *     directory, or the directory is not writable
     */
    public static File getOrCreateDirectory(final String dirName) {
        final File directory = new File(dirName);
        final Path directoryPath = directory.toPath();
        try {
            if (!directory.isDirectory()) {
                Files.createDirectories(directoryPath);
            }
        } catch (IOException e) {
            throw new RuntimeException("Unable to create directory " + directory, e);
        }
        if (!isWritable(directoryPath)) {
            throw new RuntimeException("Directory is not writable: " + directory);
        }
        return directory;
    }
}
I had a similar problem, so I created a (Scala) library from the solutions provided in this thread:
https://github.com/avast/BigMap
It contains a utility for sorting a huge file and for binary searching in the sorted file.
If you truly want to try memory mapping the file, I found a tutorial on how to use memory mapping in Java nio.