Popping/Crackling When Using a Java Source Data Line for Audio - java

I am having a problem with a Java Source Data Line. I need to play a tone, so I've created a Tone class that just represents a tone. Everything works fine, except that when I play the sound, the speakers pop at the beginning of the sound. Is there any way to fix this? It is for a research project and needs to run without the crackling, as that could affect results. Source code below. Thanks!
package edu.jhu.halberda.audiopanamath;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.FloatControl;
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.SourceDataLine;
import javax.swing.JOptionPane;
public class Tone {
public enum Channel {
LEFT, RIGHT, STEREO
};
public static final float SAMPLE_RATE = 44104; // Should be a multiple of 8
protected byte[] buf;
protected int hz, msecs;
protected double vol;
protected Channel channel;
Tone() {
} // necessary so that subclasses don't complain
public Tone(int hz, int msecs, double vol, Tone.Channel channel) {
if (hz <= 0)
throw new IllegalArgumentException("Frequency <= 0 hz");
if (msecs <= 0)
throw new IllegalArgumentException("Duration <= 0 msecs");
if (vol > 1.0 || vol < 0.0)
throw new IllegalArgumentException("Volume out of range 0.0 - 1.0");
this.channel = channel;
this.hz = hz;
this.vol = vol;
this.msecs = msecs;
generateTone();
}
private void generateTone() {
int len = (int)Math.ceil((2 * SAMPLE_RATE * msecs / 1000.0d));
if (len % 2 == 1)
len = len + 1;
buf = new byte[len];
for (int i = 0; i < buf.length /2; i++) {
double angle = (i * hz / SAMPLE_RATE) * 2.0 * Math.PI;
buf[2*i + 1] = buf[2*i] = (byte) Math.round(Math.sin(angle) * 127.0 * vol);
}
}
public void play(SourceDataLine sdl) { // takes an opened SourceDataLine
FloatControl panControl = (FloatControl) sdl
.getControl(FloatControl.Type.PAN);
if (panControl != null) { // Preferred method using built in sound
// control, but not guaranteed to be
// available
if (channel == Channel.LEFT) {
panControl.setValue(-1);
} else if (channel == Channel.RIGHT) {
panControl.setValue(1);
} else {
panControl.setValue(0);
}
} else { // fallback method is directly manipulates the buffer
if (channel != Channel.STEREO) {
int nSilenceOffset;
byte nSilenceValue = 0;
if (channel == Channel.LEFT) {
nSilenceOffset = 1;
} else {
nSilenceOffset = 0;
}
for (int i = 0; i < buf.length; i += 2) {
buf[i + nSilenceOffset] = nSilenceValue;
}
}
}
sdl.write(buf, 0, buf.length);
sdl.drain();
}
public static void main(String[] args) {
AudioFormat af = new AudioFormat(Tone.SAMPLE_RATE, 8, 2, true, false);
SourceDataLine sdl;
try {
sdl = AudioSystem.getSourceDataLine(af);
} catch (LineUnavailableException e) {
JOptionPane.showMessageDialog(null, "Couldn't get sound line");
return;
}
try {
sdl.open(af);
} catch (LineUnavailableException e) {
JOptionPane.showMessageDialog(null, "Couldn't open sound line");
return;
}
sdl.start();
Tone left = new Tone(400, 2000, .5, Tone.Channel.LEFT);
System.out.println("Playing left");
long t = System.currentTimeMillis();
left.play(sdl);
System.out.println(System.currentTimeMillis()-t);
System.out.println("Finished left");
Tone right = new Tone(400, 2000, .5, Tone.Channel.RIGHT);
System.out.println("Playing right");
right.play(sdl);
System.out.println("Finished right");
sdl.stop();
sdl.close();
}
}

Try this variant, that uses a crude fade in/fade out effect.
private void generateTone() {
int len = (int)Math.ceil((2 * SAMPLE_RATE * msecs / 1000.0d));
if (len % 2 == 1)
len = len + 1;
buf = new byte[len];
int fadeCount = 1600;
for (int i = 0; i < buf.length /2; i++) {
double fadeRate = 1.0;
double angle = (i * hz / SAMPLE_RATE) * 2.0 * Math.PI;
if (i<fadeCount) {
fadeRate = (double)i/(double)fadeCount;
} else if (i>(buf.length/2)-fadeCount) {
int bufLength = buf.length;
int buf = bufLength/2;
int countDown = buf-i;
fadeRate = (double)countDown/(double)(fadeCount);
}
buf[2*i + 1] = buf[2*i] = (byte) Math.round(
Math.cos(angle) * 127.0 * vol * fadeRate);
}
}

Related

Split wav files based on Silence in Java

I want to split wav files in to number of waves files, each of them
is split
by silence between words, I tried to code a bit. I managed to split the files
But things got complex when trying to dedect silence.. any help?
Thanks
Splits WAV-files in multiple parts. This class splits a big WAV-file in
multiple WAV-file, each with a fixed length (SPLIT_FILE_LENGTH_MS). It
takes
it input file from an embedded resource, and writes a series of out*.wav files.
heres my code so far:
public class WaveSplitter {
public static int SPLIT_FILE_LENGTH_MS = 0;
public static final String INPUT_FILE_LOCATION = "resources/AUD-20171027-WA0001.wav";
public static void main(String[] args) {
try {
// Get the wave file from the embedded resources
URL defaultImage =
WaveSplitter.class.getResource(INPUT_FILE_LOCATION);
// GSpeechDuplex duplex = new
// GSpeechDuplex("AIzaSyDHQsnCHDk71x-Dpp05IIK3tYNOEP84z1s");
// duplex.setLanguage("heb");
File audioFile = new File(defaultImage.toURI());
File soundFile = new File(defaultImage.toURI());
FileInputStream fileInputStream = null;
long duration = 0;
try {
fileInputStream = new FileInputStream(soundFile);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
try {
duration =
Objects.requireNonNull(fileInputStream).getChannel().size() / 128;
} catch (IOException e) {
}
AudioInputStream stream;
WavFile wavFile = WavFile.openWavFile(new File(defaultImage.toURI()));
AudioInputStream audioInputStream =
AudioSystem.getAudioInputStream(soundFile);
AudioFormat format = audioInputStream.getFormat();
long frames = audioInputStream.getFrameLength();
double durationInSeconds = (frames + 0.0) / format.getFrameRate();
System.out.println("Duration " + durationInSeconds + " seconds \n");
durationInSeconds = durationInSeconds * 1000;
SPLIT_FILE_LENGTH_MS = (int) durationInSeconds;
// wavFile.display();
// Get the number of audio channels in the wav file
int numberChannels = wavFile.getNumChannels();
System.out.println("number channels is " + numberChannels);
System.out.println("SPLIT_FILE_LENGTH_MS is " + SPLIT_FILE_LENGTH_MS);
int framesRead;
double min = Double.MAX_VALUE;
double max = Double.MIN_VALUE;
byte[] bytes = FileUtils.readFileToByteArray(soundFile);
// String encoded = Base64.encodeToString(bytes, 0);
// playSoundFile(soundFile);
WavFile inputWavFile = WavFile.openWavFile(soundFile);
// File audioFile = new File("AUD-20171027-WA0001.wav");
// sendToGoogleViaOnvegoServer(myByteData);
/// System.out.println("Seneteces:\n " + bytes);
// Get the number of audio channels in the wav file
int numChannels = inputWavFile.getNumChannels();
// set the maximum number of frames for a target file,
// based on the number of milliseconds assigned for each file
int maxFramesPerFile = (int) inputWavFile.getSampleRate() *
SPLIT_FILE_LENGTH_MS / 1000;
System.out.println("maxFramesPerFile is " + maxFramesPerFile + "\n");
// Create a buffer of maxFramesPerFile frames
double[] buffer = new double[maxFramesPerFile * numChannels];
// sendToGoogleViaOnvegoServer(myByteData);
framesRead = inputWavFile.readFrames(buffer, maxFramesPerFile);
// System.out.print(buffer.length);
int fileCount = 0;
int s = 0;
int j = 0;
int timercount = 0;
int audiorecord = 0;
int count = 0;
// SPLIT_FILE_LENGTH_MS = 0;
double maxlengh = framesRead * numChannels;
System.out.print("Audio Duration " + maxFramesPerFile + "\n");
int arraylengh = framesRead * numChannels;
System.out.print("frame*channels " + framesRead * numChannels);
///loop to find silent places
do {
// Read frames into buffer
framesRead = inputWavFile.readFrames(buffer, maxFramesPerFile);
// System.out.print(framesRead);
for (s = 0; s < arraylengh; s++) {
if (buffer[s] == 0) {
// System.out.print(s + ", " + buffer[s] + "\n");
count++;
timercount++;
}
else {
for (j = s; j < arraylengh; j++) {
if (buffer[j] != 0)
{
} else {
SPLIT_FILE_LENGTH_MS = 1500;
maxFramesPerFile = (int)
inputWavFile.getSampleRate() * SPLIT_FILE_LENGTH_MS /
1000;
framesRead = inputWavFile.readFrames(buffer,
maxFramesPerFile);
// framesRead = inputWavFile.readFrames(buffer,
maxFramesPerFile);
WavFile outputWavFile = WavFile.newWavFile(new
File("out" + (fileCount + 1) + ".wav"),
inputWavFile.getNumChannels(), framesRead,
inputWavFile.getValidBits(),
inputWavFile.getSampleRate());
// Write the buffer
outputWavFile.writeFrames(buffer, framesRead);
outputWavFile.close();
return;
}
}
}
// System.out.print(maxFramesPerFile);
}
// SPLIT_FILE_LENGTH_MS=1000;
maxFramesPerFile = (int) inputWavFile.getSampleRate() *
SPLIT_FILE_LENGTH_MS / 1000;
framesRead = inputWavFile.readFrames(buffer, maxFramesPerFile);
WavFile outputWavFile = WavFile.newWavFile(new File("out" + (
fileCount + 1) + ".wav"),
inputWavFile.getNumChannels(), framesRead,
inputWavFile.getValidBits(),
inputWavFile.getSampleRate());
// Write the buffer
outputWavFile.writeFrames(buffer, framesRead);
outputWavFile.close();
fileCount++;
// System.out.print(fileCount);
} while (framesRead != 0);
System.out.print(count + "\n");
System.out.print(fileCount);
//catch error
} catch (Exception e) {
System.err.println(e);
}
}
// Play sound function
public static void playSoundFile(File soundFile)
throws UnsupportedAudioFileException, IOException, LineUnavailableException {
try {
final Clip clip = (Clip) AudioSystem.getLine(new Info(Clip.class));
clip.removeLineListener(new LineListener() {
#Override
public void update(LineEvent event) {
if (event.getType() == LineEvent.Type.STOP)
clip.close();
}
});
clip.open(AudioSystem.getAudioInputStream(soundFile));
clip.start();
} catch (Exception exc) {
exc.printStackTrace(System.out);
}
}
}

How to create an audio wave in JavaFX?

I would like to get an Audio wave plot using Chart Area in JavaFX. Unfortunately, I am not clear how to do, what are the values ​​to be extracted from the sound to assign to x-axis and y-axis?
I tried to read other posts, but I found nothing on javafx.
You can help me?
Sample Image:
Below is the code that extract the waveform .
I'm pulling out the right parameters for my scope?
How can I use it to print the graph with JavaFX?
public class SimpleWaveformExtractor implements WaveformExtractor {
private static final int DEFAULT_BUFFER_SIZE = 32768;
#Override
public double[] extract(File inputFile) {
AudioInputStream in = null;
try {
in = AudioSystem.getAudioInputStream(inputFile);
} catch (Exception e) {
System.out.println("Cannot read audio file");
return new double[0];
}
AudioFormat format = in.getFormat();
byte[] audioBytes = readBytes(in);
int[] result = null;
if (format.getSampleSizeInBits() == 16) {
int samplesLength = audioBytes.length / 2;
result = new int[samplesLength];
if (format.isBigEndian()) {
for (int i = 0; i < samplesLength; ++i) {
byte MSB = audioBytes[i * 2];
byte LSB = audioBytes[i * 2 + 1];
result[i] = MSB << 8 | (255 & LSB);
}
} else {
for (int i = 0; i < samplesLength; i += 2) {
byte LSB = audioBytes[i * 2];
byte MSB = audioBytes[i * 2 + 1];
result[i / 2] = MSB << 8 | (255 & LSB);
}
}
} else {
int samplesLength = audioBytes.length;
result = new int[samplesLength];
if (format.getEncoding().toString().startsWith("PCM_SIGN")) {
for (int i = 0; i < samplesLength; ++i) {
result[i] = audioBytes[i];
}
} else {
for (int i = 0; i < samplesLength; ++i) {
result[i] = audioBytes[i] - 128;
}
}
}
return ArraysHelper.normalize(result);
}
private byte[] readBytes(AudioInputStream in) {
byte[] result = new byte[0];
byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
try {
int bytesRead = 0;
do {
bytesRead = in.read(buffer);
result = ArrayUtils.addAll(result, buffer);
} while (bytesRead != -1);
} catch (IOException e) {
e.printStackTrace();
}
return result;
}
}
this is the interface:
public interface WaveformExtractor {
double[] extract(File in);
}
This is the code that return the array of double:
private double[] extractWaveform(File file) throws IOException, UnsupportedAudioFileException {
return new WavFileExtractor().extract(file);
}

Finding time difference between two audio spikes? [duplicate]

How can I detect silence when recording operation is started in Java? What is PCM data? How can I calculate PCM data in Java?
I found the solution :
package bemukan.voiceRecognition.speechToText;
import javax.sound.sampled.*;
import java.io.*;
public class RecordAudio {
private File audioFile;
protected boolean running;
private ByteArrayOutputStream out;
private AudioInputStream inputStream;
final static float MAX_8_BITS_SIGNED = Byte.MAX_VALUE;
final static float MAX_8_BITS_UNSIGNED = 0xff;
final static float MAX_16_BITS_SIGNED = Short.MAX_VALUE;
final static float MAX_16_BITS_UNSIGNED = 0xffff;
private AudioFormat format;
private float level;
private int frameSize;
public RecordAudio(){
getFormat();
}
private AudioFormat getFormat() {
File file = new File("src/Facebook/1.wav");
AudioInputStream stream;
try {
stream = AudioSystem.getAudioInputStream(file);
format=stream.getFormat();
frameSize=stream.getFormat().getFrameSize();
return stream.getFormat();
} catch (UnsupportedAudioFileException e) {
} catch (IOException e) {
}
return null;
}
public void stopAudio() {
running = false;
}
public void recordAudio() {
try {
final AudioFormat format = getFormat();
DataLine.Info info = new DataLine.Info(
TargetDataLine.class, format);
final TargetDataLine line = (TargetDataLine)
AudioSystem.getLine(info);
line.open(format);
line.start();
Runnable runner = new Runnable() {
int bufferSize = (int) format.getSampleRate()
* format.getFrameSize();
byte buffer[] = new byte[bufferSize];
public void run() {
int readPoint = 0;
out = new ByteArrayOutputStream();
running = true;
int sum=0;
while (running) {
int count =
line.read(buffer, 0, buffer.length);
calculateLevel(buffer,0,0);
System.out.println(level);
if (count > 0) {
out.write(buffer, 0, count);
}
}
line.stop();
}
};
Thread captureThread = new Thread(runner);
captureThread.start();
} catch (LineUnavailableException e) {
System.err.println("Line unavailable: " + e);
System.exit(-2);
}
}
public File getAudioFile() {
byte[] audio = out.toByteArray();
InputStream input = new ByteArrayInputStream(audio);
try {
final AudioFormat format = getFormat();
final AudioInputStream ais =
new AudioInputStream(input, format,
audio.length / format.getFrameSize());
AudioSystem.write(ais, AudioFileFormat.Type.WAVE, new File("temp.wav"));
input.close();
System.out.println("New file created!");
} catch (IOException e) {
System.out.println(e.getMessage());
}
return new File("temp.wav");
}
private void calculateLevel (byte[] buffer,
int readPoint,
int leftOver) {
int max = 0;
boolean use16Bit = (format.getSampleSizeInBits() == 16);
boolean signed = (format.getEncoding() ==
AudioFormat.Encoding.PCM_SIGNED);
boolean bigEndian = (format.isBigEndian());
if (use16Bit) {
for (int i=readPoint; i<buffer.length-leftOver; i+=2) {
int value = 0;
// deal with endianness
int hiByte = (bigEndian ? buffer[i] : buffer[i+1]);
int loByte = (bigEndian ? buffer[i+1] : buffer [i]);
if (signed) {
short shortVal = (short) hiByte;
shortVal = (short) ((shortVal << 8) | (byte) loByte);
value = shortVal;
} else {
value = (hiByte << 8) | loByte;
}
max = Math.max(max, value);
} // for
} else {
// 8 bit - no endianness issues, just sign
for (int i=readPoint; i<buffer.length-leftOver; i++) {
int value = 0;
if (signed) {
value = buffer [i];
} else {
short shortVal = 0;
shortVal = (short) (shortVal | buffer [i]);
value = shortVal;
}
max = Math.max (max, value);
} // for
} // 8 bit
// express max as float of 0.0 to 1.0 of max value
// of 8 or 16 bits (signed or unsigned)
if (signed) {
if (use16Bit) { level = (float) max / MAX_16_BITS_SIGNED; }
else { level = (float) max / MAX_8_BITS_SIGNED; }
} else {
if (use16Bit) { level = (float) max / MAX_16_BITS_UNSIGNED; }
else { level = (float) max / MAX_8_BITS_UNSIGNED; }
}
} // calculateLevel
}
How can I detect silence when recording operation is started in Java?
Calculate the dB or RMS value for a group of sound frames and decide at what level it is considered to be 'silence'.
What is PCM data?
Data that is in Pulse-code modulation format.
How can I calculate PCM data in Java?
I do not understand that question. But guessing it has something to do with the speech-recognition tag, I have some bad news. This might theoretically be done using the Java Speech API. But there are apparently no 'speech to text' implementations available for the API (only 'text to speech').
I have to calculate rms for speech-recognition project. But I do not know how can I calculate in Java.
For a single channel that is represented by signal sizes in a double ranging from -1 to 1, you might use this method.
/** Computes the RMS volume of a group of signal sizes ranging from -1 to 1. */
public double volumeRMS(double[] raw) {
double sum = 0d;
if (raw.length==0) {
return sum;
} else {
for (int ii=0; ii<raw.length; ii++) {
sum += raw[ii];
}
}
double average = sum/raw.length;
double sumMeanSquare = 0d;
for (int ii=0; ii<raw.length; ii++) {
sumMeanSquare += Math.pow(raw[ii]-average,2d);
}
double averageMeanSquare = sumMeanSquare/raw.length;
double rootMeanSquare = Math.sqrt(averageMeanSquare);
return rootMeanSquare;
}
There is a byte buffer to save input values from the line, and what I should have to do with this buffer?
If using the volumeRMS(double[]) method, convert the byte values to an array of double values ranging from -1 to 1. ;)
You need to catch the value like a number silence is zero or near
Please adapt your code to your requirement!!!
In this case a variable named UMBRAL (Threshold in spanish)...
Suppose that you have access to WAV file like bytes ByteHeader...
private Integer Byte2PosIntBig(byte Byte24, byte Byte16, byte Byte08, byte Byte00) {
return new Integer (
((Byte24) << 24)|
((Byte16 & 0xFF) << 16)|
((Byte08 & 0xFF) << 8)|
((Byte00 & 0xFF) << 0));
}
Before ....
RandomAccessFile RAFSource = new RandomAccessFile("your old file wav", "r");
Begins here...
int PSData = 44;
byte[] Bytes = new byte[4];
byte[] ByteHeader = new byte[44];
RAFSource.seek(0);
RAFSource.read(ByteHeader);
int WavSize = Byte2PosIntBig(ByteHeader[43],ByteHeader[42],ByteHeader[41],ByteHeader[40]);
int NumBits = Byte2PosIntBig(ByteHeader[35],ByteHeader[34]);
int NumByte = NumBits/8;
for (int i = PSData;i < PSData+WavSize;i+=NumByte) {
int WavSample = 0;
int WavResultI =0;
int WavResultO = 0;
if (NumByte == 2) {
RAFSource.seek(i);
Bytes[0] = RAFSource.readByte();
Bytes[1] = RAFSource.readByte();
WavSample = (int)(((Bytes[1]) << 8)|((Bytes[0] & 0xFF) << 0));
if (Math.abs(WavSample) < UMBRAL) {
//SILENCE DETECTED!!!
}
} else {
RAFSource.seek(i);
WavSample = (short)(RAFSource.readByte() & 0xFF);
short sSamT = (short)WavSample;
sSamT += 128;
double dSamD = (double)sSamT*Multiplier;
if ((double)sSamT < UMBRAL) {
//SILENCE DETECTED!!!
}
}

Play a WAV File Mulitple Times Through AudioTrack

I am making a metronome app that uses AudioTrack to play different sounds for different instruments. I want to use a .wav file that I have of one hit on a cowbell for one of the options. I can get the first cowbell hit to work but then it doesn't play any after that.
MainActivity.java
metronome.playInstrumental(getResources().openRawResource(R.raw.cowbell));
Metronome.java
public Metronome(Handler handler) {
audioGenerator.createPlayer();
this.mHandler = handler;
}
public void calcSilence() {
silence = (int) (((60 / bpm) * 8000) - tick);
soundTickArray = new double[this.tick];
soundTockArray = new double[this.tick];
silenceSoundArray = new double[this.silence];
double[] tick = audioGenerator.getSineWave(this.tick, 8000, beatSound);
double[] tock = audioGenerator.getSineWave(this.tick, 8000, electronicSound);
for(int i = 0; i < this.tick; i++) {
soundTickArray[i] = tick[i];
soundTockArray[i] = tock[i];
}
for(int i = 0; i < silence; i++)
silenceSoundArray[i] = 0;
}
public void playInstrumental(InputStream inputStream) {
calcSilence();
do {
msg = new Message();
msg.obj = ""+currentBeat;
audioGenerator.playSound(inputStream);
mHandler.sendMessage(msg);
audioGenerator.writeSound(silenceSoundArray);
} while(play);
}
AudioGenerator.java
public class AudioGenerator {
private int sampleRate;
private AudioTrack audioTrack;
public AudioGenerator(int sampleRate) {
this.sampleRate = sampleRate;
}
public double[] getSineWave(int samples, int sampleRate, double frequencyOfTone){
double[] sample = new double[samples];
for (int i = 0; i < samples; i++) {
sample[i] = Math.sin(2 * Math.PI * i / (sampleRate / frequencyOfTone));
}
return sample;
}
public byte[] get16BitPcm(double[] samples) {
byte[] generatedSound = new byte[2 * samples.length];
int index = 0;
for (double sample : samples) {
// scale to maximum amplitude
short maxSample = (short) ((sample * Short.MAX_VALUE));
// in 16 bit wav PCM, first byte is the low order byte
generatedSound[index++] = (byte) (maxSample & 0x00ff);
generatedSound[index++] = (byte) ((maxSample & 0xff00) >>> 8);
}
return generatedSound;
}
public void createPlayer(){
audioTrack = new AudioTrack(AudioManager.STREAM_MUSIC,
sampleRate, AudioFormat.CHANNEL_OUT_MONO,
AudioFormat.ENCODING_PCM_16BIT, sampleRate,
AudioTrack.MODE_STREAM);
audioTrack.play();
}
public void writeSound(double[] samples) {
byte[] generatedSnd = get16BitPcm(samples);
audioTrack.write(generatedSnd, 0, generatedSnd.length);
}
public void destroyAudioTrack() {
audioTrack.stop();
audioTrack.release();
}
public void playSound(InputStream inputStream) {
int i = 0;
int bufferSize = 512;
byte [] buffer = new byte[bufferSize];
try {
while((i = inputStream.read(buffer)) != -1) {
audioTrack.write(buffer, 0, i);
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
I was able to get it to work using this:
public void playSound(InputStream inputStream) {
int bufferSize = 512;
byte[] buffer = new byte[bufferSize];
i = 0;
try {
while((i = inputStream.read(buffer)) > -1) {
audioTrack.write(buffer, 0, i);
}
inputStream.reset();
} catch (IOException e) {
e.printStackTrace();
}
}

Detect silence when recording

How can I detect silence when recording operation is started in Java? What is PCM data? How can I calculate PCM data in Java?
I found the solution :
package bemukan.voiceRecognition.speechToText;
import javax.sound.sampled.*;
import java.io.*;
public class RecordAudio {
private File audioFile;
protected boolean running;
private ByteArrayOutputStream out;
private AudioInputStream inputStream;
final static float MAX_8_BITS_SIGNED = Byte.MAX_VALUE;
final static float MAX_8_BITS_UNSIGNED = 0xff;
final static float MAX_16_BITS_SIGNED = Short.MAX_VALUE;
final static float MAX_16_BITS_UNSIGNED = 0xffff;
private AudioFormat format;
private float level;
private int frameSize;
public RecordAudio(){
getFormat();
}
private AudioFormat getFormat() {
File file = new File("src/Facebook/1.wav");
AudioInputStream stream;
try {
stream = AudioSystem.getAudioInputStream(file);
format=stream.getFormat();
frameSize=stream.getFormat().getFrameSize();
return stream.getFormat();
} catch (UnsupportedAudioFileException e) {
} catch (IOException e) {
}
return null;
}
public void stopAudio() {
running = false;
}
public void recordAudio() {
try {
final AudioFormat format = getFormat();
DataLine.Info info = new DataLine.Info(
TargetDataLine.class, format);
final TargetDataLine line = (TargetDataLine)
AudioSystem.getLine(info);
line.open(format);
line.start();
Runnable runner = new Runnable() {
int bufferSize = (int) format.getSampleRate()
* format.getFrameSize();
byte buffer[] = new byte[bufferSize];
public void run() {
int readPoint = 0;
out = new ByteArrayOutputStream();
running = true;
int sum=0;
while (running) {
int count =
line.read(buffer, 0, buffer.length);
calculateLevel(buffer,0,0);
System.out.println(level);
if (count > 0) {
out.write(buffer, 0, count);
}
}
line.stop();
}
};
Thread captureThread = new Thread(runner);
captureThread.start();
} catch (LineUnavailableException e) {
System.err.println("Line unavailable: " + e);
System.exit(-2);
}
}
public File getAudioFile() {
byte[] audio = out.toByteArray();
InputStream input = new ByteArrayInputStream(audio);
try {
final AudioFormat format = getFormat();
final AudioInputStream ais =
new AudioInputStream(input, format,
audio.length / format.getFrameSize());
AudioSystem.write(ais, AudioFileFormat.Type.WAVE, new File("temp.wav"));
input.close();
System.out.println("New file created!");
} catch (IOException e) {
System.out.println(e.getMessage());
}
return new File("temp.wav");
}
private void calculateLevel (byte[] buffer,
int readPoint,
int leftOver) {
int max = 0;
boolean use16Bit = (format.getSampleSizeInBits() == 16);
boolean signed = (format.getEncoding() ==
AudioFormat.Encoding.PCM_SIGNED);
boolean bigEndian = (format.isBigEndian());
if (use16Bit) {
for (int i=readPoint; i<buffer.length-leftOver; i+=2) {
int value = 0;
// deal with endianness
int hiByte = (bigEndian ? buffer[i] : buffer[i+1]);
int loByte = (bigEndian ? buffer[i+1] : buffer [i]);
if (signed) {
short shortVal = (short) hiByte;
shortVal = (short) ((shortVal << 8) | (byte) loByte);
value = shortVal;
} else {
value = (hiByte << 8) | loByte;
}
max = Math.max(max, value);
} // for
} else {
// 8 bit - no endianness issues, just sign
for (int i=readPoint; i<buffer.length-leftOver; i++) {
int value = 0;
if (signed) {
value = buffer [i];
} else {
short shortVal = 0;
shortVal = (short) (shortVal | buffer [i]);
value = shortVal;
}
max = Math.max (max, value);
} // for
} // 8 bit
// express max as float of 0.0 to 1.0 of max value
// of 8 or 16 bits (signed or unsigned)
if (signed) {
if (use16Bit) { level = (float) max / MAX_16_BITS_SIGNED; }
else { level = (float) max / MAX_8_BITS_SIGNED; }
} else {
if (use16Bit) { level = (float) max / MAX_16_BITS_UNSIGNED; }
else { level = (float) max / MAX_8_BITS_UNSIGNED; }
}
} // calculateLevel
}
How can I detect silence when recording operation is started in Java?
Calculate the dB or RMS value for a group of sound frames and decide at what level it is considered to be 'silence'.
What is PCM data?
Data that is in Pulse-code modulation format.
How can I calculate PCM data in Java?
I do not understand that question. But guessing it has something to do with the speech-recognition tag, I have some bad news. This might theoretically be done using the Java Speech API. But there are apparently no 'speech to text' implementations available for the API (only 'text to speech').
I have to calculate rms for speech-recognition project. But I do not know how can I calculate in Java.
For a single channel that is represented by signal sizes in a double ranging from -1 to 1, you might use this method.
/** Computes the RMS volume of a group of signal sizes ranging from -1 to 1. */
public double volumeRMS(double[] raw) {
double sum = 0d;
if (raw.length==0) {
return sum;
} else {
for (int ii=0; ii<raw.length; ii++) {
sum += raw[ii];
}
}
double average = sum/raw.length;
double sumMeanSquare = 0d;
for (int ii=0; ii<raw.length; ii++) {
sumMeanSquare += Math.pow(raw[ii]-average,2d);
}
double averageMeanSquare = sumMeanSquare/raw.length;
double rootMeanSquare = Math.sqrt(averageMeanSquare);
return rootMeanSquare;
}
There is a byte buffer to save input values from the line, and what I should have to do with this buffer?
If using the volumeRMS(double[]) method, convert the byte values to an array of double values ranging from -1 to 1. ;)
You need to catch the value like a number silence is zero or near
Please adapt your code to your requirement!!!
In this case a variable named UMBRAL (Threshold in spanish)...
Suppose that you have access to WAV file like bytes ByteHeader...
private Integer Byte2PosIntBig(byte Byte24, byte Byte16, byte Byte08, byte Byte00) {
return new Integer (
((Byte24) << 24)|
((Byte16 & 0xFF) << 16)|
((Byte08 & 0xFF) << 8)|
((Byte00 & 0xFF) << 0));
}
Before ....
RandomAccessFile RAFSource = new RandomAccessFile("your old file wav", "r");
Begins here...
int PSData = 44;
byte[] Bytes = new byte[4];
byte[] ByteHeader = new byte[44];
RAFSource.seek(0);
RAFSource.read(ByteHeader);
int WavSize = Byte2PosIntBig(ByteHeader[43],ByteHeader[42],ByteHeader[41],ByteHeader[40]);
int NumBits = Byte2PosIntBig(ByteHeader[35],ByteHeader[34]);
int NumByte = NumBits/8;
for (int i = PSData;i < PSData+WavSize;i+=NumByte) {
int WavSample = 0;
int WavResultI =0;
int WavResultO = 0;
if (NumByte == 2) {
RAFSource.seek(i);
Bytes[0] = RAFSource.readByte();
Bytes[1] = RAFSource.readByte();
WavSample = (int)(((Bytes[1]) << 8)|((Bytes[0] & 0xFF) << 0));
if (Math.abs(WavSample) < UMBRAL) {
//SILENCE DETECTED!!!
}
} else {
RAFSource.seek(i);
WavSample = (short)(RAFSource.readByte() & 0xFF);
short sSamT = (short)WavSample;
sSamT += 128;
double dSamD = (double)sSamT*Multiplier;
if ((double)sSamT < UMBRAL) {
//SILENCE DETECTED!!!
}
}

Categories

Resources