Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 423d840d83 | |||
| 1997b0bf9b | |||
| b206fb33aa | |||
| 11b26e396c |
38
README.md
38
README.md
@@ -29,12 +29,13 @@ From here on much is controlled by key presses.
|
||||
appended to the currently selected chapter, or to the last chapter if none is selected.
|
||||
* Press and hold "T" to record a new phrase that is the start of a new paragraph. This adds the "post paragraph" gap to the previous sentence. Otherwise it does the same as "R".
|
||||
* Press and hold "F" to record a "continuation" phrase. This sets the previous phrase's post-gap to be the "short" gap instead of the normal length gap.
|
||||
* Press and hold "Y" to record a new phrase that is the start of a new section. This adds the "post section" gap to the previous sentence. Otherwise it does the same as "R".
|
||||
* Press "D" to delete the last phrase you recorded.
|
||||
* Press "E" to re-record the currently selected phrase.
|
||||
|
||||
Each phrase you record will be briefly analysed using FFT to find the start and end of the audio and set
|
||||
Each phrase you record can be automatically analysed to find the start and end of the audio and set
|
||||
crop marks appropriately. These can be adjusted in the waveform display when a phrase is selected. You can also
|
||||
re-run the analysis using either the default FFT method or using a peak detector method (finding the first and last points
|
||||
re-run the analysis using either FFT or a peak detector method (finding the first and last points
|
||||
where the audio amplitude rises above the background noise).
|
||||
|
||||
The phrases also have a "post gap" associated with them. This is the amount of room noise (in milliseconds) to place between
|
||||
@@ -53,18 +54,14 @@ edit the text of this ID to identify the recordings. You
|
||||
may, for instance, change it to have the same text as the
|
||||
audio contains.
|
||||
|
||||
To help with this the Haven On-Demand online speech recognition
|
||||
service is integrated with the system and can be used to try and convert the
|
||||
audio into text. Right clicking on a recording brings
|
||||
up a menu which includes the option to try and convert
|
||||
the audio into text. The detected text is then used to
|
||||
replace the current recording ID / text.
|
||||
The audio can also be automatically converted to text if you have a suitable command-line
|
||||
executable that will work. One example is (on Linux) [DeepSpeech](https://github.com/mozilla/DeepSpeech) by Mozilla.
|
||||
|
||||
File layout
|
||||
-----------
|
||||
|
||||
All data is stored in your "storage" directory (specified in Options). Each book (which is a directory named after the
|
||||
title of the book) has an associated XML file (audiobook.abk) and a directory "files" where all the audio (stored as WAV
|
||||
title of the book) has an associated XML file (audiobook.abx) and a directory "files" where all the audio (stored as WAV
|
||||
files) is placed.
|
||||
|
||||
When you export the book as MP3 a new folder "export" is created within the book's folder where the MP3 files are placed.
|
||||
@@ -81,3 +78,26 @@ Building
|
||||
5. Build with `ant build`
|
||||
6. Run with `java -jar ./AudiobookRecorder.jar`
|
||||
|
||||
|
||||
----
|
||||
|
||||
Extra Resources
|
||||
===============
|
||||
|
||||
* DeepSpeech wrapper script
|
||||
|
||||
This is a small script that will convert the audio into a format DeepSpeech likes and call the `deepspeech` executable, removing any extra rubbish from the output. It
|
||||
also requires `sox` to be installed for the audio conversion.
|
||||
|
||||
```
|
||||
#!/bin/bash
|
||||
|
||||
ID=$$
|
||||
FILE=$1
|
||||
BINPATH=${HOME}/local/bin
|
||||
MODELS=${HOME}/ds/deepspeech-0.6.1-models
|
||||
|
||||
sox "$FILE" -r 16000 -c 1 -b 16 "/tmp/ds-${ID}.wav"
|
||||
${BINPATH}/deepspeech --model ${MODELS}/output_graph.pbmm --lm ${MODELS}/lm.binary --trie ${MODELS}/trie --audio "/tmp/ds-${ID}.wav" 2>/dev/null
|
||||
rm /tmp/ds-${ID}.wav
|
||||
```
|
||||
|
||||
@@ -1 +1 @@
|
||||
version=0.3.7
|
||||
version=0.3.8
|
||||
|
||||
@@ -1684,10 +1684,11 @@ public class AudiobookRecorder extends JFrame implements DocumentListener {
|
||||
Sentence snt = (Sentence)s.nextElement();
|
||||
if (!snt.isLocked()) {
|
||||
if (!snt.beenDetected()) {
|
||||
Debug.d("Queueing recognition of", snt.getId());
|
||||
Runnable r = snt.getRecognitionRunnable();
|
||||
snt.setQueued();
|
||||
queueJob(r);
|
||||
queueJob(new SentenceJob(snt) {
|
||||
public void run() {
|
||||
sentence.doRecognition();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2096,7 +2097,7 @@ public class AudiobookRecorder extends JFrame implements DocumentListener {
|
||||
if (recording == null) return;
|
||||
recording.stopRecording();
|
||||
|
||||
book.reloadTree();
|
||||
// book.reloadTree();
|
||||
|
||||
bookTree.expandPath(new TreePath(((DefaultMutableTreeNode)recording.getParent()).getPath()));
|
||||
bookTree.setSelectionPath(new TreePath(recording.getPath()));
|
||||
|
||||
@@ -450,6 +450,7 @@ public class Book extends BookTreeNode {
|
||||
}
|
||||
|
||||
public void reloadTree() {
|
||||
Debug.trace();
|
||||
SwingUtilities.invokeLater(new Runnable() {
|
||||
public void run() {
|
||||
AudiobookRecorder.window.bookTreeModel.reload(Book.this);
|
||||
|
||||
@@ -222,7 +222,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
if (text.equals("")) text = id;
|
||||
|
||||
if ((crossStartOffset == -1) || (crossEndOffset == -1)) {
|
||||
updateCrossings(true);
|
||||
updateCrossings();
|
||||
}
|
||||
|
||||
if (runtime <= 0.01d) getLength();
|
||||
@@ -260,26 +260,40 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
CacheManager.removeFromCache(this);
|
||||
|
||||
if (!id.equals("room-noise")) {
|
||||
autoTrimSample(true);
|
||||
autoTrimSample();
|
||||
if (Options.getBoolean("process.sphinx")) {
|
||||
recognise();
|
||||
AudiobookRecorder.window.queueJob(new SentenceJob(this) {
|
||||
public void run() {
|
||||
sentence.doRecognition();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void autoTrimSample() {
|
||||
public void autoTrimSample(boolean ignored) {
|
||||
Debug.trace();
|
||||
autoTrimSample(false);
|
||||
autoTrimSample();
|
||||
}
|
||||
|
||||
public void autoTrimSample(boolean useRaw) {
|
||||
public void autoTrimSample() {
|
||||
Debug.trace();
|
||||
String tm = Options.get("audio.recording.trim");
|
||||
if (tm.equals("peak")) {
|
||||
autoTrimSamplePeak(useRaw);
|
||||
AudiobookRecorder.window.queueJob(new SentenceJob(this) {
|
||||
public void run() {
|
||||
sentence.autoTrimSamplePeak();
|
||||
AudiobookRecorder.window.updateWaveformMarkers();
|
||||
}
|
||||
});
|
||||
} else if (tm.equals("fft")) {
|
||||
autoTrimSampleFFT(useRaw);
|
||||
AudiobookRecorder.window.queueJob(new SentenceJob(this) {
|
||||
public void run() {
|
||||
sentence.autoTrimSampleFFT();
|
||||
AudiobookRecorder.window.updateWaveformMarkers();
|
||||
}
|
||||
});
|
||||
} else {
|
||||
startOffset = 0;
|
||||
crossStartOffset = 0;
|
||||
@@ -288,13 +302,13 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
processed = false;
|
||||
// peak = -1d;
|
||||
}
|
||||
AudiobookRecorder.window.updateWaveform(true);
|
||||
}
|
||||
|
||||
public static final int FFTBuckets = 1024;
|
||||
|
||||
public void autoTrimSampleFFT() {
|
||||
public void autoTrimSampleFFT(boolean ignored) {
|
||||
Debug.trace();
|
||||
autoTrimSampleFFT(false);
|
||||
}
|
||||
|
||||
public double bucketDifference(double[] a, double[] b) {
|
||||
@@ -308,16 +322,12 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
return diff;
|
||||
}
|
||||
|
||||
public void autoTrimSampleFFT(boolean useRaw) {
|
||||
public void autoTrimSampleFFT() {
|
||||
Debug.trace();
|
||||
crossStartOffset = -1;
|
||||
crossEndOffset = -1;
|
||||
double[][] samples;
|
||||
if (useRaw) {
|
||||
samples = getRawAudioData();
|
||||
} else {
|
||||
samples = getProcessedAudioData();
|
||||
}
|
||||
samples = getProcessedAudioData();
|
||||
if (samples == null) {
|
||||
return;
|
||||
}
|
||||
@@ -389,7 +399,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
if (endOffset <= startOffset) endOffset = startOffset + fftSize;
|
||||
if (endOffset < 0) endOffset = 0;
|
||||
if (endOffset >= samples[LEFT].length) endOffset = samples[LEFT].length;
|
||||
updateCrossings(useRaw);
|
||||
updateCrossings();
|
||||
intens = null;
|
||||
samples = null;
|
||||
processed = true;
|
||||
@@ -440,21 +450,17 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
}
|
||||
|
||||
|
||||
public void autoTrimSamplePeak() {
|
||||
public void autoTrimSamplePeak(boolean ignored) {
|
||||
Debug.trace();
|
||||
autoTrimSamplePeak(false);
|
||||
autoTrimSamplePeak();
|
||||
}
|
||||
|
||||
public void autoTrimSamplePeak(boolean useRaw) {
|
||||
public void autoTrimSamplePeak() {
|
||||
Debug.trace();
|
||||
crossStartOffset = -1;
|
||||
crossEndOffset = -1;
|
||||
double[][] samples;
|
||||
if (useRaw) {
|
||||
samples = getRawAudioData();
|
||||
} else {
|
||||
samples = getProcessedAudioData();
|
||||
}
|
||||
samples = getProcessedAudioData();
|
||||
if (samples == null) return;
|
||||
double noiseFloor = AudiobookRecorder.window.getNoiseFloor();
|
||||
noiseFloor *= 1.1;
|
||||
@@ -494,7 +500,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
|
||||
if (startOffset < 0) startOffset = 0;
|
||||
if (endOffset >= samples[LEFT].length) endOffset = samples[LEFT].length-1;
|
||||
updateCrossings(useRaw);
|
||||
updateCrossings();
|
||||
processed = true;
|
||||
reloadTree();
|
||||
}
|
||||
@@ -601,38 +607,23 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
|
||||
public void updateCrossings() {
|
||||
Debug.trace();
|
||||
updateCrossings(false);
|
||||
}
|
||||
|
||||
public void updateCrossings(boolean useRaw) {
|
||||
Debug.trace();
|
||||
updateStartCrossing(useRaw);
|
||||
updateEndCrossing(useRaw);
|
||||
updateStartCrossing();
|
||||
updateEndCrossing();
|
||||
runtime = -1d;
|
||||
getLength();
|
||||
}
|
||||
|
||||
public void updateStartCrossing() {
|
||||
Debug.trace();
|
||||
updateStartCrossing(false);
|
||||
}
|
||||
|
||||
public void updateStartCrossing(boolean useRaw) {
|
||||
Debug.trace();
|
||||
if (crossStartOffset == -1) {
|
||||
crossStartOffset = findNearestZeroCrossing(useRaw, startOffset, 4096);
|
||||
crossStartOffset = findNearestZeroCrossing(startOffset, 4096);
|
||||
}
|
||||
}
|
||||
|
||||
public void updateEndCrossing() {
|
||||
Debug.trace();
|
||||
updateEndCrossing(false);
|
||||
}
|
||||
|
||||
public void updateEndCrossing(boolean useRaw) {
|
||||
Debug.trace();
|
||||
if (crossEndOffset == -1) {
|
||||
crossEndOffset = findNearestZeroCrossing(useRaw, endOffset, 4096);
|
||||
crossEndOffset = findNearestZeroCrossing(endOffset, 4096);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -698,16 +689,6 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Runnable getRecognitionRunnable() {
|
||||
Runnable r = new Runnable() {
|
||||
public void run() {
|
||||
Debug.d("Starting recognition of", getId());
|
||||
doRecognition();
|
||||
}
|
||||
};
|
||||
return r;
|
||||
}
|
||||
|
||||
public void doRecognition() {
|
||||
Debug.trace();
|
||||
try {
|
||||
@@ -735,12 +716,6 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
}
|
||||
}
|
||||
|
||||
public void recognise() {
|
||||
Debug.trace();
|
||||
Thread t = new Thread(getRecognitionRunnable());
|
||||
t.start();
|
||||
}
|
||||
|
||||
public void setLocked(boolean l) {
|
||||
Debug.trace();
|
||||
if (locked == l) return;
|
||||
@@ -776,18 +751,9 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
}
|
||||
|
||||
public int findNearestZeroCrossing(int pos, int range) {
|
||||
Debug.trace();
|
||||
return findNearestZeroCrossing(false, pos, range);
|
||||
}
|
||||
|
||||
public int findNearestZeroCrossing(boolean useRaw, int pos, int range) {
|
||||
Debug.trace();
|
||||
double[][] data = null;
|
||||
if (useRaw) {
|
||||
data = getRawAudioData();
|
||||
} else {
|
||||
data = getProcessedAudioData();
|
||||
}
|
||||
data = getProcessedAudioData();
|
||||
if (data == null) return 0;
|
||||
if (data[LEFT].length == 0) return 0;
|
||||
|
||||
@@ -855,7 +821,6 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
File to = sentence.getFile();
|
||||
Files.copy(from.toPath(), to.toPath());
|
||||
|
||||
// sentence.updateCrossings();
|
||||
return sentence;
|
||||
}
|
||||
|
||||
@@ -873,24 +838,15 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
|
||||
public double getPeakValue() {
|
||||
Debug.trace();
|
||||
return getPeakValue(false, true);
|
||||
return getPeakValue(true);
|
||||
}
|
||||
|
||||
public double getPeakValue(boolean useRaw) {
|
||||
Debug.trace();
|
||||
return getPeakValue(useRaw, true);
|
||||
}
|
||||
|
||||
public double getPeakValue(boolean useRaw, boolean applyGain) {
|
||||
public double getPeakValue(boolean applyGain) {
|
||||
Debug.trace();
|
||||
double oldGain = gain;
|
||||
gain = 1.0d;
|
||||
double[][] samples = null;
|
||||
if (useRaw) {
|
||||
samples = getRawAudioData();
|
||||
} else {
|
||||
samples = getProcessedAudioData(true, applyGain);
|
||||
}
|
||||
samples = getProcessedAudioData(true, applyGain);
|
||||
gain = oldGain;
|
||||
if (samples == null) {
|
||||
return 0;
|
||||
@@ -937,7 +893,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
public double normalize(double low, double high) {
|
||||
Debug.trace();
|
||||
if (locked) return gain;
|
||||
double max = getPeakValue(true, false);
|
||||
double max = getPeakValue(false);
|
||||
double d = 0.708 / max;
|
||||
if (d > 1d) d = 1d;
|
||||
if (d < low) d = low;
|
||||
@@ -952,7 +908,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
public double normalize() {
|
||||
Debug.trace();
|
||||
if (locked) return gain;
|
||||
double max = getPeakValue(true, false);
|
||||
double max = getPeakValue(false);
|
||||
double d = 0.708 / max;
|
||||
if (d > 1d) d = 1d;
|
||||
setGain(d);
|
||||
@@ -1427,21 +1383,23 @@ public class Sentence extends BookTreeNode implements Cacheable {
|
||||
|
||||
|
||||
String def = AudiobookRecorder.window.getDefaultEffectsChain();
|
||||
Effect eff = AudiobookRecorder.window.effects.get(def);
|
||||
|
||||
if (effectsEnabled) {
|
||||
if (eff != null) {
|
||||
eff.init(getAudioFormat().getFrameRate());
|
||||
eff.process(processedAudio);
|
||||
}
|
||||
if ((def != null) && (AudiobookRecorder.window.effects != null)) {
|
||||
Effect eff = AudiobookRecorder.window.effects.get(def);
|
||||
|
||||
if (effectsEnabled) {
|
||||
if (eff != null) {
|
||||
eff.init(getAudioFormat().getFrameRate());
|
||||
eff.process(processedAudio);
|
||||
}
|
||||
|
||||
if (effectChain != null) {
|
||||
// Don't double up the default chain
|
||||
if (!effectChain.equals(def)) {
|
||||
eff = AudiobookRecorder.window.effects.get(effectChain);
|
||||
if (eff != null) {
|
||||
eff.init(getAudioFormat().getFrameRate());
|
||||
eff.process(processedAudio);
|
||||
if (effectChain != null) {
|
||||
// Don't double up the default chain
|
||||
if (!effectChain.equals(def)) {
|
||||
eff = AudiobookRecorder.window.effects.get(effectChain);
|
||||
if (eff != null) {
|
||||
eff.init(getAudioFormat().getFrameRate());
|
||||
eff.process(processedAudio);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user