Compare commits

...

4 Commits

Author SHA1 Message Date
423d840d83 Release 0.3.8 2020-02-02 17:13:12 +00:00
1997b0bf9b Fix tree collapse on recording finish 2020-02-02 17:09:06 +00:00
b206fb33aa Added deepspeech wrapper example to readme 2020-02-02 15:08:55 +00:00
11b26e396c Minor improvements to readme 2020-02-02 15:01:39 +00:00
5 changed files with 95 additions and 115 deletions

View File

@@ -29,12 +29,13 @@ From here on much is controlled by key presses.
appended to the currently selected chapter, or to the last chapter if none is selected.
* Press and hold "T" to record a new phrase that is the start of a new paragraph. This adds the "post paragraph" gap to the previous sentence. Otherwise it does the same as "R".
* Press and hold "F" to record a "continuation" phrase. This sets the previous phrase's post-gap to be the "short" gap instead of the normal length gap.
* Press and hold "Y" to record a new phrase that is the start of a new section. This adds the "post section" gap to the previous sentence. Otherwise it does the same as "R".
* Press "D" to delete the last phrase you recorded.
* Press "E" to re-record the currently selected phrase.
Each phrase you record will be briefly analysed using FFT to find the start and end of the audio and set
Each phrase you record can be automatically analysed to find the start and end of the audio and set
crop marks appropriately. These can be adjusted in the waveform display when a phrase is selected. You can also
re-run the analysis using either the default FFT method or using a peak detector method (finding the first and last points
re-run the analysis using either FFT or a peak detector method (finding the first and last points
where the audio amplitude rises above the background noise).
The phrases also have a "post gap" associated with them. This is the amount of room noise (in milliseconds) to place between
@@ -53,18 +54,14 @@ edit the text of this ID to identify the recordings. You
may, for instance, change it to have the same text as the
audio contains.
To help with this the Haven On-Demand online speech recognition
service is integrated with the system and can be used to try to convert the
audio into text. Right clicking on a recording brings
up a menu which includes the option to try to convert
the audio into text. The detected text is then used to
replace the current recording ID / text.
The audio can also be automatically converted to text if you have a suitable command-line
executable that will work. One example is (on Linux) [DeepSpeech](https://github.com/mozilla/DeepSpeech) by Mozilla.
File layout
-----------
All data is stored in your "storage" directory (specified in Options). Each book (which is a directory named after the
title of the book) has an associated XML file (audiobook.abk) and a directory "files" where all the audio (stored as WAV
title of the book) has an associated XML file (audiobook.abx) and a directory "files" where all the audio (stored as WAV
files) is placed.
When you export the book as MP3 a new folder "export" is created within the book's folder where the MP3 files are placed.
@@ -81,3 +78,26 @@ Building
5. Build with `ant build`
6. Run with `java -jar ./AudiobookRecorder.jar`
----
Extra Resources
===============
* DeepSpeech wrapper script
This is a small script that will convert the audio into a format DeepSpeech likes and call the `deepspeech` executable, removing any extra rubbish from the output. It
also requires `sox` to be installed for the audio conversion.
```
#!/bin/bash
ID=$$
FILE=$1
BINPATH=${HOME}/local/bin
MODELS=${HOME}/ds/deepspeech-0.6.1-models
sox "$FILE" -r 16000 -c 1 -b 16 "/tmp/ds-${ID}.wav"
${BINPATH}/deepspeech --model ${MODELS}/output_graph.pbmm --lm ${MODELS}/lm.binary --trie ${MODELS}/trie --audio "/tmp/ds-${ID}.wav" 2>/dev/null
rm /tmp/ds-${ID}.wav
```

View File

@@ -1 +1 @@
version=0.3.7
version=0.3.8

View File

@@ -1684,10 +1684,11 @@ public class AudiobookRecorder extends JFrame implements DocumentListener {
Sentence snt = (Sentence)s.nextElement();
if (!snt.isLocked()) {
if (!snt.beenDetected()) {
Debug.d("Queueing recognition of", snt.getId());
Runnable r = snt.getRecognitionRunnable();
snt.setQueued();
queueJob(r);
queueJob(new SentenceJob(snt) {
public void run() {
sentence.doRecognition();
}
});
}
}
}
@@ -2096,7 +2097,7 @@ public class AudiobookRecorder extends JFrame implements DocumentListener {
if (recording == null) return;
recording.stopRecording();
book.reloadTree();
// book.reloadTree();
bookTree.expandPath(new TreePath(((DefaultMutableTreeNode)recording.getParent()).getPath()));
bookTree.setSelectionPath(new TreePath(recording.getPath()));

View File

@@ -450,6 +450,7 @@ public class Book extends BookTreeNode {
}
public void reloadTree() {
Debug.trace();
SwingUtilities.invokeLater(new Runnable() {
public void run() {
AudiobookRecorder.window.bookTreeModel.reload(Book.this);

View File

@@ -222,7 +222,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
if (text.equals("")) text = id;
if ((crossStartOffset == -1) || (crossEndOffset == -1)) {
updateCrossings(true);
updateCrossings();
}
if (runtime <= 0.01d) getLength();
@@ -260,26 +260,40 @@ public class Sentence extends BookTreeNode implements Cacheable {
CacheManager.removeFromCache(this);
if (!id.equals("room-noise")) {
autoTrimSample(true);
autoTrimSample();
if (Options.getBoolean("process.sphinx")) {
recognise();
AudiobookRecorder.window.queueJob(new SentenceJob(this) {
public void run() {
sentence.doRecognition();
}
});
}
}
}
public void autoTrimSample() {
public void autoTrimSample(boolean ignored) {
Debug.trace();
autoTrimSample(false);
autoTrimSample();
}
public void autoTrimSample(boolean useRaw) {
public void autoTrimSample() {
Debug.trace();
String tm = Options.get("audio.recording.trim");
if (tm.equals("peak")) {
autoTrimSamplePeak(useRaw);
AudiobookRecorder.window.queueJob(new SentenceJob(this) {
public void run() {
sentence.autoTrimSamplePeak();
AudiobookRecorder.window.updateWaveformMarkers();
}
});
} else if (tm.equals("fft")) {
autoTrimSampleFFT(useRaw);
AudiobookRecorder.window.queueJob(new SentenceJob(this) {
public void run() {
sentence.autoTrimSampleFFT();
AudiobookRecorder.window.updateWaveformMarkers();
}
});
} else {
startOffset = 0;
crossStartOffset = 0;
@@ -288,13 +302,13 @@ public class Sentence extends BookTreeNode implements Cacheable {
processed = false;
// peak = -1d;
}
AudiobookRecorder.window.updateWaveform(true);
}
public static final int FFTBuckets = 1024;
public void autoTrimSampleFFT() {
public void autoTrimSampleFFT(boolean ignored) {
Debug.trace();
autoTrimSampleFFT(false);
}
public double bucketDifference(double[] a, double[] b) {
@@ -308,16 +322,12 @@ public class Sentence extends BookTreeNode implements Cacheable {
return diff;
}
public void autoTrimSampleFFT(boolean useRaw) {
public void autoTrimSampleFFT() {
Debug.trace();
crossStartOffset = -1;
crossEndOffset = -1;
double[][] samples;
if (useRaw) {
samples = getRawAudioData();
} else {
samples = getProcessedAudioData();
}
samples = getProcessedAudioData();
if (samples == null) {
return;
}
@@ -389,7 +399,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
if (endOffset <= startOffset) endOffset = startOffset + fftSize;
if (endOffset < 0) endOffset = 0;
if (endOffset >= samples[LEFT].length) endOffset = samples[LEFT].length;
updateCrossings(useRaw);
updateCrossings();
intens = null;
samples = null;
processed = true;
@@ -440,21 +450,17 @@ public class Sentence extends BookTreeNode implements Cacheable {
}
public void autoTrimSamplePeak() {
public void autoTrimSamplePeak(boolean ignored) {
Debug.trace();
autoTrimSamplePeak(false);
autoTrimSamplePeak();
}
public void autoTrimSamplePeak(boolean useRaw) {
public void autoTrimSamplePeak() {
Debug.trace();
crossStartOffset = -1;
crossEndOffset = -1;
double[][] samples;
if (useRaw) {
samples = getRawAudioData();
} else {
samples = getProcessedAudioData();
}
samples = getProcessedAudioData();
if (samples == null) return;
double noiseFloor = AudiobookRecorder.window.getNoiseFloor();
noiseFloor *= 1.1;
@@ -494,7 +500,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
if (startOffset < 0) startOffset = 0;
if (endOffset >= samples[LEFT].length) endOffset = samples[LEFT].length-1;
updateCrossings(useRaw);
updateCrossings();
processed = true;
reloadTree();
}
@@ -601,38 +607,23 @@ public class Sentence extends BookTreeNode implements Cacheable {
public void updateCrossings() {
Debug.trace();
updateCrossings(false);
}
public void updateCrossings(boolean useRaw) {
Debug.trace();
updateStartCrossing(useRaw);
updateEndCrossing(useRaw);
updateStartCrossing();
updateEndCrossing();
runtime = -1d;
getLength();
}
public void updateStartCrossing() {
Debug.trace();
updateStartCrossing(false);
}
public void updateStartCrossing(boolean useRaw) {
Debug.trace();
if (crossStartOffset == -1) {
crossStartOffset = findNearestZeroCrossing(useRaw, startOffset, 4096);
crossStartOffset = findNearestZeroCrossing(startOffset, 4096);
}
}
public void updateEndCrossing() {
Debug.trace();
updateEndCrossing(false);
}
public void updateEndCrossing(boolean useRaw) {
Debug.trace();
if (crossEndOffset == -1) {
crossEndOffset = findNearestZeroCrossing(useRaw, endOffset, 4096);
crossEndOffset = findNearestZeroCrossing(endOffset, 4096);
}
}
@@ -698,16 +689,6 @@ public class Sentence extends BookTreeNode implements Cacheable {
return null;
}
public Runnable getRecognitionRunnable() {
Runnable r = new Runnable() {
public void run() {
Debug.d("Starting recognition of", getId());
doRecognition();
}
};
return r;
}
public void doRecognition() {
Debug.trace();
try {
@@ -735,12 +716,6 @@ public class Sentence extends BookTreeNode implements Cacheable {
}
}
public void recognise() {
Debug.trace();
Thread t = new Thread(getRecognitionRunnable());
t.start();
}
public void setLocked(boolean l) {
Debug.trace();
if (locked == l) return;
@@ -776,18 +751,9 @@ public class Sentence extends BookTreeNode implements Cacheable {
}
public int findNearestZeroCrossing(int pos, int range) {
Debug.trace();
return findNearestZeroCrossing(false, pos, range);
}
public int findNearestZeroCrossing(boolean useRaw, int pos, int range) {
Debug.trace();
double[][] data = null;
if (useRaw) {
data = getRawAudioData();
} else {
data = getProcessedAudioData();
}
data = getProcessedAudioData();
if (data == null) return 0;
if (data[LEFT].length == 0) return 0;
@@ -855,7 +821,6 @@ public class Sentence extends BookTreeNode implements Cacheable {
File to = sentence.getFile();
Files.copy(from.toPath(), to.toPath());
// sentence.updateCrossings();
return sentence;
}
@@ -873,24 +838,15 @@ public class Sentence extends BookTreeNode implements Cacheable {
public double getPeakValue() {
Debug.trace();
return getPeakValue(false, true);
return getPeakValue(true);
}
public double getPeakValue(boolean useRaw) {
Debug.trace();
return getPeakValue(useRaw, true);
}
public double getPeakValue(boolean useRaw, boolean applyGain) {
public double getPeakValue(boolean applyGain) {
Debug.trace();
double oldGain = gain;
gain = 1.0d;
double[][] samples = null;
if (useRaw) {
samples = getRawAudioData();
} else {
samples = getProcessedAudioData(true, applyGain);
}
samples = getProcessedAudioData(true, applyGain);
gain = oldGain;
if (samples == null) {
return 0;
@@ -937,7 +893,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
public double normalize(double low, double high) {
Debug.trace();
if (locked) return gain;
double max = getPeakValue(true, false);
double max = getPeakValue(false);
double d = 0.708 / max;
if (d > 1d) d = 1d;
if (d < low) d = low;
@@ -952,7 +908,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
public double normalize() {
Debug.trace();
if (locked) return gain;
double max = getPeakValue(true, false);
double max = getPeakValue(false);
double d = 0.708 / max;
if (d > 1d) d = 1d;
setGain(d);
@@ -1427,21 +1383,23 @@ public class Sentence extends BookTreeNode implements Cacheable {
String def = AudiobookRecorder.window.getDefaultEffectsChain();
Effect eff = AudiobookRecorder.window.effects.get(def);
if (effectsEnabled) {
if (eff != null) {
eff.init(getAudioFormat().getFrameRate());
eff.process(processedAudio);
}
if ((def != null) && (AudiobookRecorder.window.effects != null)) {
Effect eff = AudiobookRecorder.window.effects.get(def);
if (effectsEnabled) {
if (eff != null) {
eff.init(getAudioFormat().getFrameRate());
eff.process(processedAudio);
}
if (effectChain != null) {
// Don't double up the default chain
if (!effectChain.equals(def)) {
eff = AudiobookRecorder.window.effects.get(effectChain);
if (eff != null) {
eff.init(getAudioFormat().getFrameRate());
eff.process(processedAudio);
if (effectChain != null) {
// Don't double up the default chain
if (!effectChain.equals(def)) {
eff = AudiobookRecorder.window.effects.get(effectChain);
if (eff != null) {
eff.init(getAudioFormat().getFrameRate());
eff.process(processedAudio);
}
}
}
}