Reinstate sphinx to replace defunct Haven

2019-07-18 15:07:04 +01:00
parent 45d6882527
commit ebf961449a
5 changed files with 131 additions and 6 deletions
--- a/src/uk/co/majenko/audiobookrecorder/AudiobookRecorder.java
+++ b/src/uk/co/majenko/audiobookrecorder/AudiobookRecorder.java
@@ -20,6 +20,10 @@ import javax.imageio.*;
 import org.w3c.dom.*;
 import javax.xml.parsers.*;
 import java.io.*;
+import edu.cmu.sphinx.api.*;
+import edu.cmu.sphinx.decoder.adaptation.*;
+import edu.cmu.sphinx.result.*;
+import org.w3c.dom.Node;

 public class AudiobookRecorder extends JFrame {

@@ -104,8 +108,25 @@ public class AudiobookRecorder extends JFrame {
    public TargetDataLine microphone = null;
    public AudioInputStream microphoneStream = null;

+    public Configuration sphinxConfig;
+    public StreamSpeechRecognizer recognizer;
+
    public static AudiobookRecorder window;

+    void initSphinx() {
+        sphinxConfig = new Configuration();
+
+        sphinxConfig.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
+        sphinxConfig.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
+        sphinxConfig.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
+
+        try {
+            recognizer = new StreamSpeechRecognizer(sphinxConfig);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
    void buildToolbar(Container ob) {
        toolBar = new MainToolBar(this);
        toolBar.addSeparator();
@@ -790,6 +811,42 @@ public class AudiobookRecorder extends JFrame {
        }
    }

+    class BatchConversionThread implements Runnable {
+        Chapter chapter;
+
+        public BatchConversionThread(Chapter c) {
+            chapter = c;
+        }
+        public void run() {
+            try {
+                Configuration sphinxConfig = new Configuration();
+
+                sphinxConfig.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
+                sphinxConfig.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
+                sphinxConfig.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
+
+                sphinxConfig.setSampleRate((int)(book.getAudioFormat().getSampleRate() / 4f));
+
+                StreamSpeechRecognizer recognizer;
+
+                recognizer = new StreamSpeechRecognizer(sphinxConfig);
+
+
+                for (Enumeration s = chapter.children(); s.hasMoreElements();) {
+                    Sentence snt = (Sentence)s.nextElement();
+                    if (!snt.isLocked()) {
+                        if (snt.getId().equals(snt.getText())) {
+                            snt.doRecognition(recognizer);
+                        }
+                    }
+                }
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+
    @SuppressWarnings("unchecked")
    void treePopup(MouseEvent e) {

@@ -811,6 +868,7 @@ public class AudiobookRecorder extends JFrame {
                        JMenuObject o = (JMenuObject)e.getSource();
                        Sentence s = (Sentence)o.getObject();
                        if (!s.isLocked()) {
+                            s.recognise();
                        }
                    }
                });
@@ -1120,11 +1178,9 @@ public class AudiobookRecorder extends JFrame {
                    public void actionPerformed(ActionEvent e) {
                        JMenuObject o = (JMenuObject)e.getSource();
                        Chapter c = (Chapter)o.getObject();
-                        for (Enumeration s = c.children(); s.hasMoreElements();) {
-                            Sentence snt = (Sentence)s.nextElement();
-                            if (snt.getId().equals(snt.getText())) {
-                            }
-                        }
+                        BatchConversionThread r = new BatchConversionThread(c);
+                        Thread t = new Thread(r);
+                        t.start();
                    }
                });

--- a/src/uk/co/majenko/audiobookrecorder/Options.java
+++ b/src/uk/co/majenko/audiobookrecorder/Options.java
@@ -321,6 +321,10 @@ public class Options extends JDialog {
        exportRate = addDropdown(optionsPanel, "Export sample rate:", getSampleRateList(), get("audio.export.samplerate"));
        

+        addSeparator(optionsPanel);
+
+        enableParsing = addCheckBox(optionsPanel, "Enable automatic sphinx speech-to-text (**SLOW**)", getBoolean("process.sphinx"));
+    
        addSeparator(optionsPanel);

        externalEditor = addTextField(optionsPanel, "External Editor Command", get("editor.external"));
@@ -577,6 +581,7 @@ public class Options extends JDialog {

        defaultPrefs.put("audio.export.bitrate", "256000");
        defaultPrefs.put("audio.export.samplerate", "44100");
+        defaultPrefs.put("process.sphinx", "false");

        defaultPrefs.put("editor.external", "");

@@ -699,6 +704,7 @@ public class Options extends JDialog {
        set("catenation.post-section", postSectionGap.getValue());
        set("audio.export.bitrate", ((KVPair)bitRate.getSelectedItem()).key);
        set("audio.export.samplerate", ((KVPair)exportRate.getSelectedItem()).key);
+        set("process.sphinx", enableParsing.isSelected());
        set("editor.external", externalEditor.getText());
        set("cache.size", cacheSize.getValue());
        set("audio.recording.trim.fft", fftThreshold.getValue());
--- a/src/uk/co/majenko/audiobookrecorder/Sentence.java
+++ b/src/uk/co/majenko/audiobookrecorder/Sentence.java
@@ -21,6 +21,10 @@ import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.http.util.EntityUtils;

+import edu.cmu.sphinx.api.*;
+import edu.cmu.sphinx.decoder.adaptation.*;
+import edu.cmu.sphinx.result.*;
+
 import org.json.*;

 import java.util.Timer;
@@ -182,8 +186,10 @@ public class Sentence extends DefaultMutableTreeNode implements Cacheable {
            } else if (tm.equals("fft")) {
                autoTrimSampleFFT();
            }
+            if (Options.getBoolean("process.sphinx")) {
+                recognise();
+            }
        }
-
    }

    public static final int FFTBuckets = 1024;
@@ -483,7 +489,58 @@ public class Sentence extends DefaultMutableTreeNode implements Cacheable {
        return null;
    }

+    public void doRecognition(StreamSpeechRecognizer recognizer) {
+        try {
+            setText("[recognising...]");
+            AudiobookRecorder.window.bookTreeModel.reload(this);
+
+            byte[] inData = getPCMData();
+
+            ByteArrayInputStream bas = new ByteArrayInputStream(inData);
+            recognizer.startRecognition(bas);
+            SpeechResult result;
+            String res = "";
+            while ((result = recognizer.getResult()) != null) {
+                res += result.getHypothesis();
+                res += " ";
+System.err.println(res);
+            }
+            recognizer.stopRecognition();
+
+            setText(res);
+            AudiobookRecorder.window.bookTreeModel.reload(this);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
    public void recognise() {
+        Thread t = new Thread(new Runnable() {
+            public void run() {
+                try {
+                    Configuration sphinxConfig = new Configuration();
+
+                    sphinxConfig.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
+                    sphinxConfig.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
+                    sphinxConfig.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
+
+                    AudioInputStream s = AudioSystem.getAudioInputStream(getFile());
+                    AudioFormat format = getAudioFormat();
+
+                    sphinxConfig.setSampleRate((int)(format.getSampleRate()));
+
+                    StreamSpeechRecognizer recognizer;
+
+                    recognizer = new StreamSpeechRecognizer(sphinxConfig);
+
+                    doRecognition(recognizer);
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        });
+
+        t.start();
    }

    public void setLocked(boolean l) {