diff --git a/deps/sphinx4-core-5prealpha-SNAPSHOT.jar b/deps/sphinx4-core-5prealpha-SNAPSHOT.jar
new file mode 100644
index 0000000..42c6a26
--- /dev/null
+++ b/deps/sphinx4-core-5prealpha-SNAPSHOT.jar
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2f52e3499b6d54b04791895b7900f9a86fcf3f0884a942b1bac29a23fa81fcc
+size 1259746
diff --git a/deps/sphinx4-data-5prealpha-SNAPSHOT.jar b/deps/sphinx4-data-5prealpha-SNAPSHOT.jar
new file mode 100644
index 0000000..ace1779
--- /dev/null
+++ b/deps/sphinx4-data-5prealpha-SNAPSHOT.jar
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5865a7591ebece3de20e49d0a4567a6dcdaf778bf532caf23149465abf434b30
+size 35644210
diff --git a/src/uk/co/majenko/audiobookrecorder/AudiobookRecorder.java b/src/uk/co/majenko/audiobookrecorder/AudiobookRecorder.java
index 5a5fe9e..f0495e3 100644
--- a/src/uk/co/majenko/audiobookrecorder/AudiobookRecorder.java
+++ b/src/uk/co/majenko/audiobookrecorder/AudiobookRecorder.java
@@ -20,6 +20,10 @@ import javax.imageio.*;
 import org.w3c.dom.*;
 import javax.xml.parsers.*;
 import java.io.*;
+import edu.cmu.sphinx.api.*;
+import edu.cmu.sphinx.decoder.adaptation.*;
+import edu.cmu.sphinx.result.*;
+import org.w3c.dom.Node;
 
 public class AudiobookRecorder extends JFrame {
 
@@ -104,8 +108,41 @@ public class AudiobookRecorder extends JFrame {
     public TargetDataLine microphone = null;
     public AudioInputStream microphoneStream = null;
 
+    public Configuration sphinxConfig;
+    public StreamSpeechRecognizer recognizer;
+
     public static AudiobookRecorder window;
 
+    /**
+     * Creates a Sphinx configuration whose acoustic model, dictionary and
+     * language model paths point at the en-us "resource:/" locations.
+     * The sample rate is not set here; callers set it when they need to.
+     */
+    private Configuration createSphinxConfiguration() {
+        Configuration config = new Configuration();
+
+        config.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
+        config.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
+        config.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
+
+        return config;
+    }
+
+    /**
+     * Initialises the sphinxConfig and recognizer fields.
+     * If the recognizer cannot be constructed the stack trace is printed
+     * and the recognizer field keeps its previous value.
+     */
+    void initSphinx() {
+        sphinxConfig = createSphinxConfiguration();
+
+        try {
+            recognizer = new StreamSpeechRecognizer(sphinxConfig);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
     void buildToolbar(Container ob) {
         toolBar = new MainToolBar(this);
         toolBar.addSeparator();
@@ -790,6 +827,44 @@ public class AudiobookRecorder extends JFrame {
         }
     }
 
+    /**
+     * Runnable that runs speech recognition over the sentences of one chapter.
+     * A sentence is processed only if it is not locked and its text equals its id.
+     */
+    class BatchConversionThread implements Runnable {
+        Chapter chapter;
+
+        public BatchConversionThread(Chapter c) {
+            chapter = c;
+        }
+
+        /**
+         * Builds one recognizer and passes it to each eligible sentence in the order
+         * returned by chapter.children(). An exception here is printed and ends the run.
+         */
+        public void run() {
+            try {
+                Configuration sphinxConfig = createSphinxConfiguration();
+
+                // NOTE(review): the recognizer is given a quarter of the book's sample rate,
+                // whereas Sentence.recognise() passes the full rate - confirm which is intended.
+                sphinxConfig.setSampleRate((int)(book.getAudioFormat().getSampleRate() / 4f));
+
+                StreamSpeechRecognizer recognizer = new StreamSpeechRecognizer(sphinxConfig);
+
+                for (Enumeration<?> s = chapter.children(); s.hasMoreElements();) {
+                    Sentence snt = (Sentence)s.nextElement();
+                    if (!snt.isLocked() && snt.getId().equals(snt.getText())) {
+                        snt.doRecognition(recognizer);
+                    }
+                }
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+
     @SuppressWarnings("unchecked")
     void treePopup(MouseEvent e) {
 
@@ -811,6 +886,7 @@ public class AudiobookRecorder extends JFrame {
                     JMenuObject o = (JMenuObject)e.getSource();
                     Sentence s = (Sentence)o.getObject();
                     if (!s.isLocked()) {
+                        s.recognise();
                     }
                 }
             });
@@ -1120,11 +1196,9 @@ public class AudiobookRecorder extends JFrame {
                     public void actionPerformed(ActionEvent e) {
                         JMenuObject o = (JMenuObject)e.getSource();
                         Chapter c = (Chapter)o.getObject();
-                        for (Enumeration s = c.children(); s.hasMoreElements();) {
-                            Sentence snt = (Sentence)s.nextElement();
-                            if (snt.getId().equals(snt.getText())) {
-                            }
-                        }
+                        BatchConversionThread r = new BatchConversionThread(c);
+                        Thread t = new Thread(r);
+                        t.start();
                     }
                 });
 
diff --git a/src/uk/co/majenko/audiobookrecorder/Options.java b/src/uk/co/majenko/audiobookrecorder/Options.java
index
89d0ceb..f591c0a 100644 --- a/src/uk/co/majenko/audiobookrecorder/Options.java +++ b/src/uk/co/majenko/audiobookrecorder/Options.java @@ -321,6 +321,10 @@ public class Options extends JDialog { exportRate = addDropdown(optionsPanel, "Export sample rate:", getSampleRateList(), get("audio.export.samplerate")); + addSeparator(optionsPanel); + + enableParsing = addCheckBox(optionsPanel, "Enable automatic sphinx speech-to-text (**SLOW**)", getBoolean("process.sphinx")); + addSeparator(optionsPanel); externalEditor = addTextField(optionsPanel, "External Editor Command", get("editor.external")); @@ -577,6 +581,7 @@ public class Options extends JDialog { defaultPrefs.put("audio.export.bitrate", "256000"); defaultPrefs.put("audio.export.samplerate", "44100"); + defaultPrefs.put("process.sphinx", "false"); defaultPrefs.put("editor.external", ""); @@ -699,6 +704,7 @@ public class Options extends JDialog { set("catenation.post-section", postSectionGap.getValue()); set("audio.export.bitrate", ((KVPair)bitRate.getSelectedItem()).key); set("audio.export.samplerate", ((KVPair)exportRate.getSelectedItem()).key); + set("process.sphinx", enableParsing.isSelected()); set("editor.external", externalEditor.getText()); set("cache.size", cacheSize.getValue()); set("audio.recording.trim.fft", fftThreshold.getValue()); diff --git a/src/uk/co/majenko/audiobookrecorder/Sentence.java b/src/uk/co/majenko/audiobookrecorder/Sentence.java index c5ef96f..70525e9 100644 --- a/src/uk/co/majenko/audiobookrecorder/Sentence.java +++ b/src/uk/co/majenko/audiobookrecorder/Sentence.java @@ -21,6 +21,10 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; +import edu.cmu.sphinx.api.*; +import edu.cmu.sphinx.decoder.adaptation.*; +import edu.cmu.sphinx.result.*; + import org.json.*; import java.util.Timer; @@ -182,8 +186,10 @@ public class Sentence extends DefaultMutableTreeNode implements Cacheable { } else if 
(tm.equals("fft")) {
                 autoTrimSampleFFT();
             }
+            if (Options.getBoolean("process.sphinx")) {
+                recognise();
+            }
         }
-
     }
 
     public static final int FFTBuckets = 1024;
@@ -483,7 +489,83 @@ public class Sentence extends DefaultMutableTreeNode implements Cacheable {
         return null;
     }
 
+    /**
+     * Runs speech recognition over this sentence's PCM data and stores the
+     * hypotheses, joined by single spaces, as the sentence text.
+     *
+     * The text reads "[recognising...]" while recognition is in progress. If
+     * anything fails, the stack trace is printed and the text the sentence had
+     * on entry is put back, so a failure does not leave the placeholder behind.
+     *
+     * NOTE(review): the callers added in this change run this on worker threads,
+     * so the bookTreeModel.reload() calls happen off the Swing event thread -
+     * confirm that is acceptable for the tree model.
+     *
+     * @param recognizer recognizer to start, read results from and stop
+     */
+    public void doRecognition(StreamSpeechRecognizer recognizer) {
+        String previousText = getText();
+        try {
+            setText("[recognising...]");
+            AudiobookRecorder.window.bookTreeModel.reload(this);
+
+            StringBuilder text = new StringBuilder();
+            recognizer.startRecognition(new ByteArrayInputStream(getPCMData()));
+            try {
+                SpeechResult result;
+                while ((result = recognizer.getResult()) != null) {
+                    if (text.length() > 0) {
+                        text.append(' ');
+                    }
+                    text.append(result.getHypothesis());
+                }
+            } finally {
+                // Always stop a recognition that was started, even when reading results fails.
+                recognizer.stopRecognition();
+            }
+
+            setText(text.toString());
+            AudiobookRecorder.window.bookTreeModel.reload(this);
+        } catch (Exception e) {
+            e.printStackTrace();
+            restoreTextAfterFailure(previousText);
+        }
+    }
+
+    /** Puts the given text back and refreshes the tree node; any error is only printed. */
+    private void restoreTextAfterFailure(String text) {
+        try {
+            setText(text);
+            AudiobookRecorder.window.bookTreeModel.reload(this);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Starts a new thread that builds a recognizer configured with this
+     * sentence's sample rate and passes it to doRecognition(). Returns
+     * without waiting; exceptions raised on that thread are printed.
+     */
     public void recognise() {
+        Thread t = new Thread(new Runnable() {
+            public void run() {
+                try {
+                    Configuration sphinxConfig = new Configuration();
+
+                    sphinxConfig.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
+                    sphinxConfig.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
+                    sphinxConfig.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
+                    sphinxConfig.setSampleRate((int)(getAudioFormat().getSampleRate()));
+
+                    doRecognition(new StreamSpeechRecognizer(sphinxConfig));
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        });
+
+        t.start();
     }
 
     public void setLocked(boolean l) {