Reinstate sphinx to replace defunct Haven

This commit is contained in:
2019-07-18 15:07:04 +01:00
parent 45d6882527
commit ebf961449a
5 changed files with 131 additions and 6 deletions

BIN
deps/sphinx4-core-5prealpha-SNAPSHOT.jar LFS vendored Normal file

Binary file not shown.

BIN
deps/sphinx4-data-5prealpha-SNAPSHOT.jar LFS vendored Normal file

Binary file not shown.

View File

@@ -20,6 +20,10 @@ import javax.imageio.*;
import org.w3c.dom.*; import org.w3c.dom.*;
import javax.xml.parsers.*; import javax.xml.parsers.*;
import java.io.*; import java.io.*;
import edu.cmu.sphinx.api.*;
import edu.cmu.sphinx.decoder.adaptation.*;
import edu.cmu.sphinx.result.*;
import org.w3c.dom.Node;
public class AudiobookRecorder extends JFrame { public class AudiobookRecorder extends JFrame {
@@ -104,8 +108,25 @@ public class AudiobookRecorder extends JFrame {
public TargetDataLine microphone = null; public TargetDataLine microphone = null;
public AudioInputStream microphoneStream = null; public AudioInputStream microphoneStream = null;
public Configuration sphinxConfig;
public StreamSpeechRecognizer recognizer;
public static AudiobookRecorder window; public static AudiobookRecorder window;
/**
 * Creates the Sphinx configuration and the stream recogniser held by this window.
 *
 * The configuration points at the en-us acoustic model, dictionary and
 * language model resources. If the recogniser cannot be constructed the
 * stack trace is printed and {@code recognizer} keeps its previous value.
 */
void initSphinx() {
    final Configuration cfg = new Configuration();
    sphinxConfig = cfg;

    cfg.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
    cfg.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
    cfg.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");

    try {
        recognizer = new StreamSpeechRecognizer(cfg);
    } catch (Exception ex) {
        // Best effort: report the failure and carry on without a recogniser.
        ex.printStackTrace();
    }
}
void buildToolbar(Container ob) { void buildToolbar(Container ob) {
toolBar = new MainToolBar(this); toolBar = new MainToolBar(this);
toolBar.addSeparator(); toolBar.addSeparator();
@@ -790,6 +811,42 @@ public class AudiobookRecorder extends JFrame {
} }
} }
/**
 * Background job that runs speech recognition over the sentences of one chapter.
 *
 * A single recogniser is created per run and reused for every sentence.
 * Only sentences that are not locked and whose text is still equal to their
 * id are passed to {@code Sentence.doRecognition}; all others are left alone.
 */
class BatchConversionThread implements Runnable {
    Chapter chapter;

    /**
     * @param c the chapter whose child sentences should be recognised
     */
    public BatchConversionThread(Chapter c) {
        chapter = c;
    }

    /**
     * Configures Sphinx with the en-us models and walks the chapter's children.
     * Any exception aborts the remainder of the batch and is printed to stderr.
     */
    public void run() {
        try {
            Configuration cfg = new Configuration();
            cfg.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
            cfg.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
            cfg.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");

            // NOTE(review): a quarter of the book's sample rate is configured here,
            // whereas Sentence.recognise() uses the full rate - confirm which is intended.
            float bookRate = book.getAudioFormat().getSampleRate();
            cfg.setSampleRate((int)(bookRate / 4f));

            StreamSpeechRecognizer sharedRecognizer = new StreamSpeechRecognizer(cfg);

            Enumeration<?> children = chapter.children();
            while (children.hasMoreElements()) {
                Sentence sentence = (Sentence)children.nextElement();

                // Skip locked sentences and those whose text no longer matches
                // their id (presumably already transcribed or edited).
                if (sentence.isLocked() || !sentence.getId().equals(sentence.getText())) {
                    continue;
                }
                sentence.doRecognition(sharedRecognizer);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
void treePopup(MouseEvent e) { void treePopup(MouseEvent e) {
@@ -811,6 +868,7 @@ public class AudiobookRecorder extends JFrame {
JMenuObject o = (JMenuObject)e.getSource(); JMenuObject o = (JMenuObject)e.getSource();
Sentence s = (Sentence)o.getObject(); Sentence s = (Sentence)o.getObject();
if (!s.isLocked()) { if (!s.isLocked()) {
s.recognise();
} }
} }
}); });
@@ -1120,11 +1178,9 @@ public class AudiobookRecorder extends JFrame {
public void actionPerformed(ActionEvent e) { public void actionPerformed(ActionEvent e) {
JMenuObject o = (JMenuObject)e.getSource(); JMenuObject o = (JMenuObject)e.getSource();
Chapter c = (Chapter)o.getObject(); Chapter c = (Chapter)o.getObject();
for (Enumeration s = c.children(); s.hasMoreElements();) { BatchConversionThread r = new BatchConversionThread(c);
Sentence snt = (Sentence)s.nextElement(); Thread t = new Thread(r);
if (snt.getId().equals(snt.getText())) { t.start();
}
}
} }
}); });

View File

@@ -321,6 +321,10 @@ public class Options extends JDialog {
exportRate = addDropdown(optionsPanel, "Export sample rate:", getSampleRateList(), get("audio.export.samplerate")); exportRate = addDropdown(optionsPanel, "Export sample rate:", getSampleRateList(), get("audio.export.samplerate"));
addSeparator(optionsPanel);
enableParsing = addCheckBox(optionsPanel, "Enable automatic sphinx speech-to-text (**SLOW**)", getBoolean("process.sphinx"));
addSeparator(optionsPanel); addSeparator(optionsPanel);
externalEditor = addTextField(optionsPanel, "External Editor Command", get("editor.external")); externalEditor = addTextField(optionsPanel, "External Editor Command", get("editor.external"));
@@ -577,6 +581,7 @@ public class Options extends JDialog {
defaultPrefs.put("audio.export.bitrate", "256000"); defaultPrefs.put("audio.export.bitrate", "256000");
defaultPrefs.put("audio.export.samplerate", "44100"); defaultPrefs.put("audio.export.samplerate", "44100");
defaultPrefs.put("process.sphinx", "false");
defaultPrefs.put("editor.external", ""); defaultPrefs.put("editor.external", "");
@@ -699,6 +704,7 @@ public class Options extends JDialog {
set("catenation.post-section", postSectionGap.getValue()); set("catenation.post-section", postSectionGap.getValue());
set("audio.export.bitrate", ((KVPair)bitRate.getSelectedItem()).key); set("audio.export.bitrate", ((KVPair)bitRate.getSelectedItem()).key);
set("audio.export.samplerate", ((KVPair)exportRate.getSelectedItem()).key); set("audio.export.samplerate", ((KVPair)exportRate.getSelectedItem()).key);
set("process.sphinx", enableParsing.isSelected());
set("editor.external", externalEditor.getText()); set("editor.external", externalEditor.getText());
set("cache.size", cacheSize.getValue()); set("cache.size", cacheSize.getValue());
set("audio.recording.trim.fft", fftThreshold.getValue()); set("audio.recording.trim.fft", fftThreshold.getValue());

View File

@@ -21,6 +21,10 @@ import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils; import org.apache.http.util.EntityUtils;
import edu.cmu.sphinx.api.*;
import edu.cmu.sphinx.decoder.adaptation.*;
import edu.cmu.sphinx.result.*;
import org.json.*; import org.json.*;
import java.util.Timer; import java.util.Timer;
@@ -182,8 +186,10 @@ public class Sentence extends DefaultMutableTreeNode implements Cacheable {
} else if (tm.equals("fft")) { } else if (tm.equals("fft")) {
autoTrimSampleFFT(); autoTrimSampleFFT();
} }
if (Options.getBoolean("process.sphinx")) {
recognise();
}
} }
} }
public static final int FFTBuckets = 1024; public static final int FFTBuckets = 1024;
@@ -483,7 +489,58 @@ public class Sentence extends DefaultMutableTreeNode implements Cacheable {
return null; return null;
} }
/**
 * Runs speech recognition over this sentence's PCM audio and stores the
 * recognised words as the sentence text.
 *
 * While recognition is running the text is set to the placeholder
 * {@code "[recognising...]"}. Each hypothesis returned by the recogniser is
 * appended, followed by a single space, and the accumulated text so far is
 * echoed to stderr. On any failure the stack trace is printed and the text
 * that was present before the call is restored, so the placeholder is never
 * left behind. Exceptions raised during recognition are not propagated.
 *
 * @param recognizer the recogniser to use; it is started and stopped by this
 *                   method, so the same instance may be passed to later calls
 */
public void doRecognition(StreamSpeechRecognizer recognizer) {
    // Remembered so a failed run can put the original text back.
    final String previousText = getText();
    try {
        setText("[recognising...]");
        reloadTreeNodeOnEventThread();

        byte[] inData = getPCMData();
        ByteArrayInputStream bas = new ByteArrayInputStream(inData);
        StringBuilder res = new StringBuilder();

        recognizer.startRecognition(bas);
        try {
            SpeechResult result;
            while ((result = recognizer.getResult()) != null) {
                res.append(result.getHypothesis()).append(' ');
                System.err.println(res);
            }
        } finally {
            // Always release the recogniser once it has been started; otherwise
            // a caller that reuses it for the next sentence finds it still running.
            recognizer.stopRecognition();
        }

        setText(res.toString());
    } catch (Exception e) {
        e.printStackTrace();
        setText(previousText);
    }
    reloadTreeNodeOnEventThread();
}

/**
 * Asks the book tree model to reload this node on the Swing event dispatch
 * thread, because doRecognition is invoked from background threads.
 */
private void reloadTreeNodeOnEventThread() {
    javax.swing.SwingUtilities.invokeLater(new Runnable() {
        public void run() {
            AudiobookRecorder.window.bookTreeModel.reload(Sentence.this);
        }
    });
}
public void recognise() { public void recognise() {
Thread t = new Thread(new Runnable() {
public void run() {
try {
Configuration sphinxConfig = new Configuration();
sphinxConfig.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
sphinxConfig.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
sphinxConfig.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
AudioInputStream s = AudioSystem.getAudioInputStream(getFile());
AudioFormat format = getAudioFormat();
sphinxConfig.setSampleRate((int)(format.getSampleRate()));
StreamSpeechRecognizer recognizer;
recognizer = new StreamSpeechRecognizer(sphinxConfig);
doRecognition(recognizer);
} catch (Exception e) {
e.printStackTrace();
}
}
});
t.start();
} }
public void setLocked(boolean l) { public void setLocked(boolean l) {