Moved speech recognition to configurable external command with worker thread pool

This commit is contained in:
2020-01-31 17:29:00 +00:00
parent 5e310b0224
commit 4c25fccc86
6 changed files with 102 additions and 86 deletions

View File

@@ -21,9 +21,6 @@ import javax.imageio.*;
import org.w3c.dom.*;
import javax.xml.parsers.*;
import java.io.*;
import edu.cmu.sphinx.api.*;
import edu.cmu.sphinx.decoder.adaptation.*;
import edu.cmu.sphinx.result.*;
import org.w3c.dom.Node;
import java.util.concurrent.*;
@@ -45,8 +42,6 @@ public class AudiobookRecorder extends JFrame implements DocumentListener {
// Settings - tweakable
public static final String SPHINX_MODEL = "resource:/edu/cmu/sphinx/models/en-us/en-us";
static Properties config = new Properties();
TreeMap<String, EffectGroup> effects;
@@ -147,25 +142,9 @@ public class AudiobookRecorder extends JFrame implements DocumentListener {
public TargetDataLine microphone = null;
public AudioInputStream microphoneStream = null;
public Configuration sphinxConfig;
public StreamSpeechRecognizer recognizer;
public static AudiobookRecorder window;
void initSphinx() {
Debug.trace();
sphinxConfig = new Configuration();
sphinxConfig.setAcousticModelPath(AudiobookRecorder.SPHINX_MODEL);
sphinxConfig.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
sphinxConfig.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
try {
recognizer = new StreamSpeechRecognizer(sphinxConfig);
} catch (Exception e) {
e.printStackTrace();
}
}
public Queue<Runnable>speechProcessQueue = null;
void buildToolbar(Container ob) {
Debug.trace();
@@ -381,6 +360,8 @@ public class AudiobookRecorder extends JFrame implements DocumentListener {
Debug.debugEnabled = CLI.isSet("debug");
Debug.traceEnabled = CLI.isSet("trace");
speechProcessQueue = new ArrayDeque<Runnable>();
try {
String clsname = "com.jtattoo.plaf.hifi.HiFiLookAndFeel";
@@ -404,6 +385,12 @@ public class AudiobookRecorder extends JFrame implements DocumentListener {
Options.loadPreferences();
for (int i = 0; i < Options.getInteger("process.threads"); i++) {
WorkerThread worker = new WorkerThread(speechProcessQueue);
worker.start();
}
execScript(Options.get("scripts.startup"));
CacheManager.setCacheSize(Options.getInteger("cache.size"));
@@ -1136,24 +1123,11 @@ public class AudiobookRecorder extends JFrame implements DocumentListener {
public void run() {
Debug.trace();
try {
Configuration sphinxConfig = new Configuration();
sphinxConfig.setAcousticModelPath(AudiobookRecorder.SPHINX_MODEL);
sphinxConfig.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
sphinxConfig.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
sphinxConfig.setSampleRate((int)(book.getAudioFormat().getSampleRate() / 4f));
StreamSpeechRecognizer recognizer;
recognizer = new StreamSpeechRecognizer(sphinxConfig);
for (Enumeration s = chapter.children(); s.hasMoreElements();) {
Sentence snt = (Sentence)s.nextElement();
if (!snt.isLocked()) {
if (snt.getId().equals(snt.getText())) {
snt.doRecognition(recognizer);
snt.doRecognition();
}
}
}
@@ -1629,9 +1603,20 @@ public class AudiobookRecorder extends JFrame implements DocumentListener {
Debug.trace();
JMenuObject o = (JMenuObject)e.getSource();
Chapter c = (Chapter)o.getObject();
BatchConversionThread r = new BatchConversionThread(c);
Thread t = new Thread(r);
t.start();
for (Enumeration s = c.children(); s.hasMoreElements();) {
Sentence snt = (Sentence)s.nextElement();
if (!snt.isLocked()) {
if (snt.getId().equals(snt.getText())) {
Debug.d("Queueing recognition of", snt.getId());
synchronized(speechProcessQueue) {
Runnable r = snt.getRecognitionRunnable();
speechProcessQueue.add(r);
speechProcessQueue.notify();
}
}
}
}
}
});

View File

@@ -50,6 +50,9 @@ public class Options extends JDialog {
JTextField externalEditor;
JTextField speechCommand;
JSpinner workerThreads;
JTextArea startupScript;
ArrayList<JTextField[]> processorList;
@@ -331,7 +334,9 @@ public class Options extends JDialog {
addSeparator(optionsPanel);
enableParsing = addCheckBox(optionsPanel, "Enable automatic sphinx speech-to-text (**SLOW**)", getBoolean("process.sphinx"));
enableParsing = addCheckBox(optionsPanel, "Enable automatic speech-to-text (**SLOW**)", getBoolean("process.sphinx"));
speechCommand = addTextField(optionsPanel, "Speech to text command (must take 1 filename parameter):", get("process.command"));
workerThreads = addSpinner(optionsPanel, "Worker threads:", 1, 100, 1, getInteger("process.threads"), "");
addSeparator(optionsPanel);
@@ -594,6 +599,8 @@ public class Options extends JDialog {
defaultPrefs.put("audio.export.channels", "2");
defaultPrefs.put("audio.export.samplerate", "44100");
defaultPrefs.put("process.sphinx", "false");
defaultPrefs.put("process.command", "speech-to-text \"%f\"");
defaultPrefs.put("process.threads", "10");
defaultPrefs.put("editor.external", "");
@@ -718,6 +725,8 @@ public class Options extends JDialog {
set("audio.export.channels", ((KVPair)channels.getSelectedItem()).key);
set("audio.export.samplerate", ((KVPair)exportRate.getSelectedItem()).key);
set("process.sphinx", enableParsing.isSelected());
set("process.command", speechCommand.getText());
set("process.threads", workerThreads.getValue());
set("editor.external", externalEditor.getText());
set("cache.size", cacheSize.getValue());
set("audio.recording.trim.fft", fftThreshold.getValue());

View File

@@ -21,10 +21,6 @@ import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import edu.cmu.sphinx.api.*;
import edu.cmu.sphinx.decoder.adaptation.*;
import edu.cmu.sphinx.result.*;
import org.json.*;
import java.util.Timer;
@@ -685,23 +681,35 @@ public class Sentence extends BookTreeNode implements Cacheable {
return null;
}
public void doRecognition(StreamSpeechRecognizer recognizer) {
/**
 * Wraps recognition of this sentence in a {@link Runnable} so it can be handed
 * to another thread for execution.
 *
 * @return a task that logs the sentence id and then calls {@code doRecognition()}
 */
public Runnable getRecognitionRunnable() {
    return new Runnable() {
        @Override
        public void run() {
            Debug.d("Starting recognition of", getId());
            doRecognition();
        }
    };
}
public void doRecognition() {
Debug.trace();
try {
setText("[recognising...]");
reloadTree();
byte[] inData = getPCMData();
String command = Options.get("process.command");
Debug.d("Recognizing with command", command);
ProcessBuilder builder = new ProcessBuilder(command, getFile().getCanonicalPath());
Process process = builder.start();
InputStream is = process.getInputStream();
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
ByteArrayInputStream bas = new ByteArrayInputStream(inData);
recognizer.startRecognition(bas);
SpeechResult result;
String res = "";
while ((result = recognizer.getResult()) != null) {
res += result.getHypothesis();
res += " ";
String line = null;
while ((line = reader.readLine()) != null) {
res += line;
}
recognizer.stopRecognition();
setText(res);
reloadTree();
@@ -712,32 +720,7 @@ public class Sentence extends BookTreeNode implements Cacheable {
public void recognise() {
Debug.trace();
Thread t = new Thread(new Runnable() {
public void run() {
Debug.trace();
try {
Configuration sphinxConfig = new Configuration();
sphinxConfig.setAcousticModelPath(AudiobookRecorder.SPHINX_MODEL);
sphinxConfig.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
sphinxConfig.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
AudioInputStream s = AudioSystem.getAudioInputStream(getFile());
AudioFormat format = getAudioFormat();
sphinxConfig.setSampleRate((int)(format.getSampleRate()));
StreamSpeechRecognizer recognizer;
recognizer = new StreamSpeechRecognizer(sphinxConfig);
doRecognition(recognizer);
} catch (Exception e) {
e.printStackTrace();
}
}
});
Thread t = new Thread(getRecognitionRunnable());
t.start();
}
@@ -1668,7 +1651,9 @@ public class Sentence extends BookTreeNode implements Cacheable {
Debug.trace();
if (id.equals("room-noise")) return;
if (getParent() == null) return;
AudiobookRecorder.window.bookTreeModel.reload(this);
synchronized (AudiobookRecorder.window.bookTreeModel) {
AudiobookRecorder.window.bookTreeModel.reload(this);
}
}
public double getPeak() {

View File

@@ -0,0 +1,43 @@
package uk.co.majenko.audiobookrecorder;
import java.util.Queue;
/**
 * Long-running worker that executes {@link Runnable} tasks taken from a shared queue.
 *
 * <p>The queue object doubles as the monitor: the worker waits on it while it is
 * empty, so whoever adds work must do so (and call {@code notify()}) while
 * synchronized on the same queue instance. The worker keeps running until it is
 * interrupted while waiting for work.
 */
public class WorkerThread extends Thread {
    /** Counter used only to give each worker a distinct thread name. */
    private static int instance = 0;

    /** Shared work queue; also used as the lock object for wait/notify. */
    private final Queue<Runnable> queue;

    /**
     * Creates a worker bound to the given queue. The thread is not started here.
     *
     * @param queue the shared queue this worker takes tasks from
     */
    public WorkerThread(Queue<Runnable> queue) {
        this.queue = queue;
        setName("Worker Thread " + (instance++));
    }

    /**
     * Repeatedly waits for a task, removes it from the queue and runs it outside
     * the lock so other workers can take further tasks concurrently.
     */
    @Override
    public void run() {
        Debug.d(getName(), "started");
        while (true) {
            Runnable work;
            try {
                synchronized (queue) {
                    // Loop rather than "if": wait() may return without work being available.
                    while (queue.isEmpty()) {
                        Debug.d(getName(), "waiting on work");
                        queue.wait();
                    }
                    Debug.d(getName(), "got work");
                    // Get the next work item off of the queue
                    work = queue.remove();
                }
            } catch (InterruptedException ie) {
                ie.printStackTrace();
                // Restore the interrupt status so code observing this thread can see it.
                Thread.currentThread().interrupt();
                break; // Terminate
            }

            // Process the work item. A failing task must not take the worker down
            // with it, otherwise the pool silently shrinks for the rest of the session.
            try {
                work.run();
            } catch (RuntimeException e) {
                e.printStackTrace();
            }
        }
        Debug.d(getName(), "died");
    }
}