Skip to content

Commit 53bf477

Browse files
committed
res_speech_vosk.c: Add horse power.
Allows multiple horses (URLs) for semi-concurrent processing. These horses could be multiple language models racing against each other. Requires additional patches to Asterisk. See [Asterisk #593](asterisk/asterisk#593) for further discussion and links to additional required patches. Resolves: alphacep#8, alphacep#35
1 parent c21d288 commit 53bf477

File tree

2 files changed

+78
-14
lines changed

2 files changed

+78
-14
lines changed

conf/res_speech_vosk.conf

+10
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,12 @@
11
[general]
22
url = ws://localhost:2700
3+
4+
; Multiple horses can race to get you results (horse sections are only read when [general] does not set url)
5+
6+
;[mage]
7+
;type=horse
8+
;url = ws://localhost:2700
9+
10+
;[secretariat]
11+
;type=horse
12+
;url = ws://localhost:2701

res-speech-vosk/res_speech_vosk.c

+68-14
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#define VOSK_ENGINE_NAME "vosk"
4343
#define VOSK_ENGINE_CONFIG "res_speech_vosk.conf"
4444
#define VOSK_BUF_SIZE 3200
45+
#define VOSK_MAX_HORSES 10
4546

4647
/** \brief Forward declaration of speech (client object) */
4748
typedef struct vosk_speech_t vosk_speech_t;
@@ -63,7 +64,11 @@ struct vosk_speech_t {
6364
/** \brief Declaration of Vosk recognition engine */
6465
struct vosk_engine_t {
6566
/* WebSocket URL of each configured Vosk server ("horse"); indexed in parallel with horse[] */
66-
char *ws_url;
67+
char *ws_url[VOSK_MAX_HORSES];
68+
/* Name of each horse: the configuration section name, or "" for the [general]/default URL */
69+
char *horse[VOSK_MAX_HORSES];
70+
/* Number of populated entries in ws_url[] and horse[] */
71+
int num_horses;
6772
};
6873

6974
static struct vosk_engine_t vosk_engine;
@@ -73,33 +78,46 @@ static int vosk_recog_create(struct ast_speech *speech, struct ast_format *forma
7378
{
7479
vosk_speech_t *vosk_speech;
7580
enum ast_websocket_result result;
81+
int jockey = 0;
7682

7783
vosk_speech = ast_calloc(1, sizeof(vosk_speech_t));
7884
vosk_speech->name = "vosk";
7985
speech->data = vosk_speech;
8086

81-
ast_debug(1, "(%s) Create speech resource %s\n",vosk_speech->name, vosk_engine.ws_url);
87+
for (jockey=0; jockey < vosk_engine.num_horses; jockey++) {
88+
if ((jockey == 0 && (speech->horse == NULL || ast_strlen_zero(speech->horse))) || !strcasecmp(vosk_engine.horse[jockey], speech->horse)) {
89+
ast_debug(1, "(%s) Create speech resource %d horse '%s' url %s\n", vosk_speech->name, jockey, speech->horse, vosk_engine.ws_url[jockey]);
90+
vosk_speech->ws = ast_websocket_client_create(vosk_engine.ws_url[jockey], "ws", NULL, &result);
91+
if (vosk_speech->ws) {
92+
ast_debug(1, "(%s) Created speech resource result %d\n", vosk_speech->name, result);
93+
} else {
94+
ast_free(speech->data);
95+
return -1;
96+
}
97+
break;
98+
}
99+
}
82100

83-
vosk_speech->ws = ast_websocket_client_create(vosk_engine.ws_url, "ws", NULL, &result);
84101
if (!vosk_speech->ws) {
85-
ast_free(speech->data);
102+
ast_log(LOG_WARNING, "Syntax Error in Vosk configuration and/or dial plan invocation.\n");
86103
return -1;
87-
}
88-
89-
ast_debug(1, "(%s) Created speech resource result %d\n", vosk_speech->name, result);
104+
}
90105

91106
return 0;
92107
}
93108

94109
/** \brief Destroy any data set on the speech structure by the engine */
95110
static int vosk_recog_destroy(struct ast_speech *speech)
96111
{
112+
const char *eof = "{\"eof\" : 1}";
113+
97114
vosk_speech_t *vosk_speech = speech->data;
98115
ast_debug(1, "(%s) Destroy speech resource\n",vosk_speech->name);
99116

100117
if (vosk_speech->ws) {
101118
int fd = ast_websocket_fd(vosk_speech->ws);
102119
if (fd > 0) {
120+
ast_websocket_write_string(vosk_speech->ws, eof);
103121
ast_websocket_close(vosk_speech->ws, 1000);
104122
shutdown(fd, SHUT_RDWR);
105123
}
@@ -150,11 +168,13 @@ static int vosk_recog_write(struct ast_speech *speech, void *data, int len)
150168
vosk_speech_t *vosk_speech = speech->data;
151169
char *res;
152170
int res_len;
171+
int i = 0;
153172

154173
ast_assert (vosk_speech->offset + len < VOSK_BUF_SIZE);
155174

156175
memcpy(vosk_speech->buf + vosk_speech->offset, data, len);
157176
vosk_speech->offset += len;
177+
158178
if (vosk_speech->offset == VOSK_BUF_SIZE) {
159179
ast_websocket_write(vosk_speech->ws, AST_WEBSOCKET_OPCODE_BINARY, vosk_speech->buf, VOSK_BUF_SIZE);
160180
vosk_speech->offset = 0;
@@ -203,8 +223,12 @@ static int vosk_recog_dtmf(struct ast_speech *speech, const char *dtmf)
203223
static int vosk_recog_start(struct ast_speech *speech)
204224
{
205225
vosk_speech_t *vosk_speech = speech->data;
206-
ast_debug(1, "(%s) Start recognition\n",vosk_speech->name);
226+
/* does not appear that reset has any effect
227+
const char *reset = "{\"reset\" : 1}";
228+
ast_websocket_write_string(vosk_speech->ws, reset);
229+
*/
207230
ast_speech_change_state(speech, AST_SPEECH_STATE_READY);
231+
ast_debug(1, "(%s) Start recognition\n",vosk_speech->name);
208232
return 0;
209233
}
210234

@@ -266,19 +290,44 @@ static struct ast_speech_engine ast_engine = {
266290
static int vosk_engine_config_load()
267291
{
268292
const char *value = NULL;
293+
char *category = NULL;
294+
int num_horses = 0;
269295
struct ast_flags config_flags = { 0 };
270296
struct ast_config *cfg = ast_config_load(VOSK_ENGINE_CONFIG, config_flags);
271297
if(!cfg) {
272298
ast_log(LOG_WARNING, "No such configuration file %s\n", VOSK_ENGINE_CONFIG);
273299
return -1;
274300
}
275-
if((value = ast_variable_retrieve(cfg, "general", "url")) != NULL) {
276-
ast_log(LOG_DEBUG, "general.url=%s\n", value);
277-
vosk_engine.ws_url = ast_strdup(value);
301+
302+
if ((value = ast_variable_retrieve(cfg, "general", "url")) != NULL) {
303+
ast_debug(1, "general.url=%s\n", value);
304+
vosk_engine.ws_url[0] = ast_strdup(value);
305+
vosk_engine.horse[0] = ast_strdup("");
306+
vosk_engine.num_horses = 1;
307+
} else {
308+
while (category = ast_category_browse(cfg, category)) {
309+
if (strcasecmp(category, "general") != 0) {
310+
if ((value = ast_variable_retrieve(cfg, category, "type")) != NULL) {
311+
if (!strcasecmp(value, "horse")) {
312+
if ((value = ast_variable_retrieve(cfg, category, "url")) != NULL) {
313+
ast_debug(1, "%s.horse.url=%s\n", category, value);
314+
vosk_engine.ws_url[num_horses] = ast_strdup(value);
315+
vosk_engine.horse[num_horses] = ast_strdup(category);
316+
vosk_engine.num_horses = ++num_horses;
317+
}
318+
}
319+
}
320+
}
321+
}
278322
}
279-
if (!vosk_engine.ws_url) {
280-
vosk_engine.ws_url = ast_strdup("ws://localhost");
323+
324+
if (!vosk_engine.ws_url[0]) {
325+
vosk_engine.ws_url[0] = ast_strdup("ws://localhost");
326+
vosk_engine.horse[0] = ast_strdup("");
327+
vosk_engine.num_horses = 1;
328+
ast_debug(1, "default general.url=%s\n", vosk_engine.ws_url[0]);
281329
}
330+
282331
ast_config_destroy(cfg);
283332
return 0;
284333
}
@@ -311,12 +360,17 @@ static int load_module(void)
311360
/** \brief Unload module */
312361
static int unload_module(void)
313362
{
363+
int i = 0;
364+
314365
ast_log(LOG_NOTICE, "Unload res_speech_vosk module\n");
315366
if(ast_speech_unregister(VOSK_ENGINE_NAME)) {
316367
ast_log(LOG_ERROR, "Failed to unregister module\n");
317368
}
318369

319-
ast_free(vosk_engine.ws_url);
370+
for(i=0;i<vosk_engine.num_horses;i++) {
371+
ast_free(vosk_engine.ws_url[i]);
372+
ast_free(vosk_engine.horse[i]);
373+
}
320374
return 0;
321375
}
322376

0 commit comments

Comments
 (0)