00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include <string.h>
00039 #include <assert.h>
00040
00041 #include "ckd_alloc.h"
00042 #include "strfuncs.h"
00043 #include "hash_table.h"
00044 #include "err.h"
00045
00046 #include "jsgf_internal.h"
00047 #include "jsgf_parser.h"
00048 #include "jsgf_scanner.h"
00049
00050 int yyparse (yyscan_t yyscanner, jsgf_t *jsgf);
00051
00059 jsgf_atom_t *
00060 jsgf_atom_new(char *name, float weight)
00061 {
00062 jsgf_atom_t *atom;
00063
00064 atom = ckd_calloc(1, sizeof(*atom));
00065 atom->name = ckd_salloc(name);
00066 atom->weight = weight;
00067 return atom;
00068 }
00069
00070 int
00071 jsgf_atom_free(jsgf_atom_t *atom)
00072 {
00073 if (atom == NULL)
00074 return 0;
00075 ckd_free(atom->name);
00076 ckd_free(atom);
00077 return 0;
00078 }
00079
00080 jsgf_t *
00081 jsgf_grammar_new(jsgf_t *parent)
00082 {
00083 jsgf_t *grammar;
00084
00085 grammar = ckd_calloc(1, sizeof(*grammar));
00086
00087
00088 if (parent) {
00089 grammar->rules = parent->rules;
00090 grammar->imports = parent->imports;
00091 grammar->searchpath = parent->searchpath;
00092 grammar->parent = parent;
00093 }
00094 else {
00095 char *jsgf_path;
00096
00097 grammar->rules = hash_table_new(64, 0);
00098 grammar->imports = hash_table_new(16, 0);
00099
00100
00101 #if !defined(_WIN32_WCE)
00102 if ((jsgf_path = getenv("JSGF_PATH")) != NULL) {
00103 char *word, *c;
00104
00105
00106
00107 word = jsgf_path = ckd_salloc(jsgf_path);
00108 while ((c = strchr(word, ':'))) {
00109 *c = '\0';
00110 grammar->searchpath = glist_add_ptr(grammar->searchpath, word);
00111 word = c + 1;
00112 }
00113 grammar->searchpath = glist_add_ptr(grammar->searchpath, word);
00114 grammar->searchpath = glist_reverse(grammar->searchpath);
00115 }
00116 else {
00117
00118 grammar->searchpath = glist_add_ptr(grammar->searchpath, ckd_salloc("."));
00119 }
00120 #endif
00121 }
00122
00123 return grammar;
00124 }
00125
00126 void
00127 jsgf_grammar_free(jsgf_t *jsgf)
00128 {
00129
00130 if (jsgf->parent == NULL) {
00131 hash_iter_t *itor;
00132 gnode_t *gn;
00133
00134 for (itor = hash_table_iter(jsgf->rules); itor;
00135 itor = hash_table_iter_next(itor)) {
00136 ckd_free((char *)itor->ent->key);
00137 jsgf_rule_free((jsgf_rule_t *)itor->ent->val);
00138 }
00139 hash_table_free(jsgf->rules);
00140 for (itor = hash_table_iter(jsgf->imports); itor;
00141 itor = hash_table_iter_next(itor)) {
00142 ckd_free((char *)itor->ent->key);
00143 jsgf_grammar_free((jsgf_t *)itor->ent->val);
00144 }
00145 hash_table_free(jsgf->imports);
00146 for (gn = jsgf->searchpath; gn; gn = gnode_next(gn))
00147 ckd_free(gnode_ptr(gn));
00148 glist_free(jsgf->searchpath);
00149 for (gn = jsgf->links; gn; gn = gnode_next(gn))
00150 ckd_free(gnode_ptr(gn));
00151 glist_free(jsgf->links);
00152 }
00153 ckd_free(jsgf->name);
00154 ckd_free(jsgf->version);
00155 ckd_free(jsgf->charset);
00156 ckd_free(jsgf->locale);
00157 ckd_free(jsgf);
00158 }
00159
00160 static void
00161 jsgf_rhs_free(jsgf_rhs_t *rhs)
00162 {
00163 gnode_t *gn;
00164
00165 if (rhs == NULL)
00166 return;
00167
00168 jsgf_rhs_free(rhs->alt);
00169 for (gn = rhs->atoms; gn; gn = gnode_next(gn))
00170 jsgf_atom_free(gnode_ptr(gn));
00171 glist_free(rhs->atoms);
00172 ckd_free(rhs);
00173 }
00174
00175 jsgf_atom_t *
00176 jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus)
00177 {
00178 jsgf_rule_t *rule;
00179 jsgf_atom_t *rule_atom;
00180 jsgf_rhs_t *rhs;
00181
00182
00183
00184 rhs = ckd_calloc(1, sizeof(*rhs));
00185 if (plus)
00186 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0));
00187 else
00188 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("<NULL>", 1.0));
00189 rule = jsgf_define_rule(jsgf, NULL, rhs, 0);
00190 rule_atom = jsgf_atom_new(rule->name, 1.0);
00191 rhs = ckd_calloc(1, sizeof(*rhs));
00192 rhs->atoms = glist_add_ptr(NULL, rule_atom);
00193 rhs->atoms = glist_add_ptr(rhs->atoms, atom);
00194 rule->rhs->alt = rhs;
00195
00196 return jsgf_atom_new(rule->name, 1.0);
00197 }
00198
00199 jsgf_rule_t *
00200 jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp)
00201 {
00202 jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs));
00203 jsgf_atom_t *atom = jsgf_atom_new("<NULL>", 1.0);
00204 rhs->alt = exp;
00205 rhs->atoms = glist_add_ptr(NULL, atom);
00206 return jsgf_define_rule(jsgf, NULL, rhs, 0);
00207 }
00208
00209 void
00210 jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to)
00211 {
00212 jsgf_link_t *link;
00213
00214 link = ckd_calloc(1, sizeof(*link));
00215 link->from = from;
00216 link->to = to;
00217 link->atom = atom;
00218 grammar->links = glist_add_ptr(grammar->links, link);
00219 }
00220
00221 static char *
00222 extract_grammar_name(char *rule_name)
00223 {
00224 char* dot_pos;
00225 char* grammar_name = ckd_salloc(rule_name+1);
00226 if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) {
00227 ckd_free(grammar_name);
00228 return NULL;
00229 }
00230 *dot_pos='\0';
00231 return grammar_name;
00232 }
00233
00234 static char *
00235 jsgf_fullname(jsgf_t *jsgf, const char *name)
00236 {
00237 char *fullname;
00238
00239
00240 if (strchr(name + 1, '.'))
00241 return ckd_salloc(name);
00242
00243
00244 fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4);
00245 sprintf(fullname, "<%s.%s", jsgf->name, name + 1);
00246 return fullname;
00247 }
00248
00249 static char *
00250 jsgf_fullname_from_rule(jsgf_rule_t *rule, const char *name)
00251 {
00252 char *fullname, *grammar_name;
00253
00254
00255 if (strchr(name + 1, '.'))
00256 return ckd_salloc(name);
00257
00258
00259 if ((grammar_name = extract_grammar_name(rule->name)) == NULL)
00260 return ckd_salloc(name);
00261 fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4);
00262 sprintf(fullname, "<%s.%s", grammar_name, name + 1);
00263 ckd_free(grammar_name);
00264
00265 return fullname;
00266 }
00267
00268
00269
00270 static char *
00271 importname2rulename(char *importname)
00272 {
00273 char *rulename = ckd_salloc(importname);
00274 char *last_dotpos;
00275 char *secondlast_dotpos;
00276
00277 if ((last_dotpos = strrchr(rulename+1, '.')) != NULL) {
00278 *last_dotpos='\0';
00279 if ((secondlast_dotpos = strrchr(rulename+1, '.')) != NULL) {
00280 *last_dotpos='.';
00281 *secondlast_dotpos='<';
00282 secondlast_dotpos = ckd_salloc(secondlast_dotpos);
00283 ckd_free(rulename);
00284 return secondlast_dotpos;
00285 }
00286 else {
00287 *last_dotpos='.';
00288 return rulename;
00289 }
00290 }
00291 else {
00292 return rulename;
00293 }
00294 }
00295
00296 static int expand_rule(jsgf_t *grammar, jsgf_rule_t *rule);
00297 static int
00298 expand_rhs(jsgf_t *grammar, jsgf_rule_t *rule, jsgf_rhs_t *rhs)
00299 {
00300 gnode_t *gn;
00301 int lastnode;
00302
00303
00304 lastnode = rule->entry;
00305
00306
00307 for (gn = rhs->atoms; gn; gn = gnode_next(gn)) {
00308 jsgf_atom_t *atom = gnode_ptr(gn);
00309 if (jsgf_atom_is_rule(atom)) {
00310 jsgf_rule_t *subrule;
00311 char *fullname;
00312 gnode_t *subnode;
00313 void *val;
00314
00315
00316 if (0 == strcmp(atom->name, "<NULL>")) {
00317
00318 jsgf_add_link(grammar, atom,
00319 lastnode, grammar->nstate);
00320 lastnode = grammar->nstate;
00321 ++grammar->nstate;
00322 continue;
00323 }
00324 else if (0 == strcmp(atom->name, "<VOID>")) {
00325
00326 return -1;
00327 }
00328
00329 fullname = jsgf_fullname_from_rule(rule, atom->name);
00330 if (hash_table_lookup(grammar->rules, fullname, &val) == -1) {
00331 E_ERROR("Undefined rule in RHS: %s\n", fullname);
00332 ckd_free(fullname);
00333 return -1;
00334 }
00335 ckd_free(fullname);
00336 subrule = val;
00337
00338 for (subnode = grammar->rulestack; subnode; subnode = gnode_next(subnode))
00339 if (gnode_ptr(subnode) == (void *)subrule)
00340 break;
00341 if (subnode != NULL) {
00342
00343 if (gnode_next(gn) != NULL) {
00344 E_ERROR("Only right-recursion is permitted (in %s.%s)\n",
00345 grammar->name, rule->name);
00346 return -1;
00347 }
00348
00349 E_INFO("Right recursion %s %d => %d\n", atom->name, lastnode, subrule->entry);
00350 jsgf_add_link(grammar, atom, lastnode, subrule->entry);
00351 }
00352 else {
00353
00354 if (expand_rule(grammar, subrule) == -1)
00355 return -1;
00356
00357 jsgf_add_link(grammar, atom,
00358 lastnode, subrule->entry);
00359 lastnode = subrule->exit;
00360 }
00361 }
00362 else {
00363
00364 jsgf_add_link(grammar, atom,
00365 lastnode, grammar->nstate);
00366 lastnode = grammar->nstate;
00367 ++grammar->nstate;
00368 }
00369 }
00370
00371 return lastnode;
00372 }
00373
00374 static int
00375 expand_rule(jsgf_t *grammar, jsgf_rule_t *rule)
00376 {
00377 jsgf_rhs_t *rhs;
00378 float norm;
00379
00380
00381 grammar->rulestack = glist_add_ptr(grammar->rulestack, rule);
00382
00383
00384 norm = 0;
00385 for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
00386 if (rhs->atoms) {
00387 jsgf_atom_t *atom = gnode_ptr(rhs->atoms);
00388 norm += atom->weight;
00389 }
00390 }
00391
00392 rule->entry = grammar->nstate++;
00393 rule->exit = grammar->nstate++;
00394 if (norm == 0) norm = 1;
00395 for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
00396 int lastnode;
00397
00398 if (rhs->atoms) {
00399 jsgf_atom_t *atom = gnode_ptr(rhs->atoms);
00400 atom->weight /= norm;
00401 }
00402 lastnode = expand_rhs(grammar, rule, rhs);
00403 if (lastnode == -1) {
00404 return -1;
00405 }
00406 else {
00407 jsgf_add_link(grammar, NULL, lastnode, rule->exit);
00408 }
00409 }
00410
00411
00412 grammar->rulestack = gnode_free(grammar->rulestack, NULL);
00413 return rule->exit;
00414 }
00415
00416 jsgf_rule_iter_t *
00417 jsgf_rule_iter(jsgf_t *grammar)
00418 {
00419 return hash_table_iter(grammar->rules);
00420 }
00421
00422 jsgf_rule_t *
00423 jsgf_get_rule(jsgf_t *grammar, char const *name)
00424 {
00425 void *val;
00426
00427 if (hash_table_lookup(grammar->rules, name, &val) < 0)
00428 return NULL;
00429 return (jsgf_rule_t *)val;
00430 }
00431
00432 char const *
00433 jsgf_rule_name(jsgf_rule_t *rule)
00434 {
00435 return rule->name;
00436 }
00437
00438 int
00439 jsgf_rule_public(jsgf_rule_t *rule)
00440 {
00441 return rule->public;
00442 }
00443
00444 static fsg_model_t *
00445 jsgf_build_fsg_internal(jsgf_t *grammar, jsgf_rule_t *rule,
00446 logmath_t *lmath, float32 lw, int do_closure)
00447 {
00448 fsg_model_t *fsg;
00449 glist_t nulls;
00450 gnode_t *gn;
00451
00452
00453 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
00454 ckd_free(gnode_ptr(gn));
00455 }
00456 glist_free(grammar->links);
00457 grammar->links = NULL;
00458 rule->entry = rule->exit = 0;
00459 grammar->nstate = 0;
00460 expand_rule(grammar, rule);
00461
00462 fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
00463 fsg->start_state = rule->entry;
00464 fsg->final_state = rule->exit;
00465 grammar->links = glist_reverse(grammar->links);
00466 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
00467 jsgf_link_t *link = gnode_ptr(gn);
00468
00469 if (link->atom) {
00470 if (jsgf_atom_is_rule(link->atom)) {
00471 fsg_model_null_trans_add(fsg, link->from, link->to,
00472 logmath_log(lmath, link->atom->weight));
00473 }
00474 else {
00475 int wid = fsg_model_word_add(fsg, link->atom->name);
00476 fsg_model_trans_add(fsg, link->from, link->to,
00477 logmath_log(lmath, link->atom->weight), wid);
00478 }
00479 }
00480 else {
00481 fsg_model_null_trans_add(fsg, link->from, link->to, 0);
00482 }
00483 }
00484 if (do_closure) {
00485 nulls = fsg_model_null_trans_closure(fsg, NULL);
00486 glist_free(nulls);
00487 }
00488
00489 return fsg;
00490 }
00491
00492 fsg_model_t *
00493 jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule,
00494 logmath_t *lmath, float32 lw)
00495 {
00496 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE);
00497 }
00498
00499 fsg_model_t *
00500 jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule,
00501 logmath_t *lmath, float32 lw)
00502 {
00503 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE);
00504 }
00505
00506 int
00507 jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh)
00508 {
00509 fsg_model_t *fsg;
00510 logmath_t *lmath = logmath_init(1.0001, 0, 0);
00511
00512 if ((fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0)) == NULL)
00513 goto error_out;
00514
00515 fsg_model_write(fsg, outfh);
00516 logmath_free(lmath);
00517 return 0;
00518
00519 error_out:
00520 logmath_free(lmath);
00521 return -1;
00522 }
00523 jsgf_rule_t *
00524 jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int public)
00525 {
00526 jsgf_rule_t *rule;
00527 void *val;
00528
00529 if (name == NULL) {
00530 name = ckd_malloc(strlen(jsgf->name) + 16);
00531 sprintf(name, "<%s.g%05d>", jsgf->name, hash_table_inuse(jsgf->rules));
00532 }
00533 else {
00534 char *newname;
00535
00536 newname = jsgf_fullname(jsgf, name);
00537 name = newname;
00538 }
00539
00540 rule = ckd_calloc(1, sizeof(*rule));
00541 rule->refcnt = 1;
00542 rule->name = ckd_salloc(name);
00543 rule->rhs = rhs;
00544 rule->public = public;
00545
00546 E_INFO("Defined rule: %s%s\n",
00547 rule->public ? "PUBLIC " : "",
00548 rule->name);
00549 val = hash_table_enter(jsgf->rules, name, rule);
00550 if (val != (void *)rule) {
00551 E_WARN("Multiply defined symbol: %s\n", name);
00552 }
00553 return rule;
00554 }
00555
00556 jsgf_rule_t *
00557 jsgf_rule_retain(jsgf_rule_t *rule)
00558 {
00559 ++rule->refcnt;
00560 return rule;
00561 }
00562
00563 int
00564 jsgf_rule_free(jsgf_rule_t *rule)
00565 {
00566 if (rule == NULL)
00567 return 0;
00568 if (--rule->refcnt > 0)
00569 return rule->refcnt;
00570 jsgf_rhs_free(rule->rhs);
00571 ckd_free(rule->name);
00572 ckd_free(rule);
00573 return 0;
00574 }
00575
00576
00577
00578 static char *
00579 path_list_search(glist_t paths, char *path)
00580 {
00581 gnode_t *gn;
00582
00583 for (gn = paths; gn; gn = gnode_next(gn)) {
00584 char *fullpath;
00585 FILE *tmp;
00586
00587 fullpath = string_join(gnode_ptr(gn), "/", path, NULL);
00588 tmp = fopen(fullpath, "r");
00589 if (tmp != NULL) {
00590 fclose(tmp);
00591 return fullpath;
00592 }
00593 else
00594 ckd_free(fullpath);
00595 }
00596 return NULL;
00597 }
00598
00599 jsgf_rule_t *
00600 jsgf_import_rule(jsgf_t *jsgf, char *name)
00601 {
00602 char *c, *path, *newpath;
00603 size_t namelen, packlen;
00604 void *val;
00605 jsgf_t *imp;
00606 int import_all;
00607
00608
00609 namelen = strlen(name);
00610 path = ckd_malloc(namelen - 2 + 6);
00611 strcpy(path, name + 1);
00612
00613 c = strrchr(path, '.');
00614 if (c == NULL) {
00615 E_ERROR("Imported rule is not qualified: %s\n", name);
00616 ckd_free(path);
00617 return NULL;
00618 }
00619 packlen = c - path;
00620 *c = '\0';
00621
00622
00623 import_all = (strlen(name) > 2 && 0 == strcmp(name + namelen - 3, ".*>"));
00624
00625
00626 for (c = path; *c; ++c)
00627 if (*c == '.') *c = '/';
00628 strcat(path, ".gram");
00629 newpath = path_list_search(jsgf->searchpath, path);
00630 ckd_free(path);
00631 if (newpath == NULL)
00632 return NULL;
00633
00634 path = newpath;
00635 E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);
00636
00637
00638
00639
00640 if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
00641 E_INFO("Already imported %s\n", path);
00642 imp = val;
00643 ckd_free(path);
00644 }
00645 else {
00646
00647 imp = jsgf_parse_file(path, jsgf);
00648 val = hash_table_enter(jsgf->imports, path, imp);
00649 if (val != (void *)imp) {
00650 E_WARN("Multiply imported file: %s\n", path);
00651 }
00652 }
00653 if (imp != NULL) {
00654 hash_iter_t *itor;
00655
00656 for (itor = hash_table_iter(imp->rules); itor;
00657 itor = hash_table_iter_next(itor)) {
00658 hash_entry_t *he = itor->ent;
00659 jsgf_rule_t *rule = hash_entry_val(he);
00660 int rule_matches;
00661 char *rule_name = importname2rulename(name);
00662
00663 if (import_all) {
00664
00665 rule_matches = !strncmp(rule_name, rule->name, packlen + 1);
00666 }
00667 else {
00668
00669 rule_matches = !strcmp(rule_name, rule->name);
00670 }
00671 ckd_free(rule_name);
00672 if (rule->public && rule_matches) {
00673 void *val;
00674 char *newname;
00675
00676
00677 c = strrchr(rule->name, '.');
00678 assert(c != NULL);
00679 newname = jsgf_fullname(jsgf, c);
00680
00681 E_INFO("Imported %s\n", newname);
00682 val = hash_table_enter(jsgf->rules, newname,
00683 jsgf_rule_retain(rule));
00684 if (val != (void *)rule) {
00685 E_WARN("Multiply defined symbol: %s\n", newname);
00686 }
00687 if (!import_all) {
00688 hash_table_iter_free(itor);
00689 return rule;
00690 }
00691 }
00692 }
00693 }
00694
00695 return NULL;
00696 }
00697
00698 jsgf_t *
00699 jsgf_parse_file(const char *filename, jsgf_t *parent)
00700 {
00701 yyscan_t yyscanner;
00702 jsgf_t *jsgf;
00703 int yyrv;
00704 FILE *in = NULL;
00705
00706 yylex_init(&yyscanner);
00707 if (filename == NULL) {
00708 yyset_in(stdin, yyscanner);
00709 }
00710 else {
00711 in = fopen(filename, "r");
00712 if (in == NULL) {
00713 fprintf(stderr, "Failed to open %s for parsing: %s\n",
00714 filename, strerror(errno));
00715 return NULL;
00716 }
00717 yyset_in(in, yyscanner);
00718 }
00719
00720 jsgf = jsgf_grammar_new(parent);
00721 yyrv = yyparse(yyscanner, jsgf);
00722 if (yyrv != 0) {
00723 fprintf(stderr, "JSGF parse of %s failed\n",
00724 filename ? filename : "(stdin)");
00725 jsgf_grammar_free(jsgf);
00726 yylex_destroy(yyscanner);
00727 return NULL;
00728 }
00729 if (in)
00730 fclose(in);
00731 yylex_destroy(yyscanner);
00732
00733 return jsgf;
00734 }