PocketSphinx  0.6
ngram_search.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 #ifndef __NGRAM_SEARCH_H__
43 #define __NGRAM_SEARCH_H__
44 
45 /* SphinxBase headers. */
46 #include <sphinxbase/cmd_ln.h>
47 #include <sphinxbase/logmath.h>
48 #include <sphinxbase/ngram_model.h>
49 #include <sphinxbase/listelem_alloc.h>
50 #include <sphinxbase/err.h>
51 
52 /* Local headers. */
53 #include "pocketsphinx_internal.h"
54 #include "hmm.h"
55 
64 typedef struct chan_s {
68  struct chan_s *next;
71  struct chan_s *alt;
73  int32 ciphone;
74  union {
79  int32 rc_id;
80  } info;
81 } chan_t;
82 
90 typedef struct root_chan_s {
96  int32 penult_phn_wid;
97  int32 this_phn_wid;
100  int16 ciphone;
102  int16 ci2phone;
104 } root_chan_t;
105 
109 typedef struct bptbl_s {
111  uint8 valid;
112  uint8 refcnt;
113  int32 wid;
114  int32 bp;
115  int32 score;
116  int32 s_idx;
117  int32 real_wid;
119  int16 last_phone;
120  int16 last2_phone;
121 } bptbl_t;
122 
126 typedef struct bptbl_seg_s {
128  int32 *bpidx;
129  int16 n_bpidx;
130  int16 cur;
131 } bptbl_seg_t;
132 
133 /*
134  * Candidates words for entering their last phones. Cleared and rebuilt in each
135  * frame.
136  * NOTE: candidates can only be multi-phone, real dictionary words.
137  */
138 typedef struct lastphn_cand_s {
139  int32 wid;
140  int32 score;
141  int32 bp;
142  int32 next; /* next candidate starting at the same frame */
144 
145 /*
146  * Since the same instance of a word (i.e., <word,start-frame>) reaches its last
147  * phone several times, we can compute its best BP and LM transition score info
148  * just the first time and cache it for future occurrences. Structure for such
149  * a cache.
150  */
151 typedef struct {
152  int32 sf; /* Start frame */
153  int32 dscr; /* Delta-score upon entering last phone */
154  int32 bp; /* Best BP */
155 } last_ltrans_t;
156 
157 #define CAND_SF_ALLOCSIZE 32
158 typedef struct {
159  int32 bp_ef;
160  int32 cand;
161 } cand_sf_t;
162 
163 /*
164  * Structure for reorganizing the BP table entries in the current frame according
165  * to distinct right context ci-phones. Each entry contains the best BP entry for
166  * a given right context. Each successor word will pick up the correct entry based
167  * on its first ci-phone.
168  */
169 typedef struct bestbp_rc_s {
170  int32 score;
171  int32 path; /* BP table index corresponding to this entry */
172  int32 lc; /* right most ci-phone of above BP entry word */
173 } bestbp_rc_t;
174 
175 #define NO_BP -1
176 
180 typedef struct ngram_search_stats_s {
181  int32 n_phone_eval;
182  int32 n_root_chan_eval;
183  int32 n_nonroot_chan_eval;
184  int32 n_last_chan_eval;
185  int32 n_word_lastchan_eval;
186  int32 n_lastphn_cand_utt;
187  int32 n_fwdflat_chan;
188  int32 n_fwdflat_words;
189  int32 n_fwdflat_word_transition;
190  int32 n_senone_active_utt;
192 
193 
198  ps_search_t base;
199  ngram_model_t *lmset;
202  /* Flags to quickly indicate which passes are enabled. */
203  uint8 fwdtree;
204  uint8 fwdflat;
205  uint8 bestpath;
206 
207  /* State of procesing. */
208  uint8 done;
209 
210  /* Allocators */
211  listelem_alloc_t *chan_alloc;
212  listelem_alloc_t *root_chan_alloc;
213  listelem_alloc_t *latnode_alloc;
233  int32 n_root_chan;
247  bitvec_t *word_active;
265  int32 n_1ph_words;
276  int32 n_active_chan[2];
288  int32 n_active_word[2];
290  /*
291  * FIXME: Document all of these bits.
292  */
293  lastphn_cand_t *lastphn_cand;
294  int32 n_lastphn_cand;
295  last_ltrans_t *last_ltrans; /* one per word */
296  int32 cand_sf_alloc;
297  cand_sf_t *cand_sf;
298  bestbp_rc_t *bestbp_rc;
299 
300  bptbl_t *bp_table; /* Forward pass lattice */
301  int32 bpidx; /* First free BPTable entry */
302  int32 bp_table_size;
303  int32 *bscore_stack; /* Score stack for all possible right contexts */
304  int32 bss_head; /* First free BScoreStack entry */
305  int32 bscore_stack_size;
306 
308  int32 n_frame;
309  int32 *bp_table_idx; /* First BPTable entry for each frame */
310  int32 *word_lat_idx; /* BPTable index for any word in current frame;
311  cleared before each frame */
312 
313  /*
314  * Flat lexicon (2nd pass) search stuff.
315  */
318  bitvec_t *expand_word_flag;
319  int32 *expand_word_list;
320  int32 n_expand_words;
321  int32 min_ef_width;
322  int32 max_sf_win;
323  float32 fwdflat_fwdtree_lw_ratio;
324 
325  int32 best_score;
327  int32 renormalized;
328 
329  /*
330  * DAG (3rd pass) search stuff.
331  */
332  float32 bestpath_fwdtree_lw_ratio;
333  float32 ascale;
336  ptmr_t fwdtree_perf;
337  ptmr_t fwdflat_perf;
338  ptmr_t bestpath_perf;
339  int32 n_tot_frame;
340 
341  /* A collection of beam widths. */
342  int32 beam;
343  int32 dynamic_beam;
344  int32 pbeam;
345  int32 wbeam;
346  int32 lpbeam;
347  int32 lponlybeam;
348  int32 fwdflatbeam;
349  int32 fwdflatwbeam;
350  int32 fillpen;
351  int32 silpen;
352  int32 wip;
353  int32 nwpen;
354  int32 pip;
355  int32 maxwpf;
356  int32 maxhmmpf;
357 };
358 typedef struct ngram_search_s ngram_search_t;
359 
363 ps_search_t *ngram_search_init(cmd_ln_t *config,
364  acmod_t *acmod,
365  dict_t *dict,
366  dict2pid_t *d2p);
367 
371 void ngram_search_free(ps_search_t *ngs);
372 
378 int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx);
379 
383 void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w,
384  int32 score, int32 path, int32 rc);
385 
389 void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w);
390 
394 void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w);
395 
401 int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score, int32 *out_is_final);
402 
408 char const *ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx);
409 
413 void ngram_compute_seg_scores(ngram_search_t *ngs, float32 lwf);
414 
419 
423 int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone);
424 
425 #endif /* __NGRAM_SEARCH_H__ */