SphinxBase  0.6
lm3g_model.h
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * \file lm3g_model.h Core Sphinx 3-gram code used in
39  * DMP/DMP32/ARPA (for now) model code.
40  *
41  * Author: A cast of thousands, probably.
42  */
43 
44 #ifndef __NGRAM_MODEL_LM3G_H__
45 #define __NGRAM_MODEL_LM3G_H__
46 
48 
49 #include "ngram_model_internal.h"
50 
/*
 * Value cell that overlays a float32 and an int32 in the same storage;
 * as with any C union, only the member most recently written is meaningful.
 * NOTE(review): presumably a probability is kept as a float at one stage and
 * as an integer (log-domain) value at another -- confirm against the code
 * that reads and writes this type.
 */
54 typedef union {
55  float32 f; /* floating-point view of the stored value */
56  int32 l; /* integer view of the same storage */
57 } lmprob_t;
58 
/*
 * Element type of the array that a sorted list reaches through its `list`
 * pointer.
 * NOTE(review): `lower` and `higher` presumably index other entries holding
 * smaller / larger values -- confirm against the implementation.
 * NOTE(review): the listing lost the closing line of this typedef; it is
 * restored here because the name sorted_entry_t is used by a later
 * declaration in this header. The embedded listing numbers also skip 69,
 * 71-72 and 74-75, so member declarations (for instance the stored value)
 * may be missing as well -- compare with the upstream header before relying
 * on this layout.
 */
68 typedef struct sorted_entry_s {
70  uint16 lower;
73  uint16 higher;
76 } sorted_entry_t;
77 
/*
 * Handle for a collection of sorted_entry_t records.
 * NOTE(review): the closing line of this typedef was absent from the
 * listing (embedded number 85 is skipped); the name sorted_list_t is
 * restored because the sorted-list prototypes later in this header take a
 * sorted_list_t pointer.
 */
82 typedef struct {
83  sorted_entry_t *list; /* pointer to the sorted_entry_t storage */
84  int32 free; /* NOTE(review): presumably the index of the first unused entry in list -- confirm */
85 } sorted_list_t;
86 
/*
 * NOTE(review): presumably the maximum number of entries a sorted list may
 * hold; 65534 stays within the range of the uint16 members of
 * sorted_entry_s -- confirm that this is the intended relationship.
 */
87 #define MAX_SORTED_ENTRIES 65534
88 
/*
 * Per-unigram record.
 * NOTE(review): the embedded listing numbers skip 93-94, so members may have
 * been dropped from this view -- compare with the upstream header.
 */
92 typedef struct unigram_s {
95  int32 bigrams; /* NOTE(review): presumably the index of this unigram's first bigram entry -- confirm */
96 } unigram_t;
97 
/*
 * Bigram and trigram entry types are only forward-declared in the visible
 * part of this header; their layouts are not shown here, so code that
 * includes just this header can use them through pointers only.
 */
101 typedef struct bigram_s bigram_t;
105 typedef struct trigram_s trigram_t;
106 
107 
108 /*
109  * To conserve space, bigram info is kept in many tables. Since the number
110  * of distinct values << #bigrams, these table indices can be 16-bit values.
111  * prob2 and bo_wt2 are such indices, but keeping trigram index is less easy.
112  * It is supposed to be the index of the first trigram entry for each bigram.
113  * But such an index cannot be represented in 16-bits, hence the following
114  * segmentation scheme: Partition bigrams into segments of BG_SEG_SZ
115  * consecutive entries, such that #trigrams in each segment <= 2**16 (the
116  * corresponding trigram segment). The bigram_t.trigrams value is then a
117  * 16-bit relative index within the trigram segment. A separate table--
118  * lm3g_model_t.tseg_base--has the index of the 1st trigram for each bigram segment.
119  */
120 #define BG_SEG_SZ 512 /* chosen so that #trigram/segment <= 2**16 */
121 #define LOG_BG_SEG_SZ 9 /* log2 of BG_SEG_SZ (2**9 == 512); the two must be kept in sync */
122 
/*
 * Trigram-information node. Each node carries a `next` pointer to another
 * tginfo_s, so nodes can be chained together.
 * NOTE(review): the member meanings below are inferred from their names
 * only -- confirm against the code that fills them in.
 * NOTE(review): the embedded listing numbers skip 132 and 136, so members
 * may have been dropped from this view.
 */
130 typedef struct tginfo_s {
131  int32 w1; /* NOTE(review): presumably a word id -- confirm */
133  int32 n_tg; /* NOTE(review): presumably a trigram count -- confirm */
134  int32 bowt; /* NOTE(review): presumably a backoff weight -- confirm */
135  int32 used; /* NOTE(review): presumably a usage marker -- confirm */
137  struct tginfo_s *next; /* next node in the chain */
138 } tginfo_t;
139 
/*
 * Groups the unigram, bigram and trigram pointers of a 3-gram model with a
 * few table sizes and the trigram segment base table.
 * NOTE(review): the embedded listing numbers skip 147, 149, 151 and
 * 154-157, so members may have been dropped from this view -- compare with
 * the upstream header before relying on this layout.
 */
143 typedef struct lm3g_model_s {
144  unigram_t *unigrams; /* pointer to unigram_t records */
145  bigram_t *bigrams; /* pointer to bigram_t records */
146  trigram_t *trigrams; /* pointer to trigram_t records */
148  int32 n_prob2; /* NOTE(review): presumably the size of a bigram probability table -- confirm */
150  int32 n_bo_wt2; /* NOTE(review): presumably the size of a bigram backoff-weight table -- confirm */
152  int32 n_prob3; /* NOTE(review): presumably the size of a trigram probability table -- confirm */
153  int32 *tseg_base; /* per the segmentation comment above BG_SEG_SZ: index of the 1st trigram for each bigram segment */
158 } lm3g_model_t;
159 
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): going by the name, presumably releases the tginfo_t data
 * associated with lm3g -- confirm against the implementation.
 */
160 void lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g);
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): going by the name, presumably resets the tginfo_t data
 * associated with lm3g without destroying the model -- confirm against the
 * implementation.
 */
161 void lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g);
/*
 * Declaration only; the definition is not part of this header.
 * Takes the generic model, the 3-gram model data and three float32 weights.
 * NOTE(review): lw / wip / uw presumably stand for language weight, word
 * insertion penalty and unigram weight -- confirm against the
 * implementation.
 */
162 void lm3g_apply_weights(ngram_model_t *base,
163  lm3g_model_t *lm3g,
164  float32 lw, float32 wip, float32 uw);
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): going by the name, presumably adds a unigram for word id
 * `wid` with weight `lweight`; the meaning of the int32 return value is not
 * visible here -- confirm against the implementation.
 */
165 int32 lm3g_add_ug(ngram_model_t *base,
166  lm3g_model_t *lm3g, int32 wid, int32 lweight);
167 
168 
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably prepares the sorted list `l` for use -- confirm
 * whether it allocates storage that free_sorted_list must release.
 */
173 void init_sorted_list(sorted_list_t *l);
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably releases the storage held by the sorted list
 * `l` -- confirm whether `l` itself is freed or only its contents.
 */
174 void free_sorted_list(sorted_list_t *l);
/*
 * Declaration only; the definition is not part of this header.
 * Returns a pointer to lmprob_t.
 * NOTE(review): presumably an array of the values held in `l`; who owns and
 * frees the returned memory is not visible here -- confirm against the
 * implementation.
 */
175 lmprob_t *vals_in_sorted_list(sorted_list_t *l);
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably looks up (and possibly inserts) the value that
 * `val` points to and returns its id within `l` -- confirm against the
 * implementation, including why the value is passed by pointer.
 */
176 int32 sorted_id(sorted_list_t * l, int32 *val);
177 
178 #endif /* __NGRAM_MODEL_LM3G_H__ */