00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #include "lm3g_model.h"
00044 #include "listelem_alloc.h"
00045 #include "ckd_alloc.h"
00046 #include "err.h"
00047
00048 #include <string.h>
00049 #include <assert.h>
00050 #include <limits.h>
00051
00052 void
00053 lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g)
00054 {
00055 if (lm3g->tginfo == NULL)
00056 return;
00057 listelem_alloc_free(lm3g->le);
00058 ckd_free(lm3g->tginfo);
00059 }
00060
00061 void
00062 lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g)
00063 {
00064 if (lm3g->tginfo == NULL)
00065 return;
00066 listelem_alloc_free(lm3g->le);
00067 memset(lm3g->tginfo, 0, base->n_counts[0] * sizeof(tginfo_t *));
00068 lm3g->le = listelem_alloc_init(sizeof(tginfo_t));
00069 }
00070
00071 void
00072 lm3g_apply_weights(ngram_model_t *base,
00073 lm3g_model_t *lm3g,
00074 float32 lw, float32 wip, float32 uw)
00075 {
00076 int32 log_wip, log_uw, log_uniform_weight;
00077 int i;
00078
00079
00080 log_wip = logmath_log(base->lmath, wip);
00081 log_uw = logmath_log(base->lmath, uw);
00082 log_uniform_weight = logmath_log(base->lmath, 1.0 - uw);
00083
00084 for (i = 0; i < base->n_counts[0]; ++i) {
00085 int32 prob1, bo_wt, n_used;
00086
00087
00088 bo_wt = (int32)(lm3g->unigrams[i].bo_wt1.l / base->lw);
00089
00090
00091 prob1 = ngram_ng_prob(base, i, NULL, 0, &n_used);
00092
00093 lm3g->unigrams[i].bo_wt1.l = (int32)(bo_wt * lw);
00094 if (strcmp(base->word_str[i], "<s>") == 0) {
00095
00096 lm3g->unigrams[i].prob1.l = (int32)(prob1 * lw) + log_wip;
00097 }
00098 else {
00099
00100 prob1 += log_uw;
00101 prob1 = logmath_add(base->lmath, prob1, base->log_uniform + log_uniform_weight);
00102
00103 lm3g->unigrams[i].prob1.l = (int32)(prob1 * lw) + log_wip;
00104 }
00105 }
00106
00107 for (i = 0; i < lm3g->n_prob2; ++i) {
00108 int32 prob2;
00109
00110 prob2 = (int32)((lm3g->prob2[i].l - base->log_wip) / base->lw);
00111 lm3g->prob2[i].l = (int32)(prob2 * lw) + log_wip;
00112 }
00113
00114 if (base->n > 2) {
00115 for (i = 0; i < lm3g->n_bo_wt2; ++i) {
00116 lm3g->bo_wt2[i].l = (int32)(lm3g->bo_wt2[i].l / base->lw * lw);
00117 }
00118 for (i = 0; i < lm3g->n_prob3; i++) {
00119 int32 prob3;
00120
00121 prob3 = (int32)((lm3g->prob3[i].l - base->log_wip) / base->lw);
00122 lm3g->prob3[i].l = (int32)(prob3 * lw) + log_wip;
00123 }
00124 }
00125
00126
00127 base->log_wip = log_wip;
00128 base->log_uw = log_uw;
00129 base->log_uniform_weight = log_uniform_weight;
00130 base->lw = lw;
00131 }
00132
00133 int32
00134 lm3g_add_ug(ngram_model_t *base,
00135 lm3g_model_t *lm3g, int32 wid, int32 lweight)
00136 {
00137 int32 score;
00138
00139
00140 assert(!NGRAM_IS_CLASSWID(wid));
00141
00142
00143 lm3g->unigrams = ckd_realloc(lm3g->unigrams,
00144 sizeof(*lm3g->unigrams) * base->n_1g_alloc);
00145 memset(lm3g->unigrams + base->n_counts[0], 0,
00146 (base->n_1g_alloc - base->n_counts[0]) * sizeof(*lm3g->unigrams));
00147
00148 lm3g->tginfo = ckd_realloc(lm3g->tginfo,
00149 sizeof(*lm3g->tginfo) * base->n_1g_alloc);
00150 memset(lm3g->tginfo + base->n_counts[0], 0,
00151 (base->n_1g_alloc - base->n_counts[0]) * sizeof(*lm3g->tginfo));
00152
00153
00154
00155
00156
00157 score = lweight + base->log_uniform + base->log_uw;
00158 score = logmath_add(base->lmath, score,
00159 base->log_uniform + base->log_uniform_weight);
00160 lm3g->unigrams[wid].prob1.l = score;
00161
00162
00163 lm3g->unigrams[wid].bo_wt1.l = logmath_get_zero(base->lmath);
00164 lm3g->unigrams[wid].bigrams = 0;
00165
00166 ++base->n_counts[0];
00167
00168
00169
00170
00171 if (wid >= base->n_counts[0])
00172 base->n_counts[0] = wid + 1;
00173
00174 return score;
00175 }
00176
00177 void
00178 init_sorted_list(sorted_list_t * l)
00179 {
00180
00181 l->list = ckd_calloc(MAX_SORTED_ENTRIES,
00182 sizeof(sorted_entry_t));
00183 l->list[0].val.l = INT_MIN;
00184 l->list[0].lower = 0;
00185 l->list[0].higher = 0;
00186 l->free = 1;
00187 }
00188
00189 void
00190 free_sorted_list(sorted_list_t * l)
00191 {
00192 free(l->list);
00193 }
00194
00195 lmprob_t *
00196 vals_in_sorted_list(sorted_list_t * l)
00197 {
00198 lmprob_t *vals;
00199 int32 i;
00200
00201 vals = ckd_calloc(l->free, sizeof(lmprob_t));
00202 for (i = 0; i < l->free; i++)
00203 vals[i] = l->list[i].val;
00204 return (vals);
00205 }
00206
00207 int32
00208 sorted_id(sorted_list_t * l, int32 *val)
00209 {
00210 int32 i = 0;
00211
00212 for (;;) {
00213 if (*val == l->list[i].val.l)
00214 return (i);
00215 if (*val < l->list[i].val.l) {
00216 if (l->list[i].lower == 0) {
00217 if (l->free >= MAX_SORTED_ENTRIES) {
00218
00219 E_WARN("sorted list overflow (%d => %d)\n",
00220 *val, l->list[i].val.l);
00221 return i;
00222 }
00223
00224 l->list[i].lower = l->free;
00225 (l->free)++;
00226 i = l->list[i].lower;
00227 l->list[i].val.l = *val;
00228 return (i);
00229 }
00230 else
00231 i = l->list[i].lower;
00232 }
00233 else {
00234 if (l->list[i].higher == 0) {
00235 if (l->free >= MAX_SORTED_ENTRIES) {
00236
00237 E_WARN("sorted list overflow (%d => %d)\n",
00238 *val, l->list[i].val);
00239 return i;
00240 }
00241
00242 l->list[i].higher = l->free;
00243 (l->free)++;
00244 i = l->list[i].higher;
00245 l->list[i].val.l = *val;
00246 return (i);
00247 }
00248 else
00249 i = l->list[i].higher;
00250 }
00251 }
00252 }
00253