• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

include/ngram_model.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2007 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00043 #ifndef __NGRAM_MODEL_H__
00044 #define __NGRAM_MODEL_H__
00045 
00046 /* Win32/WinCE DLL gunk */
00047 #include <sphinxbase_export.h>
00048 #include <prim_type.h>
00049 #include <cmd_ln.h>
00050 #include <logmath.h>
00051 #include <mmio.h>
00052 #include <stdarg.h>
00053 
00054 #ifdef __cplusplus
00055 extern "C" {
00056 #endif
00057 #if 0
00058 /* Fool Emacs. */
00059 }
00060 #endif
00061 
00065 typedef struct ngram_model_s ngram_model_t;
00066 
00070 typedef struct ngram_class_s ngram_class_t;
00071 
00075 typedef enum ngram_file_type_e { /* File-type codes: taken by ngram_model_read() (file_type) and ngram_model_write() (format); returned by ngram_file_name_to_type() and ngram_str_to_type(). */
00076     NGRAM_INVALID = -1, /* Explicitly -1, the only negative enumerator; presumably the "unrecognized type" result -- confirm. */
00077     NGRAM_AUTO,  /* Implicit value 0; presumably asks the reader to detect the type itself -- confirm. */
00078     NGRAM_ARPA,  /* Implicit value 1; presumably the ARPA text format -- confirm. */
00079     NGRAM_DMP,   /* Implicit value 2; presumably the Sphinx DMP binary format -- confirm. */
00080     NGRAM_DMP32, /* Implicit value 3. NOTE(review): the trailing comma after the last enumerator is rejected by strict C89 compilers. */
00081 } ngram_file_type_t;
00082 
00083 #define NGRAM_INVALID_WID -1 /* Presumably the "no such word" value for the int32 word IDs used by this API -- confirm. NOTE(review): the expansion is unparenthesized; (-1) is the safer macro form. */
00105 SPHINXBASE_EXPORT
00106 ngram_model_t *ngram_model_read(cmd_ln_t *config,
00107                                 const char *file_name,
00108                                 ngram_file_type_t file_type,
00109                                 logmath_t *lmath);
00110 
00116 SPHINXBASE_EXPORT
00117 int ngram_model_write(ngram_model_t *model, const char *file_name,
00118                       ngram_file_type_t format);
00119 
00125 SPHINXBASE_EXPORT
00126 ngram_file_type_t ngram_file_name_to_type(const char *file_name);
00127 
00133 SPHINXBASE_EXPORT
00134 ngram_file_type_t ngram_str_to_type(const char *str_name);
00135 
00142 SPHINXBASE_EXPORT
00143 char const *ngram_type_to_str(int type);
00144 
00150 SPHINXBASE_EXPORT
00151 ngram_model_t *ngram_model_retain(ngram_model_t *model);
00152 
00158 SPHINXBASE_EXPORT
00159 int ngram_model_free(ngram_model_t *model);
00160 
00177 SPHINXBASE_EXPORT
00178 int ngram_model_recode(ngram_model_t *model, const char *from, const char *to);
00179 
00183 typedef enum ngram_case_e { /* Case selector; presumably the values expected by ngram_model_casefold(), whose kase parameter is declared as plain int -- confirm. */
00184     NGRAM_UPPER, /* Implicit value 0. */
00185     NGRAM_LOWER /* Implicit value 1. */
00186 } ngram_case_t;
00187 
00194 SPHINXBASE_EXPORT
00195 int ngram_model_casefold(ngram_model_t *model, int kase);
00196 
00208 SPHINXBASE_EXPORT
00209 int ngram_model_apply_weights(ngram_model_t *model,
00210                               float32 lw, float32 wip, float32 uw);
00211 
00220 SPHINXBASE_EXPORT
00221 float32 ngram_model_get_weights(ngram_model_t *model, int32 *out_log_wip,
00222                                 int32 *out_log_uw);
00223 
00256 SPHINXBASE_EXPORT
00257 int32 ngram_score(ngram_model_t *model, const char *word, ...);
00258 
00262 SPHINXBASE_EXPORT
00263 int32 ngram_tg_score(ngram_model_t *model,
00264                      int32 w3, int32 w2, int32 w1,
00265                      int32 *n_used);
00266 
00270 SPHINXBASE_EXPORT
00271 int32 ngram_bg_score(ngram_model_t *model,
00272                      int32 w2, int32 w1,
00273                      int32 *n_used);
00274 
00278 SPHINXBASE_EXPORT
00279 int32 ngram_ng_score(ngram_model_t *model, int32 wid, int32 *history,
00280                      int32 n_hist, int32 *n_used);
00281 
00292 SPHINXBASE_EXPORT
00293 int32 ngram_prob(ngram_model_t *model, const char *word, ...);
00294 
00301 SPHINXBASE_EXPORT
00302 int32 ngram_ng_prob(ngram_model_t *model, int32 wid, int32 *history,
00303                     int32 n_hist, int32 *n_used);
00304 
00316 SPHINXBASE_EXPORT
00317 int32 ngram_score_to_prob(ngram_model_t *model, int32 score);
00318 
00322 SPHINXBASE_EXPORT
00323 int32 ngram_wid(ngram_model_t *model, const char *word);
00324 
00328 SPHINXBASE_EXPORT
00329 const char *ngram_word(ngram_model_t *model, int32 wid);
00330 
00344 SPHINXBASE_EXPORT
00345 int32 ngram_unknown_wid(ngram_model_t *model);
00346 
00350 SPHINXBASE_EXPORT
00351 int32 ngram_zero(ngram_model_t *model);
00352 
00356 SPHINXBASE_EXPORT
00357 int32 ngram_model_get_size(ngram_model_t *model);
00358 
00362 SPHINXBASE_EXPORT
00363 int32 const *ngram_model_get_counts(ngram_model_t *model);
00364 
00368 typedef struct ngram_iter_s ngram_iter_t;
00369 
00378 SPHINXBASE_EXPORT
00379 ngram_iter_t *ngram_model_mgrams(ngram_model_t *model, int m);
00380 
00384 SPHINXBASE_EXPORT
00385 ngram_iter_t *ngram_iter(ngram_model_t *model, const char *word, ...);
00386 
00390 SPHINXBASE_EXPORT
00391 ngram_iter_t *ngram_ng_iter(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist);
00392 
00401 SPHINXBASE_EXPORT
00402 int32 const *ngram_iter_get(ngram_iter_t *itor,
00403                             int32 *out_score,
00404                             int32 *out_bowt);
00405 
00411 SPHINXBASE_EXPORT
00412 ngram_iter_t *ngram_iter_successors(ngram_iter_t *itor);
00413 
00417 SPHINXBASE_EXPORT
00418 ngram_iter_t *ngram_iter_next(ngram_iter_t *itor);
00419 
00423 SPHINXBASE_EXPORT
00424 void ngram_iter_free(ngram_iter_t *itor);
00425 
00438 SPHINXBASE_EXPORT
00439 int32 ngram_model_add_word(ngram_model_t *model,
00440                            const char *word, float32 weight);
00441 
00455 SPHINXBASE_EXPORT
00456 int32 ngram_model_read_classdef(ngram_model_t *model,
00457                                 const char *file_name);
00458 
00467 SPHINXBASE_EXPORT
00468 int32 ngram_model_add_class(ngram_model_t *model,
00469                             const char *classname,
00470                             float32 classweight,
00471                             char **words,
00472                             const float32 *weights,
00473                             int32 n_words);
00474 
00484 SPHINXBASE_EXPORT
00485 int32 ngram_model_add_class_word(ngram_model_t *model,
00486                                  const char *classname,
00487                                  const char *word,
00488                                  float32 weight);
00489 
00514 SPHINXBASE_EXPORT
00515 ngram_model_t *ngram_model_set_init(cmd_ln_t *config,
00516                                     ngram_model_t **models,
00517                                     char **names,
00518                                     const float32 *weights,
00519                                     int32 n_models);
00520 
00551 SPHINXBASE_EXPORT
00552 ngram_model_t *ngram_model_set_read(cmd_ln_t *config,
00553                                     const char *lmctlfile,
00554                                     logmath_t *lmath);
00555 
00559 SPHINXBASE_EXPORT
00560 int32 ngram_model_set_count(ngram_model_t *set);
00561 
00565 typedef struct ngram_model_set_iter_s ngram_model_set_iter_t;
00566 
00572 SPHINXBASE_EXPORT
00573 ngram_model_set_iter_t *ngram_model_set_iter(ngram_model_t *set);
00574 
00580 SPHINXBASE_EXPORT
00581 ngram_model_set_iter_t *ngram_model_set_iter_next(ngram_model_set_iter_t *itor);
00582 
00586 SPHINXBASE_EXPORT
00587 void ngram_model_set_iter_free(ngram_model_set_iter_t *itor);
00588 
00596 SPHINXBASE_EXPORT
00597 ngram_model_t *ngram_model_set_iter_model(ngram_model_set_iter_t *itor,
00598                                           char const **lmname);
00599 
00606 SPHINXBASE_EXPORT
00607 ngram_model_t *ngram_model_set_select(ngram_model_t *set,
00608                                       const char *name);
00609 
00616 SPHINXBASE_EXPORT
00617 ngram_model_t *ngram_model_set_lookup(ngram_model_t *set,
00618                                       const char *name);
00619 
00623 SPHINXBASE_EXPORT
00624 const char *ngram_model_set_current(ngram_model_t *set);
00625 
00633 SPHINXBASE_EXPORT
00634 ngram_model_t *ngram_model_set_interp(ngram_model_t *set,
00635                                       const char **names,
00636                                       const float32 *weights);
00637 
00650 SPHINXBASE_EXPORT
00651 ngram_model_t *ngram_model_set_add(ngram_model_t *set,
00652                                    ngram_model_t *model,
00653                                    const char *name,
00654                                    float32 weight,
00655                                    int reuse_widmap);
00656 
00665 SPHINXBASE_EXPORT
00666 ngram_model_t *ngram_model_set_remove(ngram_model_t *set,
00667                                       const char *name,
00668                                       int reuse_widmap);
00669 
00673 SPHINXBASE_EXPORT
00674 void ngram_model_set_map_words(ngram_model_t *set,
00675                                const char **words,
00676                                int32 n_words);
00677 
00685 SPHINXBASE_EXPORT
00686 int32 ngram_model_set_current_wid(ngram_model_t *set,
00687                                   int32 set_wid);
00688 
00698 SPHINXBASE_EXPORT
00699 int32 ngram_model_set_known_wid(ngram_model_t *set, int32 set_wid);
00700 
00708 SPHINXBASE_EXPORT
00709 void ngram_model_flush(ngram_model_t *lm);
00710 
00711 #ifdef __cplusplus
00712 }
00713 #endif
00714 
00715 
00716 #endif /* __NGRAM_MODEL_H__ */

Generated on Mon Aug 29 2011 for SphinxBase by doxygen 1.7.1