• Main Page
  • Data Structures
  • Files
  • File List
  • Globals

include/cmdln_macro.h

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2006 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* cmdln_macro.h - Command line definitions for PocketSphinx */
00039 
00040 #ifndef __PS_CMDLN_MACRO_H__
00041 #define __PS_CMDLN_MACRO_H__
00042 
00043 #include <cmd_ln.h>
00044 #include <feat.h>
00045 #include <fe.h>
00046 
/**
 * Complete PocketSphinx option list.
 *
 * Expands to one comma-separated sequence of option initializers: first the
 * two front-end macros (waveform_to_cepstral_command_line_macro and
 * cepstral_to_feature_command_line_macro, which are not defined in this
 * file; presumably they come from <fe.h> and <feat.h> -- TODO confirm),
 * then the acoustic model, beam, search, dictionary, N-gram, FSG and debug
 * groups defined in this header.
 *
 * The expansion ends without a trailing comma and without a terminating
 * entry.  NOTE(review): callers presumably append CMDLN_EMPTY_OPTION to
 * close the table -- verify against the users of this macro.
 */
00048 #define POCKETSPHINX_OPTIONS \
00049     waveform_to_cepstral_command_line_macro(), \
00050     cepstral_to_feature_command_line_macro(), \
00051     POCKETSPHINX_ACMOD_OPTIONS, \
00052         POCKETSPHINX_BEAM_OPTIONS,   \
00053         POCKETSPHINX_SEARCH_OPTIONS, \
00054         POCKETSPHINX_DICT_OPTIONS, \
00055         POCKETSPHINX_NGRAM_OPTIONS, \
00056         POCKETSPHINX_FSG_OPTIONS, \
00057         POCKETSPHINX_DEBUG_OPTIONS
00058 
/**
 * Logging and debugging options.
 *
 * Each entry is a four-field initializer: option name, ARG_* type constant,
 * default value (a string, or NULL when none is given) and help text.
 * Presumably this is the arg_t layout from <cmd_ln.h> -- TODO confirm.
 *
 * Defines -logfn, -debug, -mfclogdir and -rawlogdir.  All four have a NULL
 * default, including the integer-typed -debug.
 */
00060 #define POCKETSPHINX_DEBUG_OPTIONS                      \
00061     { "-logfn",                                         \
00062             ARG_STRING,                                 \
00063             NULL,                                       \
00064             "File to write log messages in"             \
00065      },                                                 \
00066     { "-debug",                                         \
00067             ARG_INT32,                                  \
00068             NULL,                                       \
00069             "Verbosity level for debugging messages"    \
00070      },                                                 \
00071      { "-mfclogdir",                                    \
00072              ARG_STRING,                                \
00073              NULL,                                      \
00074              "Directory to log feature files to"        \
00075              },                                         \
00076     { "-rawlogdir",                                     \
00077             ARG_STRING,                                 \
00078             NULL,                                       \
00079             "Directory to log raw audio files to" }
00080 
/**
 * Beam-width (pruning) options.
 *
 * Each entry is a four-field initializer: option name, ARG_* type constant,
 * default value written as a string, and help text.
 *
 * Every option here is ARG_FLOAT64 except -pl_window, which is an ARG_INT32
 * frame count.  As the -beam help text states, smaller values mean a wider
 * beam.  The -fwdflat* entries apply to the second-pass flat search and the
 * -pl_* entries to the phone-loop lookahead.
 */
00082 #define POCKETSPHINX_BEAM_OPTIONS                                       \
00083 { "-beam",                                                              \
00084       ARG_FLOAT64,                                                      \
00085       "1e-48",                                                          \
00086       "Beam width applied to every frame in Viterbi search (smaller values mean wider beam)" }, \
00087 { "-wbeam",                                                             \
00088       ARG_FLOAT64,                                                      \
00089       "7e-29",                                                          \
00090       "Beam width applied to word exits" },                             \
00091 { "-pbeam",                                                             \
00092       ARG_FLOAT64,                                                      \
00093       "1e-48",                                                          \
00094       "Beam width applied to phone transitions" },                      \
00095 { "-lpbeam",                                                            \
00096       ARG_FLOAT64,                                                      \
00097       "1e-40",                                                          \
00098       "Beam width applied to last phone in words" },                    \
00099 { "-lponlybeam",                                                        \
00100       ARG_FLOAT64,                                                      \
00101       "7e-29",                                                          \
00102       "Beam width applied to last phone in single-phone words" },       \
00103 { "-fwdflatbeam",                                                       \
00104       ARG_FLOAT64,                                                      \
00105       "1e-64",                                                          \
00106       "Beam width applied to every frame in second-pass flat search" }, \
00107 { "-fwdflatwbeam",                                                      \
00108       ARG_FLOAT64,                                                      \
00109       "7e-29",                                                          \
00110       "Beam width applied to word exits in second-pass flat search" },  \
00111 { "-pl_window",                                                         \
00112       ARG_INT32,                                                        \
00113       "0",                                                              \
00114       "Phoneme lookahead window size, in frames" },                     \
00115 { "-pl_beam",                                                           \
00116       ARG_FLOAT64,                                                      \
00117       "1e-10",                                                          \
00118       "Beam width applied to phone loop search for lookahead" },        \
00119 { "-pl_pbeam",                                                          \
00120       ARG_FLOAT64,                                                      \
00121       "1e-5",                                                           \
00122       "Beam width applied to phone loop transitions for lookahead" }
00123 
/**
 * Search-control options.
 *
 * Each entry is a four-field initializer: option name, ARG_* type constant,
 * default value written as a string, and help text.
 *
 * The boolean entries select which passes run (-fwdtree, -fwdflat and
 * -bestpath all default to "yes") and toggle -compallsen and -backtrace
 * (both default to "no").  The integer entries size the backpointer table
 * (-latsize), cap per-frame word exits and active HMMs (-maxwpf and
 * -maxhmmpf, where "-1" means no pruning per their help text) and set the
 * fwdflat frame windows (-fwdflatefwid, -fwdflatsfwin).
 *
 * NOTE(review): the -fwdflatsfwin help string ends with a trailing space.
 */
00125 #define POCKETSPHINX_SEARCH_OPTIONS \
00126 { "-compallsen",                                                                                \
00127       ARG_BOOLEAN,                                                                              \
00128       "no",                                                                                     \
00129       "Compute all senone scores in every frame (can be faster when there are many senones)" }, \
00130 { "-fwdtree",                                                                                   \
00131       ARG_BOOLEAN,                                                                              \
00132       "yes",                                                                                    \
00133       "Run forward lexicon-tree search (1st pass)" },                                           \
00134 { "-fwdflat",                                                                                   \
00135       ARG_BOOLEAN,                                                                              \
00136       "yes",                                                                                    \
00137       "Run forward flat-lexicon search over word lattice (2nd pass)" },                         \
00138 { "-bestpath",                                                                                  \
00139       ARG_BOOLEAN,                                                                              \
00140       "yes",                                                                                    \
00141       "Run bestpath (Dijkstra) search over word lattice (3rd pass)" },                          \
00142 { "-backtrace",                                                                                 \
00143       ARG_BOOLEAN,                                                                              \
00144       "no",                                                                                     \
00145       "Print results and backtraces to log file." },                                            \
00146 { "-latsize",                                                                                   \
00147       ARG_INT32,                                                                                \
00148       "5000",                                                                                   \
00149       "Initial backpointer table size" },                                                       \
00150 { "-maxwpf",                                                                                    \
00151       ARG_INT32,                                                                                \
00152       "-1",                                                                                     \
00153       "Maximum number of distinct word exits at each frame (or -1 for no pruning)" },           \
00154 { "-maxhmmpf",                                                                                  \
00155       ARG_INT32,                                                                                \
00156       "-1",                                                                                     \
00157       "Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)" },       \
00158 { "-fwdflatefwid",                                                                              \
00159       ARG_INT32,                                                                                \
00160       "4",                                                                                      \
00161       "Minimum number of end frames for a word to be searched in fwdflat search" },             \
00162 { "-fwdflatsfwin",                                                                              \
00163       ARG_INT32,                                                                                \
00164       "25",                                                                                     \
00165       "Window of frames in lattice to search for successor words in fwdflat search " }
00166 
/**
 * Finite-state grammar options.
 *
 * Each entry is a four-field initializer: option name, ARG_* type constant,
 * default value (a string, or NULL when none is given) and help text.
 *
 * -fsg and -jsgf name a grammar file in Sphinx FSG or JSGF format, and
 * -toprule picks the JSGF start rule; all three default to NULL.  The two
 * booleans -fsgusealtpron and -fsgusefiller default to "yes".
 */
00168 #define POCKETSPHINX_FSG_OPTIONS \
00169     { "-fsg",                                                   \
00170             ARG_STRING,                                         \
00171             NULL,                                               \
00172             "Sphinx format finite state grammar file"},         \
00173 { "-jsgf",                                                      \
00174         ARG_STRING,                                             \
00175         NULL,                                                   \
00176         "JSGF grammar file" },                                  \
00177 { "-toprule",                                                   \
00178         ARG_STRING,                                             \
00179         NULL,                                                   \
00180         "Start rule for JSGF (first public rule is default)" }, \
00181 { "-fsgusealtpron",                                             \
00182         ARG_BOOLEAN,                                            \
00183         "yes",                                                  \
00184         "Add alternate pronunciations to FSG"},                 \
00185 { "-fsgusefiller",                                              \
00186         ARG_BOOLEAN,                                            \
00187         "yes",                                                  \
00188         "Insert filler words at each state."}
00189 
/**
 * N-gram language model options.
 *
 * Each entry is a four-field initializer: option name, ARG_* type constant,
 * default value (a string, or NULL when none is given) and help text.
 *
 * -lm, -lmctl and -lmname select the language model; the ARG_FLOAT32
 * entries set language weights for each pass (-lw, -fwdflatlw,
 * -bestpathlw), the acoustic scale (-ascale), insertion and transition
 * penalties (-wip, -nwpen, -pip), the unigram weight (-uw) and the silence
 * and filler probabilities (-silprob, -fillprob).  -bghist and
 * -lextreedump are a boolean and an integer debugging/mode switch.
 *
 * Help strings carry no trailing newline; line breaks are presumably added
 * by the code that prints the table.
 */
00191 #define POCKETSPHINX_NGRAM_OPTIONS \
00192 { "-lm",                                                                                \
00193       ARG_STRING,                                                                       \
00194       NULL,                                                                             \
00195       "Word trigram language model input file" },                                       \
00196 { "-lmctl",                                                                             \
00197       ARG_STRING,                                                                       \
00198       NULL,                                                                             \
00199       "Specify a set of language model"},                                               \
00200 { "-lmname",                                                                            \
00201       ARG_STRING,                                                                       \
00202       "default",                                                                        \
00203       "Which language model in -lmctl to use by default"},                              \
00204 { "-lw",                                                                                \
00205       ARG_FLOAT32,                                                                      \
00206       "6.5",                                                                            \
00207       "Language model probability weight" },                                            \
00208 { "-fwdflatlw",                                                                         \
00209       ARG_FLOAT32,                                                                      \
00210       "8.5",                                                                            \
00211       "Language model probability weight for flat lexicon (2nd pass) decoding" },       \
00212 { "-bestpathlw",                                                                        \
00213       ARG_FLOAT32,                                                                      \
00214       "9.5",                                                                            \
00215       "Language model probability weight for bestpath search" },                        \
00216 { "-ascale",                                                                            \
00217       ARG_FLOAT32,                                                                      \
00218       "20.0",                                                                           \
00219       "Inverse of acoustic model scale for confidence score calculation" },             \
00220 { "-wip",                                                                               \
00221       ARG_FLOAT32,                                                                      \
00222       "0.65",                                                                           \
00223       "Word insertion penalty" },                                                       \
00224 { "-nwpen",                                                                             \
00225       ARG_FLOAT32,                                                                      \
00226       "1.0",                                                                            \
00227       "New word transition penalty" },                                                  \
00228 { "-pip",                                                                               \
00229       ARG_FLOAT32,                                                                      \
00230       "1.0",                                                                            \
00231       "Phone insertion penalty" },                                                      \
00232 { "-uw",                                                                                \
00233       ARG_FLOAT32,                                                                      \
00234       "1.0",                                                                            \
00235       "Unigram weight" },                                                               \
00236 { "-silprob",                                                                           \
00237       ARG_FLOAT32,                                                                      \
00238       "0.005",                                                                          \
00239       "Silence word transition probability" },                                          \
00240 { "-fillprob",                                                                          \
00241       ARG_FLOAT32,                                                                      \
00242       "1e-8",                                                                           \
00243         "Filler word transition probability" }, \
00244 { "-bghist",   \
00245       ARG_BOOLEAN, \
00246       "no", \
00247       "Bigram-mode: If TRUE only one BP entry/frame; else one per LM state" }, \
00248 { "-lextreedump", \
00249       ARG_INT32, \
00250       "0", \
00251       "Whether to dump the lextree structure to stderr (for debugging), 1 for Ravi's format, 2 for Dot format, Larger than 2 will be treated as Ravi's format" }
00252 
/**
 * Pronunciation dictionary options.
 *
 * Each entry is a four-field initializer: option name, ARG_* type constant,
 * default value (a string, or NULL when none is given) and help text.
 *
 * -dict is the only option in this header declared with a REQARG_* type
 * (REQARG_STRING); every other option uses a plain ARG_* type.  -fdict
 * names the noise word dictionary, -dictcase and -usewdphones are booleans
 * defaulting to "no", and -maxnewoov defaults to "20".
 */
00254 #define POCKETSPHINX_DICT_OPTIONS \
00255     { "-dict",                                                  \
00256       REQARG_STRING,                                            \
00257       NULL,                                                     \
00258       "Main pronunciation dictionary (lexicon) input file" },   \
00259     { "-fdict",                                                 \
00260       ARG_STRING,                                               \
00261       NULL,                                                     \
00262       "Noise word pronunciation dictionary input file" },       \
00263     { "-dictcase",                                              \
00264       ARG_BOOLEAN,                                              \
00265       "no",                                                     \
00266       "Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)" },     \
00267     { "-maxnewoov",                                             \
00268       ARG_INT32,                                                \
00269       "20",                                                     \
00270       "Maximum new OOVs that can be added at run time" },       \
00271     { "-usewdphones",                                           \
00272       ARG_BOOLEAN,                                              \
00273       "no",                                                     \
00274       "Use within-word phones only" }
00275 
/**
 * Acoustic model options.
 *
 * Each entry is a four-field initializer: option name, ARG_* type constant,
 * default value (a string, or NULL when none is given) and help text.
 *
 * -hmm names a directory of acoustic model files, and the other ARG_STRING
 * entries with a NULL default (-featparams, -mdef, -senmgau, -tmat, -mean,
 * -var, -mixw, -sendump, -mllr, -kdtree) each name a single file.  The
 * *floor options are
 * ARG_FLOAT32 lower bounds applied to data read from the matching file.
 * The remaining entries tune Gaussian computation (-ds, -topn, -topn_beam,
 * -kdmaxdepth, -kdmaxbbi), memory-mapped I/O (-mmap) and the log base
 * (-logbase).
 *
 * -topn_beam is an ARG_STRING with default "0" rather than a numeric type;
 * its help text says it may also be a per-feature list.
 */
00277 #define POCKETSPHINX_ACMOD_OPTIONS \
00278 { "-hmm",                                                                       \
00279       ARG_STRING,                                                               \
00280       NULL,                                                                     \
00281       "Directory containing acoustic model files."},                            \
00282 { "-featparams",                                                                \
00283       ARG_STRING,                                                               \
00284       NULL,                                                                     \
00285       "File containing feature extraction parameters."},                        \
00286 { "-mdef",                                                                      \
00287       ARG_STRING,                                                               \
00288       NULL,                                                                     \
00289       "Model definition input file" },                                          \
00290 { "-senmgau", \
00291       ARG_STRING,                                                               \
00292       NULL,                                                                     \
00293       "Senone to codebook mapping input file (usually not needed)" }, \
00294 { "-tmat",                                                                      \
00295       ARG_STRING,                                                               \
00296       NULL,                                                                     \
00297       "HMM state transition matrix input file" },                               \
00298 { "-tmatfloor",                                                                 \
00299       ARG_FLOAT32,                                                              \
00300       "0.0001",                                                                 \
00301       "HMM state transition probability floor (applied to -tmat file)" },       \
00302 { "-mean",                                                                      \
00303       ARG_STRING,                                                               \
00304       NULL,                                                                     \
00305       "Mixture gaussian means input file" },                                    \
00306 { "-var",                                                                       \
00307       ARG_STRING,                                                               \
00308       NULL,                                                                     \
00309       "Mixture gaussian variances input file" },                                \
00310 { "-varfloor",                                                                  \
00311       ARG_FLOAT32,                                                              \
00312       "0.0001",                                                                 \
00313       "Mixture gaussian variance floor (applied to data from -var file)" },     \
00314 { "-mixw",                                                                      \
00315       ARG_STRING,                                                               \
00316       NULL,                                                                     \
00317       "Senone mixture weights input file (uncompressed)" },                     \
00318 { "-mixwfloor",                                                                 \
00319       ARG_FLOAT32,                                                              \
00320       "0.0000001",                                                              \
00321       "Senone mixture weights floor (applied to data from -mixw file)" },       \
00322 { "-sendump",                                                                   \
00323       ARG_STRING,                                                               \
00324       NULL,                                                                     \
00325       "Senone dump (compressed mixture weights) input file" },                  \
00326 { "-mllr",                                                                      \
00327       ARG_STRING,                                                               \
00328       NULL,                                                                     \
00329       "MLLR transformation to apply to means and variances" },                  \
00330 { "-mmap",                                                                      \
00331       ARG_BOOLEAN,                                                              \
00332       "yes",                                                                    \
00333       "Use memory-mapped I/O (if possible) for model files" },                  \
00334 { "-ds",                                                                        \
00335       ARG_INT32,                                                                \
00336       "1",                                                                      \
00337       "Frame GMM computation downsampling ratio" },                             \
00338 { "-topn",                                                                      \
00339       ARG_INT32,                                                                \
00340       "4",                                                                      \
00341       "Maximum number of top Gaussians to use in scoring." },                   \
00342 { "-topn_beam",                                                                 \
00343       ARG_STRING,                                                               \
00344       "0",                                                                     \
00345       "Beam width used to determine top-N Gaussians (or a list, per-feature)" },\
00346 { "-kdtree",                                                                    \
00347       ARG_STRING,                                                               \
00348       NULL,                                                                     \
00349       "kd-Tree file for Gaussian selection" },                                  \
00350 { "-kdmaxdepth",                                                                \
00351       ARG_INT32,                                                                \
00352       "0",                                                                      \
00353       "Maximum depth of kd-Trees to use" },                                     \
00354 { "-kdmaxbbi",                                                                  \
00355       ARG_INT32,                                                                \
00356       "-1",                                                                     \
00357       "Maximum number of Gaussians per leaf node in kd-Trees" },                \
00358 { "-logbase",                                                                   \
00359       ARG_FLOAT32,                                                              \
00360       "1.0001",                                                                 \
00361       "Base in which all log-likelihoods calculated" }
00362 
/**
 * All-empty option entry: NULL name, type 0, NULL default, NULL help text.
 * NOTE(review): presumably the sentinel that terminates an option table
 * built from the macros in this header -- confirm against <cmd_ln.h>.
 */
00363 #define CMDLN_EMPTY_OPTION { NULL, 0, NULL, NULL }
00364 
00365 #endif /* __PS_CMDLN_MACRO_H__ */

Generated on Tue Aug 17 2010 for PocketSphinx by doxygen 1.7.1