• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

include/feat.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * feat.h -- Cepstral features computation.
00039  *
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1999 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * $Log$
00049  * Revision 1.1  2006/04/05  20:27:30  dhdfu
00050  * A Great Reorganization of header files and executables
00051  * 
00052  * Revision 1.17  2006/02/23 03:59:40  arthchan2003
00053  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc.
00054  *
00055  * Revision 1.16.4.1  2005/07/05 06:25:08  arthchan2003
00056  * Fixed dox-doc.
00057  *
00058  * Revision 1.16  2005/06/22 03:29:35  arthchan2003
00059  * Makefile.am s  for all subdirectory of libs3decoder/
00060  *
00061  * Revision 1.5  2005/06/13 04:02:56  archan
00062  * Fixed most doxygen-style documentation under libs3decoder.
00063  *
00064  * Revision 1.4  2005/04/21 23:50:26  archan
00065  * Some more refactoring of how reporting of structures inside kbcore_t is done; it is now 50% nice. Also added a class-based LM test case to test-decode.sh.in.  At this moment, everything in search mode 5 is already done.  It is time to test whether the search can really be used.
00066  *
00067  * Revision 1.3  2005/03/30 01:22:46  archan
00068  * Fixed mistakes in last updates. Add
00069  *
00070  * 
00071  * 20.Apr.2001  RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
00072  *              Adding feat_free() to free allocated memory
00073  * 
00074  * 04-Jan-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00075  *              Started.
00076  */
00077 
00078 
00079 #ifndef _S3_FEAT_H_
00080 #define _S3_FEAT_H_
00081 
00082 #include <stdio.h>
00083 
00084 /* Win32/WinCE DLL gunk */
00085 #include <sphinxbase_export.h>
00086 #include <prim_type.h>
00087 #include <fe.h>
00088 #include <cmn.h>
00089 #include <agc.h>
00090 
00091 #ifdef __cplusplus
00092 extern "C" {
00093 #endif
00094 #if 0
00095 /* Fool Emacs. */
00096 }
00097 #endif
00098 
00102 #define LIVEBUFBLOCKSIZE        256    /* NOTE(review): presumably the block size for live-mode buffers; units and users are not visible in this header -- confirm */
00104 #define S3_MAX_FRAMES           15000    /* RAH, I believe this is still too large, but better than before */
00105 /* Expands to a comma-separated list of four-field initializers { option name, ARG_* type, default string or NULL (presumably the default value), help text } for the options -feat through -svspec; the last entry has no trailing comma. */
00106 #define cepstral_to_feature_command_line_macro()                        \
00107 { "-feat",                                                              \
00108       ARG_STRING,                                                       \
00109       "1s_c_d_dd",                                                      \
00110       "Feature stream type, depends on the acoustic model" },           \
00111 { "-ceplen",                                                            \
00112       ARG_INT32,                                                        \
00113       "13",                                                             \
00114      "Number of components in the input feature vector" },              \
00115 { "-cmn",                                                               \
00116       ARG_STRING,                                                       \
00117       "current",                                                        \
00118       "Cepstral mean normalization scheme ('current', 'prior', or 'none')" }, \
00119 { "-cmninit",                                                           \
00120       ARG_STRING,                                                       \
00121       "8.0",                                                            \
00122       "Initial values (comma-separated) for cepstral mean when 'prior' is used" }, \
00123 { "-varnorm",                                                           \
00124       ARG_BOOLEAN,                                                      \
00125       "no",                                                             \
00126       "Variance normalize each utterance (only if CMN == current)" },   \
00127 { "-agc",                                                               \
00128       ARG_STRING,                                                       \
00129       "none",                                                           \
00130       "Automatic gain control for c0 ('max', 'emax', 'noise', or 'none')" }, \
00131 { "-agcthresh",                                                         \
00132       ARG_FLOAT32,                                                      \
00133       "2.0",                                                            \
00134       "Initial threshold for automatic gain control" },                 \
00135 { "-lda",                                                               \
00136       ARG_STRING,                                                       \
00137       NULL,                                                             \
00138       "File containing transformation matrix to be applied to features (single-stream features only)" }, \
00139 { "-ldadim",                                                            \
00140       ARG_INT32,                                                        \
00141       "0",                                                              \
00142       "Dimensionality of output of feature transformation (0 to use entire matrix)" }, \
00143 {"-svspec",                                                             \
00144      ARG_STRING,                                                        \
00145      NULL,                                                           \
00146      "Subvector specification (e.g., 24,0-11/25,12-23/26-38 or 0-12/13-25/26-38)"}
00147 
00155 typedef struct feat_s {  /* Feature-computation state shared by the feat_* functions in this header; field notes marked "presumably" are inferred from names and need confirming. */
00156     int refcount;       /* Reference count; presumably managed by feat_retain()/feat_free() -- confirm */
00157     char *name;         /* Feature type name; read by feat_name() */
00158     int32 cepsize;      /* Read by feat_cepsize(); presumably the input cepstral vector length (cf. -ceplen) */
00159     int32 n_stream;     /* Read by feat_n_stream(); also the fallback result of feat_dimension1() when n_sv is 0 */
00160     int32 *stream_len;  /* Array indexed by feat_stream_len(f,i); fallback for feat_dimension2() when sv_len is NULL */
00161     int32 window_size;  /* Read by feat_window_size(); NOTE(review): presumably frames of context needed per side -- confirm */
00163     int32 n_sv;         /* When nonzero, feat_dimension1() returns this instead of n_stream */
00164     int32 *sv_len;      /* When non-NULL, feat_dimension2() indexes this instead of stream_len */
00165     int32 **subvecs;    /* Presumably the subvector specification set via feat_set_subvecs() -- confirm */
00166     mfcc_t *sv_buf;      /* Presumably scratch space for subvector processing -- confirm */
00167     int32 sv_dim;       /* Presumably the total dimensionality covered by the subvectors -- confirm */
00169     cmn_type_t cmn;     /* Cepstral mean normalization type (cf. -cmn) */
00170     int32 varnorm;      /* Variance normalization setting (cf. -varnorm); presumably a boolean flag */
00172     agc_type_t agc;     /* Automatic gain control type (cf. -agc) */
00186     void (*compute_feat)(struct feat_s *fcb, mfcc_t **input, mfcc_t **feat);  /* Callback taking this struct, input vectors and output vectors; NOTE(review): exact frame/window contract not visible here */
00187     cmn_t *cmn_struct;  /* Presumably the CMN state object used when cmn is enabled -- confirm */
00189     agc_t *agc_struct;  /* Presumably the AGC state object used when agc is enabled -- confirm */
00192     mfcc_t **cepbuf;    /* Presumably the cepstral frame buffer for live-mode processing -- confirm */
00193     mfcc_t **tmpcepbuf; /* Presumably a temporary view/copy of cepbuf frames -- confirm */
00194     int32   bufpos;     /* Presumably the write position within cepbuf -- confirm */
00195     int32   curpos;     /* Presumably the read/processing position within cepbuf -- confirm */
00197     mfcc_t ***lda; /* Presumably the transformation matrices loaded by feat_read_lda() -- confirm */
00198     uint32 n_lda;   /* Presumably the number of matrices in lda -- confirm */
00199     uint32 out_dim; /* Returned by feat_dimension() */
00200 } feat_t;
00201 
00205 #define feat_name(f)            ((f)->name)  /* Name string stored in feat_t *f */
00206 
00209 #define feat_cepsize(f)         ((f)->cepsize)  /* cepsize field of feat_t *f */
00210 
00213 #define feat_window_size(f)     ((f)->window_size)  /* window_size field of feat_t *f */
00214 
00219 #define feat_n_stream(f)        ((f)->n_stream)  /* n_stream field of feat_t *f */
00220 
00225 #define feat_stream_len(f,i)    ((f)->stream_len[i])  /* Element i of the stream_len array of feat_t *f */
00226 
00229 #define feat_dimension1(f)      ((f)->n_sv ? (f)->n_sv : (f)->n_stream)  /* n_sv if nonzero, otherwise n_stream; evaluates f more than once; every use of f is parenthesized so arguments such as &x or p + 1 expand correctly */
00230 
00233 #define feat_dimension2(f,i)    ((f)->sv_len ? (f)->sv_len[i] : (f)->stream_len[i])  /* sv_len[i] if sv_len is non-NULL, otherwise stream_len[i]; evaluates f more than once; every use of f is parenthesized */
00234 
00237 #define feat_dimension(f)       ((f)->out_dim)  /* out_dim field of feat_t *f */
00238 
00261 SPHINXBASE_EXPORT
00262 int32 **parse_subvecs(char const *str);  /* Takes a string and returns an int32 ** ; NOTE(review): presumably parses the subvector specification format shown in the -svspec help text, with the result released by subvecs_free() -- confirm */
00263 
00267 SPHINXBASE_EXPORT
00268 void subvecs_free(int32 **subvecs);  /* NOTE(review): presumably releases an int32 ** obtained from parse_subvecs() -- confirm */
00269 
00278 SPHINXBASE_EXPORT
00279 int32 feat_s2mfc_read(char *file,       /* Presumably the path of the cepstrum file to read -- confirm */
00280                 int32 win,              /* NOTE(review): presumably the number of extra context frames on each side -- confirm */
00282                 int32 sf, int32 ef,     /* Start and end frames (cf. the sf/ef comment on feat_s2mfc2feat in this header) */
00285                 mfcc_t ***out_mfc,      /* Output pointer; presumably receives the array of frames read -- confirm */
00288                 int32 maxfr,            /* Presumably the maximum number of frames to read -- confirm */
00291                 int32 cepsize           /* Presumably the expected length of each cepstral vector -- confirm */
00292     );  /* NOTE(review): meaning of the int32 return value is not visible in this header */
00293 
00306 SPHINXBASE_EXPORT
00307 mfcc_t ***feat_array_alloc(feat_t *fcb, /* Feature descriptor; presumably supplies the stream layout for the allocation -- confirm */
00309                            int32 nfr    /* Presumably the number of frames to allocate -- confirm */
00310     );  /* Returns an mfcc_t *** ; presumably released with feat_array_free() -- confirm */
00311 
00315 SPHINXBASE_EXPORT
00316 void feat_array_free(mfcc_t ***feat);  /* NOTE(review): presumably releases an array obtained from feat_array_alloc() -- confirm */
00317 
00318 
00334 SPHINXBASE_EXPORT
00335 feat_t *feat_init(char const *type,  /* Feature type string; presumably a -feat value such as "1s_c_d_dd" -- confirm */
00336                   cmn_type_t cmn, /* Cepstral mean normalization type */
00339                   int32 varnorm,  /* Variance normalization setting (cf. -varnorm); presumably nonzero enables it */
00342                   agc_type_t agc, /* Automatic gain control type */
00344                   int32 breport, /* NOTE(review): presumably whether to print a report on creation (cf. feat_report()) -- confirm */
00345                   int32 cepsize  /* Presumably the number of input cepstral components (cf. -ceplen) -- confirm */
00348     );  /* Returns a feat_t * ; NOTE(review): failure behavior is not visible in this header */
00349 
00354 SPHINXBASE_EXPORT
00355 int32 feat_read_lda(feat_t *feat,        /* Feature descriptor; presumably receives the loaded transform -- confirm */
00356                     const char *ldafile, /* Presumably the transformation matrix file (cf. -lda) -- confirm */
00357                     int32 dim            /* Presumably the output dimensionality, 0 for the entire matrix (cf. -ldadim) -- confirm */
00358     );  /* NOTE(review): meaning of the int32 return value is not visible in this header */
00359 
00363 SPHINXBASE_EXPORT
00364 void feat_lda_transform(feat_t *fcb,            /* Feature descriptor; presumably holds the transform to apply -- confirm */
00365                         mfcc_t ***inout_feat,   /* Feature array; the name suggests it is transformed in place -- confirm */
00366                         uint32 nfr              /* Presumably the number of frames in inout_feat -- confirm */
00367     );
00368 
00387 SPHINXBASE_EXPORT
00388 int feat_set_subvecs(feat_t *fcb, int32 **subvecs);  /* NOTE(review): presumably installs a subvector specification (e.g. from parse_subvecs()) on fcb; ownership of subvecs and the meaning of the int return are not visible here -- confirm */
00389 
00393 SPHINXBASE_EXPORT
00394 void feat_print(feat_t *fcb,            /* Feature descriptor; presumably describes the layout of feat -- confirm */
00395                 mfcc_t ***feat,         /* Feature array to print */
00396                 int32 nfr,              /* Presumably the number of frames in feat -- confirm */
00397                 FILE *fp                /* Stream passed in by the caller; presumably the print destination -- confirm */
00398     );
00399 
00400   
00417 SPHINXBASE_EXPORT
00418 int32 feat_s2mfc2feat(feat_t *fcb,      /* Feature descriptor */
00419                       const char *file, /* Presumably the input cepstrum file name -- confirm */
00420                       const char *dir,  /* Presumably a directory prefix for file -- confirm */
00422                       const char *cepext,/* Presumably the cepstrum file extension -- confirm */
00425                       int32 sf, int32 ef,   /* Start/End frames
00426                                                within file to be read. Use
00427                                                0,-1 to process entire
00428                                                file */
00429                       mfcc_t ***feat,   /* Feature array; presumably filled as output -- confirm */
00431                       int32 maxfr       /* Presumably the capacity of feat in frames -- confirm */
00435     );  /* NOTE(review): meaning of the int32 return value is not visible in this header */
00436 
00437 
00466 SPHINXBASE_EXPORT
00467 int32 feat_s2mfc2feat_live(feat_t  *fcb,     /* Feature descriptor */
00468                            mfcc_t **uttcep,  /* Presumably the incoming block of cepstral frames -- confirm */
00469                            int32 *inout_ncep,/* In/out frame count, per the name; presumably frames supplied on entry and frames consumed on return -- confirm */
00471                            int32 beginutt,   /* Presumably nonzero when this block starts an utterance -- confirm */
00472                            int32 endutt,     /* Presumably nonzero when this block ends an utterance -- confirm */
00473                            mfcc_t ***ofeat   /* Presumably the output feature array -- confirm */
00476     );  /* NOTE(review): meaning of the int32 return value is not visible in this header */
00477 
00478 
00484 SPHINXBASE_EXPORT
00485 feat_t *feat_retain(feat_t *f);  /* NOTE(review): presumably increments f->refcount and returns f -- confirm */
00486 
00492 SPHINXBASE_EXPORT
00493 int feat_free(feat_t *f /* feat_t to release; the HISTORY note in this file says feat_free() frees allocated memory */
00494     );  /* NOTE(review): the int return is presumably the remaining reference count -- confirm */
00495 
00499 SPHINXBASE_EXPORT
00500 void feat_report(feat_t *f /* feat_t to report on; NOTE(review): output destination and format are not visible in this header */
00501     );
00502 #ifdef __cplusplus
00503 }
00504 #endif
00505 
00506 
00507 #endif

Generated on Mon Aug 29 2011 for SphinxBase by  doxygen 1.7.1