• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libsphinxbase/fe/fe_internal.h

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1996-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 #ifndef __FE_INTERNAL_H__
00039 #define __FE_INTERNAL_H__
00040 
00041 #ifdef HAVE_CONFIG_H
00042 #include <config.h>
00043 #endif
00044 
00045 #include <fe.h>
00046 #include <fixpoint.h>
00047 
00048 #ifdef __cplusplus
00049 extern "C" {
00050 #endif
00051 #if 0
00052 /* Fool Emacs. */
00053 }
00054 #endif
00055 
/* Numeric representation used by the front end, selected at compile time:
 *   FIXED16      - int16 (Q15) frame/window samples, int32 power spectrum
 *   FIXED_POINT  - fixed32 frame/window samples, int32 power spectrum
 *   otherwise    - float64 for everything
 * In struct fe_s, frame_t is the element type of the frame buffer and the
 * FFT twiddle tables, window_t of the Hamming window, and powspec_t of the
 * spec/mfspec buffers.
 * NOTE(review): the typedef name `complex` would clash with the `complex`
 * macro if <complex.h> were included before this header - confirm that no
 * including file does so. */
00056 #ifdef FIXED16
00057 /* Q15 format */
00058 typedef int16 frame_t;
00059 typedef int16 window_t;
00060 typedef int32 powspec_t;
00061 typedef struct { int16 r, i; } complex;
00062 #elif defined(FIXED_POINT)
00063 typedef fixed32 frame_t;
00064 typedef int32 powspec_t;
00065 typedef fixed32 window_t;
00066 typedef struct { fixed32 r, i; } complex;
00067 #else /* neither FIXED16 nor FIXED_POINT: floating point */
00068 typedef float64 frame_t;
00069 typedef float64 powspec_t;
00070 typedef float64 window_t;
00071 typedef struct { float64 r, i; } complex;
00072 #endif /* FIXED16 / FIXED_POINT */
00073 
00074 /* Values for the 'log_spec' field of struct fe_s.
         NOTE(review): no constant is defined for 0; presumably 0 means that
         log-spectrum output is disabled - confirm against the users of
         log_spec. */
00075 enum {
00076         RAW_LOG_SPEC = 1,
00077         SMOOTH_LOG_SPEC = 2
00078 };
00079 
00080 /* Values for the 'transform' field of struct fe_s.
         NOTE(review): presumably selects which DCT variant turns the mel
         log-spectrum into cepstra (cf. fe_spec2cep / fe_dct2 and its 'htk'
         flag) - confirm against the code that reads 'transform'. */
00081 enum {
00082         LEGACY_DCT = 0,
00083         DCT_II = 1,
00084         DCT_HTK = 2
00085 };
00086 
/* Mel filterbank configuration and precomputed tables.  struct fe_s holds a
 * pointer to one of these (its mel_fb member), and fe_build_melfilters() and
 * fe_compute_melcosine() each take one as their only argument. */
00087 typedef struct melfb_s melfb_t;
00089 struct melfb_s {
          /* NOTE(review): the rate and the filter edge frequencies are
             presumably in Hz (cf. the *_SAMPLING_RATE and *_FILT_FREQ
             defaults in this header) - confirm. */
00090     float32 sampling_rate;
00091     int32 num_cepstra;
00092     int32 num_filters;
          /* int32 here, whereas struct fe_s stores its fft_size as int16. */
00093     int32 fft_size;
00094     float32 lower_filt_freq;
00095     float32 upper_filt_freq;
00096     /* DCT coefficients. */
00097     mfcc_t **mel_cosine;
00098     /* Filter coefficients. */
00099     mfcc_t *filt_coeffs;
          /* NOTE(review): presumably per-filter index tables (first spectrum
             bin, offset into filt_coeffs, and width of each filter); array
             lengths are not visible here - confirm in the filter builder. */
00100     int16 *spec_start;
00101     int16 *filt_start;
00102     int16 *filt_width;
00103     /* Luxury mobile home. */
          /* NOTE(review): the comment above is a joke on the field name and
             does not explain the flag; its meaning must be taken from the
             filterbank code. */
00104     int32 doublewide;
          /* NOTE(review): presumably frequency-warping configuration; the
             strings are const-qualified, so this struct does not modify
             them - ownership is not visible here. */
00105     char const *warp_type;
00106     char const *warp_params;
00107     uint32 warp_id;
00108     /* Precomputed normalization constants for unitary DCT-II/DCT-III */
00109     mfcc_t sqrt_inv_n, sqrt_inv_2n;
00110     /* Value and coefficients for HTK-style liftering */
00111     int32 lifter_val;
00112     mfcc_t *lifter;
00113     /* Normalize filters to unit area */
00114     int32 unit_area;
00115     /* Round filter frequencies to DFT points (hurts accuracy, but is
00116        useful for legacy purposes) */
00117     int32 round_filters;
00118 };
00119 
00120 /* sqrt(1/2), also used for unitary DCT-II/DCT-III */
/* FLOAT2MFCC is not defined in this header; presumably it comes from the
 * included fe.h/fixpoint.h and converts the literal to mfcc_t - confirm. */
00121 #define SQRT_HALF FLOAT2MFCC(0.707106781186548)
00122 
/* Front-end state: configuration, precomputed tables and work buffers.
 * The prototypes in this header take an `fe_t *`; fe_t is not declared here
 * and is presumably a typedef of this struct provided by fe.h - confirm. */
00124 struct fe_s {
00125     cmd_ln_t *config;
00126     int refcount;
00127 
          /* NOTE(review): units of the framing parameters below (Hz,
             frames per second, samples, seconds) are not stated in this
             header - confirm before relying on them. */
00128     float32 sampling_rate;
00129     int16 frame_rate;
00130     int16 frame_shift;
00131 
00132     float32 window_length;
00133     int16 frame_size;
          /* int16 here, whereas struct melfb_s stores its fft_size as int32. */
00134     int16 fft_size;
00135 
00136     uint8 fft_order;
00137     uint8 feature_dimension;
00138     uint8 num_cepstra;
00139     uint8 remove_dc;
00140     uint8 log_spec;   /* RAW_LOG_SPEC or SMOOTH_LOG_SPEC (enum in this header) */
00141     uint8 swap;
00142     uint8 dither;
00143     uint8 transform;  /* LEGACY_DCT, DCT_II or DCT_HTK (enum in this header) */
00144 
00145     float32 pre_emphasis_alpha;
00146     int32 seed;       /* same type as the seed argument of fe_init_dither() */
00147 
00148     int16 frame_counter;
00149     uint8 start_flag;
00150     uint8 reserved;
00151 
00152     /* Twiddle factors for FFT. */
00153     frame_t *ccc, *sss;
00154     /* Mel filter parameters. */
00155     melfb_t *mel_fb;
00156     /* Half of a Hamming Window. */
00157     window_t *hamming_window;
00158 
00159     /* Temporary buffers for processing. */
00160     /* FIXME: too many of these. */
          /* Buffer sizes and ownership are not visible in this header. */
00161     int16 *spch;
00162     frame_t *frame;
00163     powspec_t *spec, *mfspec;
00164     int16 *overflow_samps;
00165     int16 num_overflow_samps;    
00166     int16 prior;
00167 };
00168 
/* Default front-end parameters associated with BB_SAMPLING_RATE (16000).
 * NOTE(review): "BB" presumably stands for broadband and the frequencies are
 * presumably in Hz - confirm.  The filter frequencies are floating-point
 * (double) constants; the other values are integer constants. */
00169 #define BB_SAMPLING_RATE 16000
00170 #define DEFAULT_BB_FFT_SIZE 512
00171 #define DEFAULT_BB_FRAME_SHIFT 160
00172 #define DEFAULT_BB_NUM_FILTERS 40
00173 #define DEFAULT_BB_LOWER_FILT_FREQ 133.33334
00174 #define DEFAULT_BB_UPPER_FILT_FREQ 6855.4976
00175 
/* Default front-end parameters associated with NB_SAMPLING_RATE (8000).
 * NOTE(review): "NB" presumably stands for narrowband - confirm.
 * Unlike the BB filter frequencies, 200 and 3500 are integer constants, so
 * they follow integer arithmetic unless converted to a floating type. */
00176 #define NB_SAMPLING_RATE 8000
00177 #define DEFAULT_NB_FFT_SIZE 256
00178 #define DEFAULT_NB_FRAME_SHIFT 80
00179 #define DEFAULT_NB_NUM_FILTERS 31
00180 #define DEFAULT_NB_LOWER_FILT_FREQ 200
00181 #define DEFAULT_NB_UPPER_FILT_FREQ 3500
00182 
/* Set up dithering with the given seed.
 * NOTE(review): presumably seeds the noise source used by fe_dither() and
 * should be called before it - confirm in the implementation. */
00183 void fe_init_dither(int32 seed);
00184 
00185 /* Apply 1/2 bit noise to a buffer of audio. */
/* buffer: nsamps int16 samples; the pointer is not const, so the samples may
 * be modified in place.  The meaning of the int32 return value is not
 * visible in this header. */
00186 int32 fe_dither(int16 *buffer, int32 nsamps);
00187 
00188 /* Load a frame of data into the fe. */
/* in: input samples (read-only), len: count passed with them.  The meaning
 * of the int return value is not visible in this header. */
00189 int fe_read_frame(fe_t *fe, int16 const *in, int32 len);
00190 
00191 /* Shift the input buffer back and read more data. */
/* Same parameter shape as fe_read_frame(); return value semantics are not
 * visible in this header. */
00192 int fe_shift_frame(fe_t *fe, int16 const *in, int32 len);
00193 
00194 /* Process a frame of data into features. */
/* fea: output feature vector of mfcc_t; its required length is not stated
 * here (NOTE(review): presumably fe->feature_dimension elements - confirm). */
00195 int32 fe_write_frame(fe_t *fe, mfcc_t *fea);
00196 
00197 /* Initialization functions. */
/* The first two operate on a melfb_t, the last two on a window buffer of
 * in_len elements and on the fe_t itself.
 * NOTE(review): judging by their names they presumably fill the filter
 * tables, the mel_cosine DCT table, the Hamming window and the ccc/sss
 * twiddle tables respectively; return-value semantics of the int32
 * functions are not visible here - confirm in the implementation. */
00198 int32 fe_build_melfilters(melfb_t *MEL_FB);
00199 int32 fe_compute_melcosine(melfb_t *MEL_FB);
00200 void fe_create_hamming(window_t *in, int32 in_len);
00201 void fe_create_twiddle(fe_t *fe);
00202 
00203 /* Miscellaneous processing functions. */
/* fe_spec2cep and fe_dct2 read a const powspec_t mel log-spectrum and write
 * mfcc_t cepstra; fe_dct3 goes the other way, reading const cepstra and
 * writing a powspec_t log-spectrum.  Buffer lengths are not stated here.
 * NOTE(review): the 'htk' flag of fe_dct2 presumably selects HTK-style
 * scaling (cf. DCT_HTK) - confirm in the implementation. */
00204 void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep);
00205 void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk);
00206 void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec);
00207 
00208 #ifdef __cplusplus
00209 }
00210 #endif
00211 
00212 #endif /* __FE_INTERNAL_H__ */

Generated on Mon Aug 29 2011 for SphinxBase by  doxygen 1.7.1