MeCab
/home/taku/mecab/mecab/src/mecab.h
Go to the documentation of this file.
00001 /*
00002   MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
00003 
00004   Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
00005   Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
00006 */
00007 #ifndef MECAB_MECAB_H_
00008 #define MECAB_MECAB_H_
00009 
00010 /* C/C++ common data structures  */
00011 
/*
 * Descriptor for one dictionary.  Records link to one another through
 * `next` (a pointer to the same struct type).
 * NOTE(review): the per-field notes below are inferred from the field names
 * and from the MECAB_*_DIC constants declared in this header -- confirm
 * against the implementation before relying on them.
 */
00015 struct mecab_dictionary_info_t {
        /* presumably the path of the dictionary file */
00020   const char                     *filename;
00021 
        /* presumably the name of the dictionary's character encoding */
00025   const char                     *charset;
00026 
        /* presumably the number of entries in the dictionary -- TODO confirm unit */
00030   unsigned int                    size;
00031 
        /* presumably one of MECAB_SYS_DIC / MECAB_USR_DIC / MECAB_UNK_DIC */
00036   int                             type;
00037 
        /* presumably the size of the left-context attribute set -- TODO confirm */
00041   unsigned int                    lsize;
00042 
        /* presumably the size of the right-context attribute set -- TODO confirm */
00046   unsigned int                    rsize;
00047 
        /* presumably the dictionary format version */
00051   unsigned short                  version;
00052 
        /* next descriptor in the chain */
00056   struct mecab_dictionary_info_t *next;
00057 };
00058 
/*
 * Connection record between two mecab_node_t objects.  It holds one node
 * pointer and one path pointer for each of the "r" and "l" sides, plus an
 * integer cost and a float probability.
 * NOTE(review): "r"/"l" presumably mean right/left, and rnext/lnext
 * presumably chain the paths attached to the same node -- confirm against
 * the implementation.
 */
00062 struct mecab_path_t {
        /* presumably the node on the right side of this path */
00066   struct mecab_node_t* rnode;
00067 
        /* presumably the next path in the right-side list */
00071   struct mecab_path_t* rnext;
00072 
        /* presumably the node on the left side of this path */
00076   struct mecab_node_t* lnode;
00077 
        /* presumably the next path in the left-side list */
00082   struct mecab_path_t* lnext;
00083 
        /* cost of this path (int here; mecab_node_t::cost is long) */
00087   int                  cost;
00088 
        /* presumably a probability attached to this path -- TODO confirm when it is filled */
00092   float                prob;
00093 };
00094 
/*
 * One node of the analysis result.  Nodes reference each other through four
 * node pointers (prev/next/enext/bnext) and reference mecab_path_t lists
 * through rpath/lpath.
 * NOTE(review): every per-field note below is inferred from the field's name
 * and type only; confirm against the implementation before relying on it.
 */
00098 struct mecab_node_t {
        /* presumably the previous node in the result sequence */
00102   struct mecab_node_t  *prev;
00103 
        /* presumably the next node in the result sequence */
00107   struct mecab_node_t  *next;
00108 
        /* presumably the next node sharing the same end position */
00112   struct mecab_node_t  *enext;
00113 
        /* presumably the next node sharing the same begin position */
00117   struct mecab_node_t  *bnext;
00118 
        /* presumably the head of the list of paths on the right side */
00123   struct mecab_path_t  *rpath;
00124 
        /* presumably the head of the list of paths on the left side */
00129   struct mecab_path_t  *lpath;
00130 
        /* presumably the surface text; NOTE(review): may not be NUL-terminated, see `length` -- confirm */
00136   const char           *surface;
00137 
        /* presumably the feature string of this node */
00141   const char           *feature;
00142 
        /* presumably a node identifier */
00146   unsigned int          id;
00147 
        /* presumably the length of the surface text -- TODO confirm unit */
00151   unsigned short        length;
00152 
        /* a second length value; TODO confirm how it differs from `length` */
00156   unsigned short        rlength;
00157 
        /* presumably the right-context attribute id */
00161   unsigned short        rcAttr;
00162 
        /* presumably the left-context attribute id */
00166   unsigned short        lcAttr;
00167 
        /* presumably a part-of-speech id */
00171   unsigned short        posid;
00172 
        /* presumably a character-category id */
00176   unsigned char         char_type;
00177 
        /* presumably one of the MECAB_*_NODE constants declared in this header */
00182   unsigned char         stat;
00183 
        /* presumably non-zero when the node lies on the best path */
00187   unsigned char         isbest;
00188 
        /* presumably a forward accumulated value -- TODO confirm when it is filled */
00193   float                 alpha;
00194 
        /* presumably a backward accumulated value -- TODO confirm when it is filled */
00199   float                 beta;
00200 
        /* presumably a probability for this node -- TODO confirm when it is filled */
00205   float                 prob;
00206 
        /* presumably the cost of the word itself */
00210   short                 wcost;
00211 
        /* presumably the accumulated cost (long here; mecab_path_t::cost is int) */
00215   long                  cost;
00216 };
00217 
/*
 * Anonymous enum of node kinds, numbered 0 through 4.
 * NOTE(review): presumably the values stored in mecab_node_t::stat, with
 * NOR = normal, UNK = unknown word, BOS/EOS = beginning/end of sentence,
 * EON = end of an N-best enumeration -- confirm against the implementation.
 */
00221 enum {
00225   MECAB_NOR_NODE = 0,
00229   MECAB_UNK_NODE = 1,
00233   MECAB_BOS_NODE = 2,
00237   MECAB_EOS_NODE = 3,
00238 
00242   MECAB_EON_NODE = 4
00243 };
00244 
/*
 * Anonymous enum of dictionary kinds, numbered 0 through 2.
 * NOTE(review): presumably the values stored in
 * mecab_dictionary_info_t::type, with SYS = system dictionary, USR = user
 * dictionary, UNK = unknown-word dictionary -- confirm.
 */
00248 enum {
00252   MECAB_SYS_DIC = 0,
00253 
00257   MECAB_USR_DIC = 1,
00258 
00262   MECAB_UNK_DIC = 2
00263 };
00264 
/*
 * Anonymous enum of request-type constants.  Each value is a distinct power
 * of two (1, 2, 4, ... 64), so several can be OR-ed into a single int.
 * NOTE(review): presumably the `request_type` argument of the
 * *_request_type functions declared later in this header -- confirm the
 * meaning of each flag against the implementation.
 */
00268 enum {
00272   MECAB_ONE_BEST          = 1,
00276   MECAB_NBEST             = 2,
00280   MECAB_PARTIAL           = 4,
00286   MECAB_MARGINAL_PROB     = 8,
00291   MECAB_ALTERNATIVE       = 16,
00296   MECAB_ALL_MORPHS        = 32,
00297 
00302   MECAB_ALLOCATE_SENTENCE = 64
00303 };
00304 
00305 /* C interface  */
00306 #ifdef __cplusplus
00307 #include <cstdio>
00308 #else
#include <stdbool.h>  /* bool: return type of mecab_model_swap() */
00309 #include <stdio.h>
00310 #endif
00311 
00312 #ifdef __cplusplus
00313 extern "C" {
00314 #endif
00315 
/*
 * Linkage/export decoration for the declarations below.
 *
 *   _WIN32 with DLL_EXPORT    : MECAB_DLL_EXTERN and MECAB_DLL_CLASS_EXTERN
 *                               both become __declspec(dllexport)
 *   _WIN32 without DLL_EXPORT : MECAB_DLL_EXTERN becomes
 *                               __declspec(dllimport); MECAB_DLL_CLASS_EXTERN
 *                               is not set in this branch and therefore
 *                               takes the empty fallback below
 *   everything else           : MECAB_DLL_EXTERN is plain `extern`,
 *                               MECAB_DLL_CLASS_EXTERN is empty
 *
 * NOTE(review): <windows.h> is included here, i.e. inside the
 * `extern "C" {` block opened just above when compiling as C++.
 */
00316 #ifdef _WIN32
00317 #include <windows.h>
00318 #  ifdef DLL_EXPORT
00319 #    define MECAB_DLL_EXTERN  __declspec(dllexport)
00320 #    define MECAB_DLL_CLASS_EXTERN  __declspec(dllexport)
00321 #  else
00322 #    define MECAB_DLL_EXTERN  __declspec(dllimport)
00323 #  endif
00324 #endif
00325 
/* Fallback when no branch above defined the function decoration. */
00326 #ifndef MECAB_DLL_EXTERN
00327 #  define MECAB_DLL_EXTERN extern
00328 #endif
00329 
/* Fallback when no branch above defined the class decoration. */
00330 #ifndef MECAB_DLL_CLASS_EXTERN
00331 #  define MECAB_DLL_CLASS_EXTERN
00332 #endif
00333 
/*
 * Typedef names for the struct tags so C code can omit the `struct` keyword.
 * mecab_t, mecab_model_t and mecab_lattice_t have no definition in this
 * listing, so here they are usable only through pointers; the other three
 * alias the structs defined at the top of the header.
 */
00334   typedef struct mecab_t                 mecab_t;
00335   typedef struct mecab_model_t           mecab_model_t;
00336   typedef struct mecab_lattice_t         mecab_lattice_t;
00337   typedef struct mecab_dictionary_info_t mecab_dictionary_info_t;
00338   typedef struct mecab_node_t            mecab_node_t;
00339   typedef struct mecab_path_t            mecab_path_t;
00340 
00341 #ifndef SWIG
00342   /* C interface */
00343 
00344   /* old mecab interface */
/*
 * Tagger-style C API.  Every function except mecab_new(), mecab_new2() and
 * mecab_version() takes the mecab_t handle as its first argument.
 * NOTE(review): the accessors and parse functions parallel the methods of
 * MeCab::Tagger declared later in this header and are presumably thin
 * wrappers around them -- confirm against the implementation.
 */

        /* Create a handle from main()-style arguments or from one argument string. */
00348   MECAB_DLL_EXTERN mecab_t*      mecab_new(int argc, char **argv);
00349 
00353   MECAB_DLL_EXTERN mecab_t*      mecab_new2(const char *arg);
00354 
        /* `(void)`, not `()`: an empty list is not a prototype in C before C23. */
00358   MECAB_DLL_EXTERN const char*   mecab_version(void);
00359 
        /* NOTE(review): presumably the last error text for `mecab` -- confirm whether NULL is accepted. */
00363   MECAB_DLL_EXTERN const char*   mecab_strerror(mecab_t *mecab);
00364 
        /* NOTE(review): presumably releases a handle obtained from mecab_new*() -- confirm. */
00368   MECAB_DLL_EXTERN void          mecab_destroy(mecab_t *mecab);
00369 
        /* Option getters/setters; boolean options are passed as int in the C API. */
00373   MECAB_DLL_EXTERN int           mecab_get_partial(mecab_t *mecab);
00374 
00378   MECAB_DLL_EXTERN void          mecab_set_partial(mecab_t *mecab, int partial);
00379 
00383   MECAB_DLL_EXTERN float         mecab_get_theta(mecab_t *mecab);
00384 
00388   MECAB_DLL_EXTERN void          mecab_set_theta(mecab_t *mecab, float theta);
00389 
00393   MECAB_DLL_EXTERN int           mecab_get_lattice_level(mecab_t *mecab);
00394 
00398   MECAB_DLL_EXTERN void          mecab_set_lattice_level(mecab_t *mecab, int level);
00399 
00403   MECAB_DLL_EXTERN int           mecab_get_all_morphs(mecab_t *mecab);
00404 
00408   MECAB_DLL_EXTERN void          mecab_set_all_morphs(mecab_t *mecab, int all_morphs);
00409 
        /*
         * Parsing.  Variants ending in 2 add an explicit input length `len`;
         * variants ending in 3 additionally take a caller-supplied buffer
         * `ostr` and its size `olen`.
         * NOTE(review): presumably the result is written into `ostr` and the
         * non-buffer variants return storage owned by `mecab` -- confirm
         * lifetime and failure return values.
         */
00413   MECAB_DLL_EXTERN int           mecab_parse_lattice(mecab_t *mecab, mecab_lattice_t *lattice);
00414 
00418   MECAB_DLL_EXTERN const char*   mecab_sparse_tostr(mecab_t *mecab, const char *str);
00419 
00423   MECAB_DLL_EXTERN const char*   mecab_sparse_tostr2(mecab_t *mecab, const char *str, size_t len);
00424 
00428   MECAB_DLL_EXTERN char*         mecab_sparse_tostr3(mecab_t *mecab, const char *str, size_t len,
00429                                                      char *ostr, size_t olen);
00430 
        /* Parameters named to match the sibling mecab_sparse_tostr*() prototypes. */
00434   MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode(mecab_t *mecab, const char *str);
00435 
00439   MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode2(mecab_t *mecab, const char *str, size_t len);
00440 
        /* N-best parsing in one call; `N` is the requested number of results. */
00444   MECAB_DLL_EXTERN const char*   mecab_nbest_sparse_tostr(mecab_t *mecab, size_t N, const char *str);
00445 
00449   MECAB_DLL_EXTERN const char*   mecab_nbest_sparse_tostr2(mecab_t *mecab, size_t N,
00450                                                            const char *str, size_t len);
00451 
00455   MECAB_DLL_EXTERN char*         mecab_nbest_sparse_tostr3(mecab_t *mecab, size_t N,
00456                                                            const char *str, size_t len,
00457                                                            char *ostr, size_t olen);
00458 
        /*
         * Incremental N-best.
         * NOTE(review): presumably mecab_nbest_init*() must be called before
         * the mecab_nbest_next_*() functions -- confirm.
         */
00462   MECAB_DLL_EXTERN int           mecab_nbest_init(mecab_t *mecab, const char *str);
00463 
00467   MECAB_DLL_EXTERN int           mecab_nbest_init2(mecab_t *mecab, const char *str, size_t len);
00468 
00472   MECAB_DLL_EXTERN const char*   mecab_nbest_next_tostr(mecab_t *mecab);
00473 
00477   MECAB_DLL_EXTERN char*         mecab_nbest_next_tostr2(mecab_t *mecab, char *ostr, size_t olen);
00478 
00482   MECAB_DLL_EXTERN const mecab_node_t* mecab_nbest_next_tonode(mecab_t *mecab);
00483 
        /* Formats one node as text; the counterpart of Tagger::formatNode(const Node *). */
00487   MECAB_DLL_EXTERN const char*   mecab_format_node(mecab_t *mecab, const mecab_node_t *node);
00488 
        /* Returns a mecab_dictionary_info_t; further records are reachable through its `next` field. */
00492   MECAB_DLL_EXTERN const mecab_dictionary_info_t* mecab_dictionary_info(mecab_t *mecab);
00493 
00494   /* lattice interface */
/*
 * Lattice C API operating on a mecab_lattice_t handle.
 * NOTE(review): the function names parallel the methods of MeCab::Lattice
 * declared later in this header and are presumably thin wrappers around
 * them -- confirm against the implementation.
 */

        /* `(void)`, not `()`: an empty list is not a prototype in C before C23. */
00498   MECAB_DLL_EXTERN mecab_lattice_t *mecab_lattice_new(void);
00499 
        /* NOTE(review): presumably releases a lattice obtained from mecab_lattice_new() -- confirm. */
00503   MECAB_DLL_EXTERN void             mecab_lattice_destroy(mecab_lattice_t *lattice);
00504 
00508   MECAB_DLL_EXTERN void             mecab_lattice_clear(mecab_lattice_t *lattice);
00509 
        /* Boolean result returned as int. */
00514   MECAB_DLL_EXTERN int              mecab_lattice_is_available(mecab_lattice_t *lattice);
00515 
        /* Node accessors: the *_all_* variants return an array of node pointers, the others take a position. */
00519   MECAB_DLL_EXTERN mecab_node_t    *mecab_lattice_get_bos_node(mecab_lattice_t *lattice);
00520 
00524   MECAB_DLL_EXTERN mecab_node_t    *mecab_lattice_get_eos_node(mecab_lattice_t *lattice);
00525 
00530   MECAB_DLL_EXTERN mecab_node_t   **mecab_lattice_get_all_begin_nodes(mecab_lattice_t *lattice);
00534   MECAB_DLL_EXTERN mecab_node_t   **mecab_lattice_get_all_end_nodes(mecab_lattice_t *lattice);
00535 
00539   MECAB_DLL_EXTERN mecab_node_t    *mecab_lattice_get_begin_nodes(mecab_lattice_t *lattice, size_t pos);
00540 
00544   MECAB_DLL_EXTERN mecab_node_t    *mecab_lattice_get_end_nodes(mecab_lattice_t *lattice, size_t pos);
00545 
        /*
         * Input sentence.  The 2-suffixed setter takes an explicit length.
         * NOTE(review): whether the lattice copies `sentence` or keeps the
         * caller's pointer is not visible here (cf. MECAB_ALLOCATE_SENTENCE)
         * -- confirm before freeing the input.
         */
00549   MECAB_DLL_EXTERN const char      *mecab_lattice_get_sentence(mecab_lattice_t *lattice);
00550 
00554   MECAB_DLL_EXTERN void             mecab_lattice_set_sentence(mecab_lattice_t *lattice, const char *sentence);
00555 
00560   MECAB_DLL_EXTERN void             mecab_lattice_set_sentence2(mecab_lattice_t *lattice, const char *sentence, size_t len);
00561 
00565   MECAB_DLL_EXTERN size_t           mecab_lattice_get_size(mecab_lattice_t *lattice);
00566 
00570   MECAB_DLL_EXTERN double           mecab_lattice_get_z(mecab_lattice_t *lattice);
00571 
00575   MECAB_DLL_EXTERN void             mecab_lattice_set_z(mecab_lattice_t *lattice, double Z);
00576 
        /*
         * NOTE(review): theta is `double` here but `float` in
         * MeCab::Lattice::theta()/set_theta() and in mecab_get_theta(); the
         * mismatch is kept as-is because changing it would alter the ABI.
         */
00580   MECAB_DLL_EXTERN double           mecab_lattice_get_theta(mecab_lattice_t *lattice);
00581 
00586   MECAB_DLL_EXTERN void             mecab_lattice_set_theta(mecab_lattice_t *lattice, double theta);
00587 
        /* Boolean result returned as int. */
00591   MECAB_DLL_EXTERN int              mecab_lattice_next(mecab_lattice_t *lattice);
00592 
        /* Request-type handling; `request_type` presumably holds OR-ed MECAB_* request constants -- confirm. */
00596   MECAB_DLL_EXTERN int              mecab_lattice_get_request_type(mecab_lattice_t *lattice);
00597 
00601   MECAB_DLL_EXTERN int              mecab_lattice_has_request_type(mecab_lattice_t *lattice, int request_type);
00602 
00606   MECAB_DLL_EXTERN void             mecab_lattice_set_request_type(mecab_lattice_t *lattice, int request_type);
00607 
00612   MECAB_DLL_EXTERN void             mecab_lattice_add_request_type(mecab_lattice_t *lattice, int request_type);
00613 
00617   MECAB_DLL_EXTERN void             mecab_lattice_remove_request_type(mecab_lattice_t *lattice, int request_type);
00618 
        /* String output; the 2-suffixed variants take a caller-supplied buffer `buf` of `size` bytes. */
00622   MECAB_DLL_EXTERN const char      *mecab_lattice_tostr(mecab_lattice_t *lattice);
00623 
00627   MECAB_DLL_EXTERN const char      *mecab_lattice_tostr2(mecab_lattice_t *lattice, char *buf, size_t size);
00628 
00632   MECAB_DLL_EXTERN const char      *mecab_lattice_nbest_tostr(mecab_lattice_t *lattice, size_t N);
00633 
00638   MECAB_DLL_EXTERN const char      *mecab_lattice_nbest_tostr2(mecab_lattice_t *lattice, size_t N, char *buf, size_t size);
00639 
00643   MECAB_DLL_EXTERN const char      *mecab_lattice_strerror(mecab_lattice_t *lattice);
00644 
00645 
00646   /* model interface */
/*
 * Model C API operating on a mecab_model_t handle.
 * NOTE(review): the function names parallel the methods of MeCab::Model
 * declared later in this header and are presumably thin wrappers around
 * them -- confirm against the implementation.
 */

        /* Create a model from main()-style arguments or from one argument string. */
00650   MECAB_DLL_EXTERN mecab_model_t   *mecab_model_new(int argc, char **argv);
00651 
00655   MECAB_DLL_EXTERN mecab_model_t   *mecab_model_new2(const char *arg);
00656 
        /* NOTE(review): presumably releases a model obtained from mecab_model_new*() -- confirm. */
00661   MECAB_DLL_EXTERN void             mecab_model_destroy(mecab_model_t *model);
00662 
        /* Create a tagger / lattice handle from `model`; ownership of the result is not visible here. */
00666   MECAB_DLL_EXTERN mecab_t         *mecab_model_new_tagger(mecab_model_t *model);
00667 
00671   MECAB_DLL_EXTERN mecab_lattice_t *mecab_model_new_lattice(mecab_model_t *model);
00672 
        /*
         * The return type is `bool`, which is built in for C++ but must come
         * from <stdbool.h> in C translation units (before C23).
         * NOTE(review): what happens to `new_model` after the call is not
         * visible here -- confirm ownership.
         */
00676   MECAB_DLL_EXTERN bool mecab_model_swap(mecab_model_t *model, mecab_model_t *new_model);
00677 
        /* Returns a mecab_dictionary_info_t; further records are reachable through its `next` field. */
00681   MECAB_DLL_EXTERN const mecab_dictionary_info_t* mecab_model_dictionary_info(mecab_model_t *model);
00682 
00683   /* static functions */
/*
 * Entry points taking main()-style (argc, argv) and returning int.
 * NOTE(review): presumably the bodies of the mecab command-line tools, with
 * the return value usable as a process exit status -- confirm.
 */
00684   MECAB_DLL_EXTERN int           mecab_do(int argc, char **argv);
00685   MECAB_DLL_EXTERN int           mecab_dict_index(int argc, char **argv);
00686   MECAB_DLL_EXTERN int           mecab_dict_gen(int argc, char **argv);
00687   MECAB_DLL_EXTERN int           mecab_cost_train(int argc, char **argv);
00688   MECAB_DLL_EXTERN int           mecab_system_eval(int argc, char **argv);
00689   MECAB_DLL_EXTERN int           mecab_test_gen(int argc, char **argv);
00690 #endif
00691 
00692 #ifdef __cplusplus
00693 }
00694 #endif
00695 
00696 /* C++ interface */
00697 #ifdef __cplusplus
00698 
00699 namespace MeCab {
/* C++ names for the plain C structs defined at the top of this header. */
00700 typedef struct mecab_dictionary_info_t DictionaryInfo;
00701 typedef struct mecab_path_t            Path;
00702 typedef struct mecab_node_t            Node;
00703 
/*
 * Forward declarations: Allocator is only named (as the return type of
 * Lattice::allocator()) and has no definition in this header; Tagger is
 * defined further down.
 */
00704 template <typename N, typename P> class Allocator;
00705 class Tagger;
00706 
/*
 * Abstract lattice interface.  Every member is pure virtual except the
 * static factory create() and the inline virtual destructor.  Members inside
 * `#ifndef SWIG` sections are hidden from SWIG.
 * NOTE(review): the behavioural notes below are inferred from member names
 * and signatures -- confirm against the implementation.
 */
00710 class MECAB_DLL_CLASS_EXTERN Lattice {
00711 public:
        /* Not const, unlike the accessors below; presumably resets the lattice -- confirm. */
00715   virtual void clear()              = 0;
00716 
00721   virtual bool is_available() const = 0;
00722 
        /* presumably the beginning-of-sentence node (cf. MECAB_BOS_NODE) */
00728   virtual Node *bos_node() const              = 0;
00729 
        /* presumably the end-of-sentence node (cf. MECAB_EOS_NODE) */
00734   virtual Node *eos_node() const              = 0;
00735 
00736 #ifndef SWIG
00737 
        /* Array forms returning Node**; hidden from SWIG. */
00740   virtual Node **begin_nodes() const          = 0;
00741 
00745   virtual Node **end_nodes() const            = 0;
00746 #endif
00747 
        /* Per-position forms; `pos` is presumably an offset into the sentence -- TODO confirm unit. */
00754   virtual Node *end_nodes(size_t pos) const   = 0;
00755 
00762   virtual Node *begin_nodes(size_t pos) const = 0;
00763 
00769   virtual const char *sentence() const = 0;
00770 
        /* NOTE(review): whether `sentence` is copied or only referenced is not visible here -- confirm. */
00775   virtual void set_sentence(const char *sentence)             = 0;
00776 
00777 #ifndef SWIG
00778 
        /* Explicit-length overload; hidden from SWIG. */
00783   virtual void set_sentence(const char *sentence, size_t len) = 0;
00784 #endif
00785 
00790   virtual size_t size() const                                 = 0;
00791 
00796   virtual void   set_Z(double Z) = 0;
00797 
00802   virtual double Z() const = 0;
00803 
        /* theta is float here; the C wrappers mecab_lattice_{get,set}_theta use double. */
00808   virtual void  set_theta(float theta) = 0;
00809 
00814   virtual float theta() const          = 0;
00815 
        /* presumably advances to the next N-best result, returning false when there is none -- confirm */
00822   virtual bool next() = 0;
00823 
        /* `request_type` presumably holds OR-ed MECAB_* request constants -- confirm. */
00828   virtual int request_type() const                = 0;
00829 
00834   virtual bool has_request_type(int request_type) const = 0;
00835 
00840   virtual void set_request_type(int request_type) = 0;
00841 
00846   virtual void add_request_type(int request_type) = 0;
00847 
00852   virtual void remove_request_type(int request_type) = 0;
00853 
00854 #ifndef SWIG
00855 
        /* Allocator is only forward-declared in this header; hidden from SWIG. */
00858   virtual Allocator<Node, Path> *allocator() const = 0;
00859 #endif
00860 
        /* String output.  These overloads are not const; lifetime of the returned pointer is not visible here. */
00867   virtual const char *toString()                = 0;
00868 
00876   virtual const char *toString(const Node *node) = 0;
00877 
00885   virtual const char *enumNBestAsString(size_t N) = 0;
00886 
00887 #ifndef SWIG
00888 
        /* Overloads taking a caller-supplied buffer `buf` of `size` bytes; hidden from SWIG. */
00895   virtual const char *toString(char *buf, size_t size) = 0;
00896 
00905   virtual const char *toString(const Node *node,
00906                                char *buf, size_t size) = 0;
00907 
00916   virtual const char *enumNBestAsString(size_t N, char *buf, size_t size) = 0;
00917 #endif
00918 
        /* presumably the last error message -- confirm */
00923   virtual const char *what() const            = 0;
00924 
00929   virtual void set_what(const char *str)        = 0;
00930 
00931 #ifndef SWIG
00932 
        /* Static factory; the only non-virtual member besides the destructor. */
00936   static Lattice *create();
00937 #endif
00938 
00939   virtual ~Lattice() {}
00940 };
00941 
/*
 * Abstract model interface.  Instances are obtained through the static
 * create() factories; the remaining non-static members are pure virtual
 * except the inline virtual destructor.
 * NOTE(review): the behavioural notes below are inferred from member names
 * and signatures -- confirm against the implementation.
 */
00945 class MECAB_DLL_CLASS_EXTERN Model {
00946 public:
        /* Returns a DictionaryInfo; further records are reachable through its `next` field. */
00951   virtual const DictionaryInfo *dictionary_info() const = 0;
00952 
        /* Factory for a Tagger; ownership of the result is not visible here -- confirm. */
00959   virtual Tagger  *createTagger() const = 0;
00960 
        /* Factory for a Lattice; ownership of the result is not visible here -- confirm. */
00965   virtual Lattice *createLattice() const = 0;
00966 
        /* NOTE(review): what happens to `model` after the call is not visible here -- confirm ownership. */
00978   virtual bool swap(Model *model) = 0;
00979 
00984   static const char *version();
00985 
00986   virtual ~Model() {}
00987 
        /*
         * The guard macro was misspelled `SIWG`, which is never defined, so
         * these factories were not hidden from SWIG the way every other
         * `#ifndef SWIG` section in this header is.  Plain C++ builds are
         * unaffected because neither macro is defined there.
         */
00988 #ifndef SWIG
00989 
00997   static Model* create(int argc, char **argv);
00998 
01007   static Model* create(const char *arg);
01008 #endif
01009 };
01010 
/*
 * Abstract tagger interface.  Apart from the static members (the two-argument
 * parse(), create() and version()) and the inline virtual destructor, every
 * member is pure virtual.
 * NOTE(review): the behavioural notes below are inferred from member names
 * and signatures -- confirm against the implementation.
 */
01014 class MECAB_DLL_CLASS_EXTERN Tagger {
01015 public:
        /* Static form: parses `lattice` with `model`, no Tagger instance needed. */
01028   static bool  parse(const Model &model, Lattice *lattice);
01029 
        /* The only const parse member; the string-based ones below are non-const. */
01039   virtual bool parse(Lattice *lattice) const                = 0;
01040 
        /* Lifetime of the returned string/node pointers is not visible here -- confirm before storing them. */
01049   virtual const char* parse(const char *str)                = 0;
01050 
01060   virtual const Node* parseToNode(const char *str)          = 0;
01061 
01072   virtual const char* parseNBest(size_t N, const char *str) = 0;
01073 
        /* Incremental N-best: presumably call parseNBestInit() first, then nextNode()/next() repeatedly -- confirm. */
01083   virtual bool  parseNBestInit(const char *str)             = 0;
01084 
01092   virtual const Node* nextNode()                            = 0;
01093 
01101   virtual const char* next()                                = 0;
01102 
01113   virtual const char* formatNode(const Node *node)          = 0;
01114 
01115 #ifndef SWIG
01116 
        /*
         * Overloads hidden from SWIG: they add an explicit input length
         * `len` and/or a caller-supplied output buffer `ostr` of `olen`
         * bytes.
         */
01126   virtual const char* parse(const char *str, size_t len, char *ostr, size_t olen) = 0;
01127 
01134   virtual const char* parse(const char *str, size_t len)                          = 0;
01135 
01142   virtual const Node* parseToNode(const char *str, size_t len)                    = 0;
01143 
01151   virtual const char* parseNBest(size_t N, const char *str, size_t len)           = 0;
01152 
01160   virtual bool  parseNBestInit(const char *str, size_t len)                  = 0;
01161 
01169   virtual const char* next(char *ostr , size_t olen)                        = 0;
01170 
01181   virtual const char* parseNBest(size_t N, const char *str,
01182                                  size_t len, char *ostr, size_t olen)       = 0;
01183 
01192   virtual const char* formatNode(const Node *node, char *ostr, size_t olen) = 0;
01193 #endif
01194 
        /* Option accessors; `request_type` presumably holds OR-ed MECAB_* request constants -- confirm. */
01200   virtual void set_request_type(int request_type) = 0;
01201 
01207   virtual int  request_type() const = 0;
01208 
01214   virtual bool  partial() const                             = 0;
01215 
01221   virtual void  set_partial(bool partial)                   = 0;
01222 
01228   virtual int   lattice_level() const                       = 0;
01229 
01235   virtual void  set_lattice_level(int level)                = 0;
01236 
01242   virtual bool  all_morphs() const                          = 0;
01243 
01249   virtual void  set_all_morphs(bool all_morphs)             = 0;
01250 
01255   virtual void  set_theta(float theta)                      = 0;
01256 
01261   virtual float theta() const                               = 0;
01262 
        /* Returns a DictionaryInfo; further records are reachable through its `next` field. */
01267   virtual const DictionaryInfo* dictionary_info() const = 0;
01268 
        /* presumably the last error message -- confirm */
01273   virtual const char* what() const = 0;
01274 
01275   virtual ~Tagger() {}
01276 
        /*
         * The guard macro was misspelled `SIWG`, which is never defined, so
         * these factories were not hidden from SWIG the way every other
         * `#ifndef SWIG` section in this header is.  Plain C++ builds are
         * unaffected because neither macro is defined there.
         */
01277 #ifndef SWIG
01278 
01286   static Tagger *create(int argc, char **argv);
01287 
01296   static Tagger *create(const char *arg);
01297 #endif
01298 
01303   static const char *version();
01304 };
01305 
01306 #ifndef SWIG
01307 
/*
 * Namespace-level factory, deleter and error functions, all hidden from SWIG
 * by the enclosing `#ifndef SWIG`.
 * NOTE(review): presumably equivalent to the static create() members of
 * Lattice / Model / Tagger, with each delete*() paired to the matching
 * create*() -- confirm against the implementation.
 */
01310 MECAB_DLL_EXTERN Lattice     *createLattice();
01311 
01315 MECAB_DLL_EXTERN Model       *createModel(int argc, char **argv);
01316 
01320 MECAB_DLL_EXTERN Model       *createModel(const char *arg);
01321 
01325 MECAB_DLL_EXTERN Tagger      *createTagger(int argc, char **argv);
01326 
01330 MECAB_DLL_EXTERN Tagger      *createTagger(const char *arg);
01331 
01338 MECAB_DLL_EXTERN void        deleteLattice(Lattice *lattice);
01339 
01340 
01347 MECAB_DLL_EXTERN void        deleteModel(Model *model);
01348 
01355 MECAB_DLL_EXTERN void        deleteTagger(Tagger *tagger);
01356 
/* NOTE(review): how getLastError() and getTaggerError() differ is not visible here -- confirm. */
01361 MECAB_DLL_EXTERN const char*  getLastError();
01362 
01368 MECAB_DLL_EXTERN const char*  getTaggerError();
01369 #endif
01370 }
01371 #endif
01372 #endif  /* MECAB_MECAB_H_ */