* Link in tagger, to work on integrating POS tagging

This commit is contained in:
Matthew Honnibal 2014-12-07 15:29:41 +11:00
parent 0c7aeb9de7
commit 5caabec789
2 changed files with 4 additions and 0 deletions

View File

@@ -8,6 +8,7 @@ from cymem.cymem cimport Pool
from .typedefs cimport hash_t from .typedefs cimport hash_t
from .tokens cimport Tokens, TokenC from .tokens cimport Tokens, TokenC
from .lexeme cimport Lexeme from .lexeme cimport Lexeme
from .tagger cimport Tagger
from .utf8string cimport StringStore, UniStr from .utf8string cimport StringStore, UniStr
@@ -29,6 +30,7 @@ cdef class Language:
cdef PreshMap _cache cdef PreshMap _cache
cdef PreshMap _specials cdef PreshMap _specials
cpdef readonly Lexicon lexicon cpdef readonly Lexicon lexicon
cpdef readonly Tagger pos_tagger
cdef object _prefix_re cdef object _prefix_re
cdef object _suffix_re cdef object _suffix_re

View File

@@ -39,10 +39,12 @@ cdef class Language:
self._infix_re = re.compile(infix) self._infix_re = re.compile(infix)
self.lexicon = Lexicon(self.set_flags) self.lexicon = Lexicon(self.set_flags)
self._load_special_tokenization(rules) self._load_special_tokenization(rules)
self.pos_tagger = None
def load(self): def load(self):
self.lexicon.load(path.join(util.DATA_DIR, self.name, 'lexemes')) self.lexicon.load(path.join(util.DATA_DIR, self.name, 'lexemes'))
self.lexicon.strings.load(path.join(util.DATA_DIR, self.name, 'strings')) self.lexicon.strings.load(path.join(util.DATA_DIR, self.name, 'strings'))
self.pos_tagger = Tagger(path.join(util.DATA_DIR, self.name, 'pos'))
cpdef Tokens tokens_from_list(self, list strings): cpdef Tokens tokens_from_list(self, list strings):
cdef int length = sum([len(s) for s in strings]) cdef int length = sum([len(s) for s in strings])