mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-04 21:50:35 +03:00
* Rename Lexicon._dict to Lexicon._map
This commit is contained in:
parent
2ee8a1e61f
commit
8c2938fe01
|
@ -1,5 +1,7 @@
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
|
|
||||||
|
from cpython cimport Py_UNICODE_ISSPACE, Py_UNICODE_ISALPHA, Py_UNICODE_ISUPPER
|
||||||
|
|
||||||
from preshed.maps cimport PreshMap
|
from preshed.maps cimport PreshMap
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
|
|
||||||
|
@ -7,17 +9,9 @@ from .typedefs cimport hash_t
|
||||||
from .tokens cimport Tokens
|
from .tokens cimport Tokens
|
||||||
from .lexeme cimport Lexeme
|
from .lexeme cimport Lexeme
|
||||||
from .tagger cimport Tagger
|
from .tagger cimport Tagger
|
||||||
from .ner.greedy_parser cimport NERParser
|
|
||||||
from .utf8string cimport StringStore
|
from .utf8string cimport StringStore
|
||||||
|
|
||||||
|
|
||||||
cdef extern from "Python.h":
|
|
||||||
cdef bint Py_UNICODE_ISSPACE(Py_UNICODE ch)
|
|
||||||
cdef bint Py_UNICODE_ISALNUM(Py_UNICODE ch)
|
|
||||||
cdef bint Py_UNICODE_ISALPHA(Py_UNICODE ch)
|
|
||||||
cdef bint Py_UNICODE_ISUPPER(Py_UNICODE ch)
|
|
||||||
|
|
||||||
|
|
||||||
cdef struct String:
|
cdef struct String:
|
||||||
Py_UNICODE* chars
|
Py_UNICODE* chars
|
||||||
size_t n
|
size_t n
|
||||||
|
@ -32,7 +26,7 @@ cdef class Lexicon:
|
||||||
|
|
||||||
cdef Lexeme* get(self, String* s) except NULL
|
cdef Lexeme* get(self, String* s) except NULL
|
||||||
|
|
||||||
cdef PreshMap _dict
|
cdef PreshMap _map
|
||||||
|
|
||||||
|
|
||||||
cdef class Language:
|
cdef class Language:
|
||||||
|
@ -42,9 +36,6 @@ cdef class Language:
|
||||||
cdef PreshMap _specials
|
cdef PreshMap _specials
|
||||||
cpdef readonly Lexicon lexicon
|
cpdef readonly Lexicon lexicon
|
||||||
|
|
||||||
cpdef readonly Tagger pos_tagger
|
|
||||||
cpdef readonly NERParser ner_tagger
|
|
||||||
|
|
||||||
cdef object _prefix_re
|
cdef object _prefix_re
|
||||||
cdef object _suffix_re
|
cdef object _suffix_re
|
||||||
cdef object _infix_re
|
cdef object _infix_re
|
||||||
|
|
|
@ -241,7 +241,7 @@ cdef class Lexicon:
|
||||||
'''
|
'''
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self._dict = PreshMap(2 ** 20)
|
self._map = PreshMap(2 ** 20)
|
||||||
self.strings = StringStore()
|
self.strings = StringStore()
|
||||||
self.lexemes.push_back(&EMPTY_LEXEME)
|
self.lexemes.push_back(&EMPTY_LEXEME)
|
||||||
self.size = 1
|
self.size = 1
|
||||||
|
@ -249,12 +249,12 @@ cdef class Lexicon:
|
||||||
cdef Lexeme* get(self, String* string) except NULL:
|
cdef Lexeme* get(self, String* string) except NULL:
|
||||||
'''Retrieve a pointer to a Lexeme from the lexicon.'''
|
'''Retrieve a pointer to a Lexeme from the lexicon.'''
|
||||||
cdef Lexeme* lex
|
cdef Lexeme* lex
|
||||||
lex = <Lexeme*>self._dict.get(string.key)
|
lex = <Lexeme*>self._map.get(string.key)
|
||||||
if lex != NULL:
|
if lex != NULL:
|
||||||
return lex
|
return lex
|
||||||
lex = <Lexeme*>self.mem.alloc(sizeof(Lexeme), 1)
|
lex = <Lexeme*>self.mem.alloc(sizeof(Lexeme), 1)
|
||||||
lex[0] = lexeme_init(self.size, string.chars[:string.n], string.key, self.strings, {})
|
lex[0] = lexeme_init(self.size, string.chars[:string.n], string.key, self.strings, {})
|
||||||
self._dict.set(string.key, lex)
|
self._map.set(string.key, lex)
|
||||||
while self.lexemes.size() < (lex.id + 1):
|
while self.lexemes.size() < (lex.id + 1):
|
||||||
self.lexemes.push_back(&EMPTY_LEXEME)
|
self.lexemes.push_back(&EMPTY_LEXEME)
|
||||||
self.lexemes[lex.id] = lex
|
self.lexemes[lex.id] = lex
|
||||||
|
@ -302,11 +302,11 @@ cdef class Lexicon:
|
||||||
assert fp != NULL
|
assert fp != NULL
|
||||||
cdef size_t st
|
cdef size_t st
|
||||||
cdef hash_t key
|
cdef hash_t key
|
||||||
for i in range(self._dict.length):
|
for i in range(self._map.length):
|
||||||
key = self._dict.c_map.cells[i].key
|
key = self._map.c_map.cells[i].key
|
||||||
if key == 0:
|
if key == 0:
|
||||||
continue
|
continue
|
||||||
lexeme = <Lexeme*>self._dict.c_map.cells[i].value
|
lexeme = <Lexeme*>self._map.c_map.cells[i].value
|
||||||
st = fwrite(&key, sizeof(key), 1, fp)
|
st = fwrite(&key, sizeof(key), 1, fp)
|
||||||
assert st == 1
|
assert st == 1
|
||||||
st = fwrite(lexeme, sizeof(Lexeme), 1, fp)
|
st = fwrite(lexeme, sizeof(Lexeme), 1, fp)
|
||||||
|
@ -331,7 +331,7 @@ cdef class Lexicon:
|
||||||
st = fread(lexeme, sizeof(Lexeme), 1, fp)
|
st = fread(lexeme, sizeof(Lexeme), 1, fp)
|
||||||
if st != 1:
|
if st != 1:
|
||||||
break
|
break
|
||||||
self._dict.set(key, lexeme)
|
self._map.set(key, lexeme)
|
||||||
while self.lexemes.size() < (lexeme.id + 1):
|
while self.lexemes.size() < (lexeme.id + 1):
|
||||||
self.lexemes.push_back(&EMPTY_LEXEME)
|
self.lexemes.push_back(&EMPTY_LEXEME)
|
||||||
self.lexemes[lexeme.id] = lexeme
|
self.lexemes[lexeme.id] = lexeme
|
||||||
|
|
Loading…
Reference in New Issue
Block a user