mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-29 09:23:12 +03:00
Merge d19e2bbf21
into acb44f8e73
This commit is contained in:
commit
b7ed9c7dba
|
@ -1,7 +1,7 @@
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = [
|
requires = [
|
||||||
"setuptools",
|
"setuptools",
|
||||||
"cython>=0.25,<3.0",
|
"cython>=0.25",
|
||||||
"cymem>=2.0.2,<2.1.0",
|
"cymem>=2.0.2,<2.1.0",
|
||||||
"preshed>=3.0.2,<3.1.0",
|
"preshed>=3.0.2,<3.1.0",
|
||||||
"murmurhash>=0.28.0,<1.1.0",
|
"murmurhash>=0.28.0,<1.1.0",
|
||||||
|
|
|
@ -24,7 +24,7 @@ setuptools
|
||||||
packaging>=20.0
|
packaging>=20.0
|
||||||
# Development dependencies
|
# Development dependencies
|
||||||
pre-commit>=2.13.0
|
pre-commit>=2.13.0
|
||||||
cython>=0.25,<3.0
|
cython>=0.25
|
||||||
pytest>=5.2.0,!=7.1.0
|
pytest>=5.2.0,!=7.1.0
|
||||||
pytest-timeout>=1.3.0,<2.0.0
|
pytest-timeout>=1.3.0,<2.0.0
|
||||||
mock>=2.0.0,<3.0.0
|
mock>=2.0.0,<3.0.0
|
||||||
|
|
|
@ -12,7 +12,7 @@ cdef cppclass Feature:
|
||||||
hash_t field
|
hash_t field
|
||||||
hash_t value
|
hash_t value
|
||||||
|
|
||||||
__init__():
|
inline __init__():
|
||||||
this.field = 0
|
this.field = 0
|
||||||
this.value = 0
|
this.value = 0
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ cdef cppclass MorphAnalysisC:
|
||||||
hash_t key
|
hash_t key
|
||||||
vector[Feature] features
|
vector[Feature] features
|
||||||
|
|
||||||
__init__():
|
inline __init__():
|
||||||
this.key = 0
|
this.key = 0
|
||||||
|
|
||||||
cdef class Morphology:
|
cdef class Morphology:
|
||||||
|
|
|
@ -36,7 +36,7 @@ cdef class Morphology:
|
||||||
|
|
||||||
cdef shared_ptr[MorphAnalysisC] _lookup_tag(self, hash_t tag_hash):
|
cdef shared_ptr[MorphAnalysisC] _lookup_tag(self, hash_t tag_hash):
|
||||||
match = self.tags.find(tag_hash)
|
match = self.tags.find(tag_hash)
|
||||||
if match != self.tags.const_end():
|
if match != self.tags.end():
|
||||||
return deref(match).second
|
return deref(match).second
|
||||||
else:
|
else:
|
||||||
return shared_ptr[MorphAnalysisC]()
|
return shared_ptr[MorphAnalysisC]()
|
||||||
|
|
|
@ -41,7 +41,7 @@ cdef cppclass StateC:
|
||||||
int offset
|
int offset
|
||||||
int _b_i
|
int _b_i
|
||||||
|
|
||||||
__init__(const TokenC* sent, int length) nogil:
|
inline __init__(const TokenC* sent, int length) nogil:
|
||||||
this._sent = sent
|
this._sent = sent
|
||||||
this._heads = <int*>calloc(length, sizeof(int))
|
this._heads = <int*>calloc(length, sizeof(int))
|
||||||
if not (this._sent and this._heads):
|
if not (this._sent and this._heads):
|
||||||
|
@ -57,10 +57,10 @@ cdef cppclass StateC:
|
||||||
memset(&this._empty_token, 0, sizeof(TokenC))
|
memset(&this._empty_token, 0, sizeof(TokenC))
|
||||||
this._empty_token.lex = &EMPTY_LEXEME
|
this._empty_token.lex = &EMPTY_LEXEME
|
||||||
|
|
||||||
__dealloc__():
|
inline __dealloc__():
|
||||||
free(this._heads)
|
free(this._heads)
|
||||||
|
|
||||||
void set_context_tokens(int* ids, int n) nogil:
|
inline void set_context_tokens(int* ids, int n) nogil:
|
||||||
cdef int i, j
|
cdef int i, j
|
||||||
if n == 1:
|
if n == 1:
|
||||||
if this.B(0) >= 0:
|
if this.B(0) >= 0:
|
||||||
|
@ -131,14 +131,14 @@ cdef cppclass StateC:
|
||||||
else:
|
else:
|
||||||
ids[i] = -1
|
ids[i] = -1
|
||||||
|
|
||||||
int S(int i) nogil const:
|
inline int S(int i) nogil const:
|
||||||
if i >= this._stack.size():
|
if i >= this._stack.size():
|
||||||
return -1
|
return -1
|
||||||
elif i < 0:
|
elif i < 0:
|
||||||
return -1
|
return -1
|
||||||
return this._stack.at(this._stack.size() - (i+1))
|
return this._stack.at(this._stack.size() - (i+1))
|
||||||
|
|
||||||
int B(int i) nogil const:
|
inline int B(int i) nogil const:
|
||||||
if i < 0:
|
if i < 0:
|
||||||
return -1
|
return -1
|
||||||
elif i < this._rebuffer.size():
|
elif i < this._rebuffer.size():
|
||||||
|
@ -150,19 +150,19 @@ cdef cppclass StateC:
|
||||||
else:
|
else:
|
||||||
return b_i
|
return b_i
|
||||||
|
|
||||||
const TokenC* B_(int i) nogil const:
|
inline const TokenC* B_(int i) nogil const:
|
||||||
return this.safe_get(this.B(i))
|
return this.safe_get(this.B(i))
|
||||||
|
|
||||||
const TokenC* E_(int i) nogil const:
|
inline const TokenC* E_(int i) nogil const:
|
||||||
return this.safe_get(this.E(i))
|
return this.safe_get(this.E(i))
|
||||||
|
|
||||||
const TokenC* safe_get(int i) nogil const:
|
inline const TokenC* safe_get(int i) nogil const:
|
||||||
if i < 0 or i >= this.length:
|
if i < 0 or i >= this.length:
|
||||||
return &this._empty_token
|
return &this._empty_token
|
||||||
else:
|
else:
|
||||||
return &this._sent[i]
|
return &this._sent[i]
|
||||||
|
|
||||||
void map_get_arcs(const unordered_map[int, vector[ArcC]] &heads_arcs, vector[ArcC]* out) nogil const:
|
inline void map_get_arcs(const unordered_map[int, vector[ArcC]] &heads_arcs, vector[ArcC]* out) nogil const:
|
||||||
cdef const vector[ArcC]* arcs
|
cdef const vector[ArcC]* arcs
|
||||||
head_arcs_it = heads_arcs.const_begin()
|
head_arcs_it = heads_arcs.const_begin()
|
||||||
while head_arcs_it != heads_arcs.const_end():
|
while head_arcs_it != heads_arcs.const_end():
|
||||||
|
@ -175,23 +175,23 @@ cdef cppclass StateC:
|
||||||
incr(arcs_it)
|
incr(arcs_it)
|
||||||
incr(head_arcs_it)
|
incr(head_arcs_it)
|
||||||
|
|
||||||
void get_arcs(vector[ArcC]* out) nogil const:
|
inline void get_arcs(vector[ArcC]* out) nogil const:
|
||||||
this.map_get_arcs(this._left_arcs, out)
|
this.map_get_arcs(this._left_arcs, out)
|
||||||
this.map_get_arcs(this._right_arcs, out)
|
this.map_get_arcs(this._right_arcs, out)
|
||||||
|
|
||||||
int H(int child) nogil const:
|
inline int H(int child) nogil const:
|
||||||
if child >= this.length or child < 0:
|
if child >= this.length or child < 0:
|
||||||
return -1
|
return -1
|
||||||
else:
|
else:
|
||||||
return this._heads[child]
|
return this._heads[child]
|
||||||
|
|
||||||
int E(int i) nogil const:
|
inline int E(int i) nogil const:
|
||||||
if this._ents.size() == 0:
|
if this._ents.size() == 0:
|
||||||
return -1
|
return -1
|
||||||
else:
|
else:
|
||||||
return this._ents.back().start
|
return this._ents.back().start
|
||||||
|
|
||||||
int nth_child(const unordered_map[int, vector[ArcC]]& heads_arcs, int head, int idx) nogil const:
|
inline int nth_child(const unordered_map[int, vector[ArcC]]& heads_arcs, int head, int idx) nogil const:
|
||||||
if idx < 1:
|
if idx < 1:
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
|
@ -215,22 +215,22 @@ cdef cppclass StateC:
|
||||||
|
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
int L(int head, int idx) nogil const:
|
inline int L(int head, int idx) nogil const:
|
||||||
return this.nth_child(this._left_arcs, head, idx)
|
return this.nth_child(this._left_arcs, head, idx)
|
||||||
|
|
||||||
int R(int head, int idx) nogil const:
|
inline int R(int head, int idx) nogil const:
|
||||||
return this.nth_child(this._right_arcs, head, idx)
|
return this.nth_child(this._right_arcs, head, idx)
|
||||||
|
|
||||||
bint empty() nogil const:
|
inline bint empty() nogil const:
|
||||||
return this._stack.size() == 0
|
return this._stack.size() == 0
|
||||||
|
|
||||||
bint eol() nogil const:
|
inline bint eol() nogil const:
|
||||||
return this.buffer_length() == 0
|
return this.buffer_length() == 0
|
||||||
|
|
||||||
bint is_final() nogil const:
|
inline bint is_final() nogil const:
|
||||||
return this.stack_depth() <= 0 and this.eol()
|
return this.stack_depth() <= 0 and this.eol()
|
||||||
|
|
||||||
int cannot_sent_start(int word) nogil const:
|
inline int cannot_sent_start(int word) nogil const:
|
||||||
if word < 0 or word >= this.length:
|
if word < 0 or word >= this.length:
|
||||||
return 0
|
return 0
|
||||||
elif this._sent[word].sent_start == -1:
|
elif this._sent[word].sent_start == -1:
|
||||||
|
@ -238,7 +238,7 @@ cdef cppclass StateC:
|
||||||
else:
|
else:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
int is_sent_start(int word) nogil const:
|
inline int is_sent_start(int word) nogil const:
|
||||||
if word < 0 or word >= this.length:
|
if word < 0 or word >= this.length:
|
||||||
return 0
|
return 0
|
||||||
elif this._sent[word].sent_start == 1:
|
elif this._sent[word].sent_start == 1:
|
||||||
|
@ -248,20 +248,20 @@ cdef cppclass StateC:
|
||||||
else:
|
else:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
void set_sent_start(int word, int value) nogil:
|
inline void set_sent_start(int word, int value) nogil:
|
||||||
if value >= 1:
|
if value >= 1:
|
||||||
this._sent_starts.insert(word)
|
this._sent_starts.insert(word)
|
||||||
|
|
||||||
bint has_head(int child) nogil const:
|
inline bint has_head(int child) nogil const:
|
||||||
return this._heads[child] >= 0
|
return this._heads[child] >= 0
|
||||||
|
|
||||||
int l_edge(int word) nogil const:
|
inline int l_edge(int word) nogil const:
|
||||||
return word
|
return word
|
||||||
|
|
||||||
int r_edge(int word) nogil const:
|
inline int r_edge(int word) nogil const:
|
||||||
return word
|
return word
|
||||||
|
|
||||||
int n_arcs(const unordered_map[int, vector[ArcC]] &heads_arcs, int head) nogil const:
|
inline int n_arcs(const unordered_map[int, vector[ArcC]] &heads_arcs, int head) nogil const:
|
||||||
cdef int n = 0
|
cdef int n = 0
|
||||||
head_arcs_it = heads_arcs.const_find(head)
|
head_arcs_it = heads_arcs.const_find(head)
|
||||||
if head_arcs_it == heads_arcs.const_end():
|
if head_arcs_it == heads_arcs.const_end():
|
||||||
|
@ -277,28 +277,28 @@ cdef cppclass StateC:
|
||||||
|
|
||||||
return n
|
return n
|
||||||
|
|
||||||
int n_L(int head) nogil const:
|
inline int n_L(int head) nogil const:
|
||||||
return n_arcs(this._left_arcs, head)
|
return n_arcs(this._left_arcs, head)
|
||||||
|
|
||||||
int n_R(int head) nogil const:
|
inline int n_R(int head) nogil const:
|
||||||
return n_arcs(this._right_arcs, head)
|
return n_arcs(this._right_arcs, head)
|
||||||
|
|
||||||
bint stack_is_connected() nogil const:
|
inline bint stack_is_connected() nogil const:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
bint entity_is_open() nogil const:
|
inline bint entity_is_open() nogil const:
|
||||||
if this._ents.size() == 0:
|
if this._ents.size() == 0:
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
return this._ents.back().end == -1
|
return this._ents.back().end == -1
|
||||||
|
|
||||||
int stack_depth() nogil const:
|
inline int stack_depth() nogil const:
|
||||||
return this._stack.size()
|
return this._stack.size()
|
||||||
|
|
||||||
int buffer_length() nogil const:
|
inline int buffer_length() nogil const:
|
||||||
return (this.length - this._b_i) + this._rebuffer.size()
|
return (this.length - this._b_i) + this._rebuffer.size()
|
||||||
|
|
||||||
void push() nogil:
|
inline void push() nogil:
|
||||||
b0 = this.B(0)
|
b0 = this.B(0)
|
||||||
if this._rebuffer.size():
|
if this._rebuffer.size():
|
||||||
b0 = this._rebuffer.back()
|
b0 = this._rebuffer.back()
|
||||||
|
@ -308,32 +308,32 @@ cdef cppclass StateC:
|
||||||
this._b_i += 1
|
this._b_i += 1
|
||||||
this._stack.push_back(b0)
|
this._stack.push_back(b0)
|
||||||
|
|
||||||
void pop() nogil:
|
inline void pop() nogil:
|
||||||
this._stack.pop_back()
|
this._stack.pop_back()
|
||||||
|
|
||||||
void force_final() nogil:
|
inline void force_final() nogil:
|
||||||
# This should only be used in desperate situations, as it may leave
|
# This should only be used in desperate situations, as it may leave
|
||||||
# the analysis in an unexpected state.
|
# the analysis in an unexpected state.
|
||||||
this._stack.clear()
|
this._stack.clear()
|
||||||
this._b_i = this.length
|
this._b_i = this.length
|
||||||
|
|
||||||
void unshift() nogil:
|
inline void unshift() nogil:
|
||||||
s0 = this._stack.back()
|
s0 = this._stack.back()
|
||||||
this._unshiftable[s0] = 1
|
this._unshiftable[s0] = 1
|
||||||
this._rebuffer.push_back(s0)
|
this._rebuffer.push_back(s0)
|
||||||
this._stack.pop_back()
|
this._stack.pop_back()
|
||||||
|
|
||||||
int is_unshiftable(int item) nogil const:
|
inline int is_unshiftable(int item) nogil const:
|
||||||
if item >= this._unshiftable.size():
|
if item >= this._unshiftable.size():
|
||||||
return 0
|
return 0
|
||||||
else:
|
else:
|
||||||
return this._unshiftable.at(item)
|
return this._unshiftable.at(item)
|
||||||
|
|
||||||
void set_reshiftable(int item) nogil:
|
inline void set_reshiftable(int item) nogil:
|
||||||
if item < this._unshiftable.size():
|
if item < this._unshiftable.size():
|
||||||
this._unshiftable[item] = 0
|
this._unshiftable[item] = 0
|
||||||
|
|
||||||
void add_arc(int head, int child, attr_t label) nogil:
|
inline void add_arc(int head, int child, attr_t label) nogil:
|
||||||
if this.has_head(child):
|
if this.has_head(child):
|
||||||
this.del_arc(this.H(child), child)
|
this.del_arc(this.H(child), child)
|
||||||
cdef ArcC arc
|
cdef ArcC arc
|
||||||
|
@ -346,7 +346,7 @@ cdef cppclass StateC:
|
||||||
this._right_arcs[arc.head].push_back(arc)
|
this._right_arcs[arc.head].push_back(arc)
|
||||||
this._heads[child] = head
|
this._heads[child] = head
|
||||||
|
|
||||||
void map_del_arc(unordered_map[int, vector[ArcC]]* heads_arcs, int h_i, int c_i) nogil:
|
inline void map_del_arc(unordered_map[int, vector[ArcC]]* heads_arcs, int h_i, int c_i) nogil:
|
||||||
arcs_it = heads_arcs.find(h_i)
|
arcs_it = heads_arcs.find(h_i)
|
||||||
if arcs_it == heads_arcs.end():
|
if arcs_it == heads_arcs.end():
|
||||||
return
|
return
|
||||||
|
@ -367,13 +367,13 @@ cdef cppclass StateC:
|
||||||
arc.label = 0
|
arc.label = 0
|
||||||
break
|
break
|
||||||
|
|
||||||
void del_arc(int h_i, int c_i) nogil:
|
inline void del_arc(int h_i, int c_i) nogil:
|
||||||
if h_i > c_i:
|
if h_i > c_i:
|
||||||
this.map_del_arc(&this._left_arcs, h_i, c_i)
|
this.map_del_arc(&this._left_arcs, h_i, c_i)
|
||||||
else:
|
else:
|
||||||
this.map_del_arc(&this._right_arcs, h_i, c_i)
|
this.map_del_arc(&this._right_arcs, h_i, c_i)
|
||||||
|
|
||||||
SpanC get_ent() nogil const:
|
inline SpanC get_ent() nogil const:
|
||||||
cdef SpanC ent
|
cdef SpanC ent
|
||||||
if this._ents.size() == 0:
|
if this._ents.size() == 0:
|
||||||
ent.start = 0
|
ent.start = 0
|
||||||
|
@ -383,17 +383,17 @@ cdef cppclass StateC:
|
||||||
else:
|
else:
|
||||||
return this._ents.back()
|
return this._ents.back()
|
||||||
|
|
||||||
void open_ent(attr_t label) nogil:
|
inline void open_ent(attr_t label) nogil:
|
||||||
cdef SpanC ent
|
cdef SpanC ent
|
||||||
ent.start = this.B(0)
|
ent.start = this.B(0)
|
||||||
ent.label = label
|
ent.label = label
|
||||||
ent.end = -1
|
ent.end = -1
|
||||||
this._ents.push_back(ent)
|
this._ents.push_back(ent)
|
||||||
|
|
||||||
void close_ent() nogil:
|
inline void close_ent() nogil:
|
||||||
this._ents.back().end = this.B(0)+1
|
this._ents.back().end = this.B(0)+1
|
||||||
|
|
||||||
void clone(const StateC* src) nogil:
|
inline void clone(const StateC* src) nogil:
|
||||||
this.length = src.length
|
this.length = src.length
|
||||||
this._sent = src._sent
|
this._sent = src._sent
|
||||||
this._stack = src._stack
|
this._stack = src._stack
|
||||||
|
|
|
@ -871,11 +871,11 @@ cdef extern from "<algorithm>" namespace "std" nogil:
|
||||||
bint (*)(SpanC, SpanC))
|
bint (*)(SpanC, SpanC))
|
||||||
|
|
||||||
|
|
||||||
cdef bint len_start_cmp(SpanC a, SpanC b) nogil:
|
cdef bint len_start_cmp(SpanC a, SpanC b) noexcept nogil:
|
||||||
if a.end - a.start == b.end - b.start:
|
if a.end - a.start == b.end - b.start:
|
||||||
return b.start < a.start
|
return b.start < a.start
|
||||||
return a.end - a.start < b.end - b.start
|
return a.end - a.start < b.end - b.start
|
||||||
|
|
||||||
|
|
||||||
cdef bint start_cmp(SpanC a, SpanC b) nogil:
|
cdef bint start_cmp(SpanC a, SpanC b) noexcept nogil:
|
||||||
return a.start < b.start
|
return a.start < b.start
|
||||||
|
|
Loading…
Reference in New Issue
Block a user