This commit is contained in:
Dāvis 2025-03-06 21:28:37 +00:00 committed by GitHub
commit b7ed9c7dba
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 51 additions and 51 deletions

View File

@ -1,7 +1,7 @@
[build-system] [build-system]
requires = [ requires = [
"setuptools", "setuptools",
"cython>=0.25,<3.0", "cython>=0.25",
"cymem>=2.0.2,<2.1.0", "cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0", "preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0", "murmurhash>=0.28.0,<1.1.0",

View File

@ -24,7 +24,7 @@ setuptools
packaging>=20.0 packaging>=20.0
# Development dependencies # Development dependencies
pre-commit>=2.13.0 pre-commit>=2.13.0
cython>=0.25,<3.0 cython>=0.25
pytest>=5.2.0,!=7.1.0 pytest>=5.2.0,!=7.1.0
pytest-timeout>=1.3.0,<2.0.0 pytest-timeout>=1.3.0,<2.0.0
mock>=2.0.0,<3.0.0 mock>=2.0.0,<3.0.0

View File

@ -12,7 +12,7 @@ cdef cppclass Feature:
hash_t field hash_t field
hash_t value hash_t value
__init__(): inline __init__():
this.field = 0 this.field = 0
this.value = 0 this.value = 0
@ -21,7 +21,7 @@ cdef cppclass MorphAnalysisC:
hash_t key hash_t key
vector[Feature] features vector[Feature] features
__init__(): inline __init__():
this.key = 0 this.key = 0
cdef class Morphology: cdef class Morphology:

View File

@ -36,7 +36,7 @@ cdef class Morphology:
cdef shared_ptr[MorphAnalysisC] _lookup_tag(self, hash_t tag_hash): cdef shared_ptr[MorphAnalysisC] _lookup_tag(self, hash_t tag_hash):
match = self.tags.find(tag_hash) match = self.tags.find(tag_hash)
if match != self.tags.const_end(): if match != self.tags.end():
return deref(match).second return deref(match).second
else: else:
return shared_ptr[MorphAnalysisC]() return shared_ptr[MorphAnalysisC]()

View File

@ -41,7 +41,7 @@ cdef cppclass StateC:
int offset int offset
int _b_i int _b_i
__init__(const TokenC* sent, int length) nogil: inline __init__(const TokenC* sent, int length) nogil:
this._sent = sent this._sent = sent
this._heads = <int*>calloc(length, sizeof(int)) this._heads = <int*>calloc(length, sizeof(int))
if not (this._sent and this._heads): if not (this._sent and this._heads):
@ -57,10 +57,10 @@ cdef cppclass StateC:
memset(&this._empty_token, 0, sizeof(TokenC)) memset(&this._empty_token, 0, sizeof(TokenC))
this._empty_token.lex = &EMPTY_LEXEME this._empty_token.lex = &EMPTY_LEXEME
__dealloc__(): inline __dealloc__():
free(this._heads) free(this._heads)
void set_context_tokens(int* ids, int n) nogil: inline void set_context_tokens(int* ids, int n) nogil:
cdef int i, j cdef int i, j
if n == 1: if n == 1:
if this.B(0) >= 0: if this.B(0) >= 0:
@ -131,14 +131,14 @@ cdef cppclass StateC:
else: else:
ids[i] = -1 ids[i] = -1
int S(int i) nogil const: inline int S(int i) nogil const:
if i >= this._stack.size(): if i >= this._stack.size():
return -1 return -1
elif i < 0: elif i < 0:
return -1 return -1
return this._stack.at(this._stack.size() - (i+1)) return this._stack.at(this._stack.size() - (i+1))
int B(int i) nogil const: inline int B(int i) nogil const:
if i < 0: if i < 0:
return -1 return -1
elif i < this._rebuffer.size(): elif i < this._rebuffer.size():
@ -150,19 +150,19 @@ cdef cppclass StateC:
else: else:
return b_i return b_i
const TokenC* B_(int i) nogil const: inline const TokenC* B_(int i) nogil const:
return this.safe_get(this.B(i)) return this.safe_get(this.B(i))
const TokenC* E_(int i) nogil const: inline const TokenC* E_(int i) nogil const:
return this.safe_get(this.E(i)) return this.safe_get(this.E(i))
const TokenC* safe_get(int i) nogil const: inline const TokenC* safe_get(int i) nogil const:
if i < 0 or i >= this.length: if i < 0 or i >= this.length:
return &this._empty_token return &this._empty_token
else: else:
return &this._sent[i] return &this._sent[i]
void map_get_arcs(const unordered_map[int, vector[ArcC]] &heads_arcs, vector[ArcC]* out) nogil const: inline void map_get_arcs(const unordered_map[int, vector[ArcC]] &heads_arcs, vector[ArcC]* out) nogil const:
cdef const vector[ArcC]* arcs cdef const vector[ArcC]* arcs
head_arcs_it = heads_arcs.const_begin() head_arcs_it = heads_arcs.const_begin()
while head_arcs_it != heads_arcs.const_end(): while head_arcs_it != heads_arcs.const_end():
@ -175,23 +175,23 @@ cdef cppclass StateC:
incr(arcs_it) incr(arcs_it)
incr(head_arcs_it) incr(head_arcs_it)
void get_arcs(vector[ArcC]* out) nogil const: inline void get_arcs(vector[ArcC]* out) nogil const:
this.map_get_arcs(this._left_arcs, out) this.map_get_arcs(this._left_arcs, out)
this.map_get_arcs(this._right_arcs, out) this.map_get_arcs(this._right_arcs, out)
int H(int child) nogil const: inline int H(int child) nogil const:
if child >= this.length or child < 0: if child >= this.length or child < 0:
return -1 return -1
else: else:
return this._heads[child] return this._heads[child]
int E(int i) nogil const: inline int E(int i) nogil const:
if this._ents.size() == 0: if this._ents.size() == 0:
return -1 return -1
else: else:
return this._ents.back().start return this._ents.back().start
int nth_child(const unordered_map[int, vector[ArcC]]& heads_arcs, int head, int idx) nogil const: inline int nth_child(const unordered_map[int, vector[ArcC]]& heads_arcs, int head, int idx) nogil const:
if idx < 1: if idx < 1:
return -1 return -1
@ -215,22 +215,22 @@ cdef cppclass StateC:
return -1 return -1
int L(int head, int idx) nogil const: inline int L(int head, int idx) nogil const:
return this.nth_child(this._left_arcs, head, idx) return this.nth_child(this._left_arcs, head, idx)
int R(int head, int idx) nogil const: inline int R(int head, int idx) nogil const:
return this.nth_child(this._right_arcs, head, idx) return this.nth_child(this._right_arcs, head, idx)
bint empty() nogil const: inline bint empty() nogil const:
return this._stack.size() == 0 return this._stack.size() == 0
bint eol() nogil const: inline bint eol() nogil const:
return this.buffer_length() == 0 return this.buffer_length() == 0
bint is_final() nogil const: inline bint is_final() nogil const:
return this.stack_depth() <= 0 and this.eol() return this.stack_depth() <= 0 and this.eol()
int cannot_sent_start(int word) nogil const: inline int cannot_sent_start(int word) nogil const:
if word < 0 or word >= this.length: if word < 0 or word >= this.length:
return 0 return 0
elif this._sent[word].sent_start == -1: elif this._sent[word].sent_start == -1:
@ -238,7 +238,7 @@ cdef cppclass StateC:
else: else:
return 0 return 0
int is_sent_start(int word) nogil const: inline int is_sent_start(int word) nogil const:
if word < 0 or word >= this.length: if word < 0 or word >= this.length:
return 0 return 0
elif this._sent[word].sent_start == 1: elif this._sent[word].sent_start == 1:
@ -248,20 +248,20 @@ cdef cppclass StateC:
else: else:
return 0 return 0
void set_sent_start(int word, int value) nogil: inline void set_sent_start(int word, int value) nogil:
if value >= 1: if value >= 1:
this._sent_starts.insert(word) this._sent_starts.insert(word)
bint has_head(int child) nogil const: inline bint has_head(int child) nogil const:
return this._heads[child] >= 0 return this._heads[child] >= 0
int l_edge(int word) nogil const: inline int l_edge(int word) nogil const:
return word return word
int r_edge(int word) nogil const: inline int r_edge(int word) nogil const:
return word return word
int n_arcs(const unordered_map[int, vector[ArcC]] &heads_arcs, int head) nogil const: inline int n_arcs(const unordered_map[int, vector[ArcC]] &heads_arcs, int head) nogil const:
cdef int n = 0 cdef int n = 0
head_arcs_it = heads_arcs.const_find(head) head_arcs_it = heads_arcs.const_find(head)
if head_arcs_it == heads_arcs.const_end(): if head_arcs_it == heads_arcs.const_end():
@ -277,28 +277,28 @@ cdef cppclass StateC:
return n return n
int n_L(int head) nogil const: inline int n_L(int head) nogil const:
return n_arcs(this._left_arcs, head) return n_arcs(this._left_arcs, head)
int n_R(int head) nogil const: inline int n_R(int head) nogil const:
return n_arcs(this._right_arcs, head) return n_arcs(this._right_arcs, head)
bint stack_is_connected() nogil const: inline bint stack_is_connected() nogil const:
return False return False
bint entity_is_open() nogil const: inline bint entity_is_open() nogil const:
if this._ents.size() == 0: if this._ents.size() == 0:
return False return False
else: else:
return this._ents.back().end == -1 return this._ents.back().end == -1
int stack_depth() nogil const: inline int stack_depth() nogil const:
return this._stack.size() return this._stack.size()
int buffer_length() nogil const: inline int buffer_length() nogil const:
return (this.length - this._b_i) + this._rebuffer.size() return (this.length - this._b_i) + this._rebuffer.size()
void push() nogil: inline void push() nogil:
b0 = this.B(0) b0 = this.B(0)
if this._rebuffer.size(): if this._rebuffer.size():
b0 = this._rebuffer.back() b0 = this._rebuffer.back()
@ -308,32 +308,32 @@ cdef cppclass StateC:
this._b_i += 1 this._b_i += 1
this._stack.push_back(b0) this._stack.push_back(b0)
void pop() nogil: inline void pop() nogil:
this._stack.pop_back() this._stack.pop_back()
void force_final() nogil: inline void force_final() nogil:
# This should only be used in desperate situations, as it may leave # This should only be used in desperate situations, as it may leave
# the analysis in an unexpected state. # the analysis in an unexpected state.
this._stack.clear() this._stack.clear()
this._b_i = this.length this._b_i = this.length
void unshift() nogil: inline void unshift() nogil:
s0 = this._stack.back() s0 = this._stack.back()
this._unshiftable[s0] = 1 this._unshiftable[s0] = 1
this._rebuffer.push_back(s0) this._rebuffer.push_back(s0)
this._stack.pop_back() this._stack.pop_back()
int is_unshiftable(int item) nogil const: inline int is_unshiftable(int item) nogil const:
if item >= this._unshiftable.size(): if item >= this._unshiftable.size():
return 0 return 0
else: else:
return this._unshiftable.at(item) return this._unshiftable.at(item)
void set_reshiftable(int item) nogil: inline void set_reshiftable(int item) nogil:
if item < this._unshiftable.size(): if item < this._unshiftable.size():
this._unshiftable[item] = 0 this._unshiftable[item] = 0
void add_arc(int head, int child, attr_t label) nogil: inline void add_arc(int head, int child, attr_t label) nogil:
if this.has_head(child): if this.has_head(child):
this.del_arc(this.H(child), child) this.del_arc(this.H(child), child)
cdef ArcC arc cdef ArcC arc
@ -346,7 +346,7 @@ cdef cppclass StateC:
this._right_arcs[arc.head].push_back(arc) this._right_arcs[arc.head].push_back(arc)
this._heads[child] = head this._heads[child] = head
void map_del_arc(unordered_map[int, vector[ArcC]]* heads_arcs, int h_i, int c_i) nogil: inline void map_del_arc(unordered_map[int, vector[ArcC]]* heads_arcs, int h_i, int c_i) nogil:
arcs_it = heads_arcs.find(h_i) arcs_it = heads_arcs.find(h_i)
if arcs_it == heads_arcs.end(): if arcs_it == heads_arcs.end():
return return
@ -367,13 +367,13 @@ cdef cppclass StateC:
arc.label = 0 arc.label = 0
break break
void del_arc(int h_i, int c_i) nogil: inline void del_arc(int h_i, int c_i) nogil:
if h_i > c_i: if h_i > c_i:
this.map_del_arc(&this._left_arcs, h_i, c_i) this.map_del_arc(&this._left_arcs, h_i, c_i)
else: else:
this.map_del_arc(&this._right_arcs, h_i, c_i) this.map_del_arc(&this._right_arcs, h_i, c_i)
SpanC get_ent() nogil const: inline SpanC get_ent() nogil const:
cdef SpanC ent cdef SpanC ent
if this._ents.size() == 0: if this._ents.size() == 0:
ent.start = 0 ent.start = 0
@ -383,17 +383,17 @@ cdef cppclass StateC:
else: else:
return this._ents.back() return this._ents.back()
void open_ent(attr_t label) nogil: inline void open_ent(attr_t label) nogil:
cdef SpanC ent cdef SpanC ent
ent.start = this.B(0) ent.start = this.B(0)
ent.label = label ent.label = label
ent.end = -1 ent.end = -1
this._ents.push_back(ent) this._ents.push_back(ent)
void close_ent() nogil: inline void close_ent() nogil:
this._ents.back().end = this.B(0)+1 this._ents.back().end = this.B(0)+1
void clone(const StateC* src) nogil: inline void clone(const StateC* src) nogil:
this.length = src.length this.length = src.length
this._sent = src._sent this._sent = src._sent
this._stack = src._stack this._stack = src._stack

View File

@ -871,11 +871,11 @@ cdef extern from "<algorithm>" namespace "std" nogil:
bint (*)(SpanC, SpanC)) bint (*)(SpanC, SpanC))
cdef bint len_start_cmp(SpanC a, SpanC b) nogil: cdef bint len_start_cmp(SpanC a, SpanC b) noexcept nogil:
if a.end - a.start == b.end - b.start: if a.end - a.start == b.end - b.start:
return b.start < a.start return b.start < a.start
return a.end - a.start < b.end - b.start return a.end - a.start < b.end - b.start
cdef bint start_cmp(SpanC a, SpanC b) nogil: cdef bint start_cmp(SpanC a, SpanC b) noexcept nogil:
return a.start < b.start return a.start < b.start