mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-21 17:41:59 +03:00
Fix remaining files. Reenable cython-lint check.
This commit is contained in:
parent
94110a1c6d
commit
56c1fd00eb
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
|
@ -48,7 +48,7 @@ jobs:
|
|||
- name: cython-lint
|
||||
run: |
|
||||
python -m pip install cython-lint -c requirements.txt
|
||||
# cython-lint spacy --ignore E501,W291,E266
|
||||
cython-lint spacy --ignore E501,W291,E266
|
||||
|
||||
tests:
|
||||
name: Test
|
||||
|
|
|
@ -96,4 +96,4 @@ cdef enum attr_id_t:
|
|||
ENT_ID = symbols.ENT_ID
|
||||
|
||||
IDX
|
||||
SENT_END
|
||||
SENT_END
|
||||
|
|
|
@ -83,10 +83,11 @@ cdef class Morphology:
|
|||
features = self.normalize_attrs(features)
|
||||
string_features = {self.strings.as_string(field): self.strings.as_string(values) for field, values in features.items()}
|
||||
# normalized UFEATS string with sorted fields and values
|
||||
norm_feats_string = self.FEATURE_SEP.join(sorted([
|
||||
self.FIELD_SEP.join([field, values])
|
||||
for field, values in string_features.items()
|
||||
]))
|
||||
norm_feats_string = self.FEATURE_SEP.join(
|
||||
sorted(
|
||||
[self.FIELD_SEP.join([field, values]) for field, values in string_features.items()]
|
||||
)
|
||||
)
|
||||
return norm_feats_string or self.EMPTY_MORPH
|
||||
|
||||
def normalize_attrs(self, attrs):
|
||||
|
@ -192,6 +193,7 @@ cdef int get_n_by_field(attr_t* results, const MorphAnalysisC* morph, attr_t fie
|
|||
n_results += 1
|
||||
return n_results
|
||||
|
||||
|
||||
def unpickle_morphology(strings, tags):
|
||||
cdef Morphology morphology = Morphology(strings)
|
||||
for tag in tags:
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
cimport cython
|
||||
from libc.stdint cimport uint32_t
|
||||
from libc.string cimport memcpy
|
||||
from libcpp.set cimport set
|
||||
from murmurhash.mrmr cimport hash32, hash64
|
||||
|
||||
import srsly
|
||||
|
@ -20,9 +19,10 @@ cdef inline bint _try_coerce_to_hash(object key, hash_t* out_hash):
|
|||
try:
|
||||
out_hash[0] = key
|
||||
return True
|
||||
except:
|
||||
except: # no-cython-lint
|
||||
return False
|
||||
|
||||
|
||||
def get_string_id(key):
|
||||
"""Get a string ID, handling the reserved symbols correctly. If the key is
|
||||
already an ID, return it.
|
||||
|
@ -87,7 +87,6 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e
|
|||
cdef int n_length_bytes
|
||||
cdef int i
|
||||
cdef Utf8Str* string = <Utf8Str*>mem.alloc(1, sizeof(Utf8Str))
|
||||
cdef uint32_t ulength = length
|
||||
if length < sizeof(string.s):
|
||||
string.s[0] = <unsigned char>length
|
||||
memcpy(&string.s[1], chars, length)
|
||||
|
|
|
@ -52,7 +52,7 @@ cdef struct TokenC:
|
|||
|
||||
int sent_start
|
||||
int ent_iob
|
||||
attr_t ent_type # TODO: Is there a better way to do this? Multiple sources of truth..
|
||||
attr_t ent_type # TODO: Is there a better way to do this? Multiple sources of truth..
|
||||
attr_t ent_kb_id
|
||||
hash_t ent_id
|
||||
|
||||
|
|
|
@ -96,7 +96,7 @@ IDS = {
|
|||
"ADV": ADV,
|
||||
"AUX": AUX,
|
||||
"CONJ": CONJ,
|
||||
"CCONJ": CCONJ, # U20
|
||||
"CCONJ": CCONJ, # U20
|
||||
"DET": DET,
|
||||
"INTJ": INTJ,
|
||||
"NOUN": NOUN,
|
||||
|
@ -421,7 +421,7 @@ IDS = {
|
|||
"ccomp": ccomp,
|
||||
"complm": complm,
|
||||
"conj": conj,
|
||||
"cop": cop, # U20
|
||||
"cop": cop, # U20
|
||||
"csubj": csubj,
|
||||
"csubjpass": csubjpass,
|
||||
"dep": dep,
|
||||
|
@ -444,8 +444,8 @@ IDS = {
|
|||
"num": num,
|
||||
"number": number,
|
||||
"oprd": oprd,
|
||||
"obj": obj, # U20
|
||||
"obl": obl, # U20
|
||||
"obj": obj, # U20
|
||||
"obl": obl, # U20
|
||||
"parataxis": parataxis,
|
||||
"partmod": partmod,
|
||||
"pcomp": pcomp,
|
||||
|
|
Loading…
Reference in New Issue
Block a user