Fix cython-lint issues in the remaining files. Re-enable the cython-lint check.

This commit is contained in:
Raphael Mitsch 2023-07-04 09:32:50 +02:00
parent 94110a1c6d
commit 56c1fd00eb
6 changed files with 15 additions and 14 deletions

View File

@@ -48,7 +48,7 @@ jobs:
- name: cython-lint
run: |
python -m pip install cython-lint -c requirements.txt
# cython-lint spacy --ignore E501,W291,E266
cython-lint spacy --ignore E501,W291,E266
tests:
name: Test

View File

@@ -96,4 +96,4 @@ cdef enum attr_id_t:
ENT_ID = symbols.ENT_ID
IDX
SENT_END
SENT_END

View File

@@ -83,10 +83,11 @@ cdef class Morphology:
features = self.normalize_attrs(features)
string_features = {self.strings.as_string(field): self.strings.as_string(values) for field, values in features.items()}
# normalized UFEATS string with sorted fields and values
norm_feats_string = self.FEATURE_SEP.join(sorted([
self.FIELD_SEP.join([field, values])
for field, values in string_features.items()
]))
norm_feats_string = self.FEATURE_SEP.join(
sorted(
[self.FIELD_SEP.join([field, values]) for field, values in string_features.items()]
)
)
return norm_feats_string or self.EMPTY_MORPH
def normalize_attrs(self, attrs):
@@ -192,6 +193,7 @@ cdef int get_n_by_field(attr_t* results, const MorphAnalysisC* morph, attr_t fie
n_results += 1
return n_results
def unpickle_morphology(strings, tags):
cdef Morphology morphology = Morphology(strings)
for tag in tags:

View File

@@ -2,7 +2,6 @@
cimport cython
from libc.stdint cimport uint32_t
from libc.string cimport memcpy
from libcpp.set cimport set
from murmurhash.mrmr cimport hash32, hash64
import srsly
@@ -20,9 +19,10 @@ cdef inline bint _try_coerce_to_hash(object key, hash_t* out_hash):
try:
out_hash[0] = key
return True
except:
except: # no-cython-lint
return False
def get_string_id(key):
"""Get a string ID, handling the reserved symbols correctly. If the key is
already an ID, return it.
@@ -87,7 +87,6 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e
cdef int n_length_bytes
cdef int i
cdef Utf8Str* string = <Utf8Str*>mem.alloc(1, sizeof(Utf8Str))
cdef uint32_t ulength = length
if length < sizeof(string.s):
string.s[0] = <unsigned char>length
memcpy(&string.s[1], chars, length)

View File

@@ -52,7 +52,7 @@ cdef struct TokenC:
int sent_start
int ent_iob
attr_t ent_type # TODO: Is there a better way to do this? Multiple sources of truth..
attr_t ent_type # TODO: Is there a better way to do this? Multiple sources of truth..
attr_t ent_kb_id
hash_t ent_id

View File

@@ -96,7 +96,7 @@ IDS = {
"ADV": ADV,
"AUX": AUX,
"CONJ": CONJ,
"CCONJ": CCONJ, # U20
"CCONJ": CCONJ, # U20
"DET": DET,
"INTJ": INTJ,
"NOUN": NOUN,
@@ -421,7 +421,7 @@ IDS = {
"ccomp": ccomp,
"complm": complm,
"conj": conj,
"cop": cop, # U20
"cop": cop, # U20
"csubj": csubj,
"csubjpass": csubjpass,
"dep": dep,
@@ -444,8 +444,8 @@ IDS = {
"num": num,
"number": number,
"oprd": oprd,
"obj": obj, # U20
"obl": obl, # U20
"obj": obj, # U20
"obl": obl, # U20
"parataxis": parataxis,
"partmod": partmod,
"pcomp": pcomp,