diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9f4618102..52069bff8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -47,7 +47,7 @@ jobs: python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics - name: cython-lint run: | - python -m pip install cython-lint -c requirements.txt + python -m pip install cython-lint -c requirements.txt --ignore E501,W291 cython-lint spacy tests: diff --git a/spacy/training/align.pyx b/spacy/training/align.pyx index 8bd43b048..79fec73c4 100644 --- a/spacy/training/align.pyx +++ b/spacy/training/align.pyx @@ -37,10 +37,14 @@ def get_alignments(A: List[str], B: List[str]) -> Tuple[List[List[int]], List[Li b2a.append(set()) # Process the alignment at the current position if A[token_idx_a] == B[token_idx_b] and \ - (char_idx_a == 0 or \ - char_to_token_a[char_idx_a - 1] < token_idx_a) and \ - (char_idx_b == 0 or \ - char_to_token_b[char_idx_b - 1] < token_idx_b): + ( + char_idx_a == 0 or + char_to_token_a[char_idx_a - 1] < token_idx_a + ) and \ + ( + char_idx_b == 0 or + char_to_token_b[char_idx_b - 1] < token_idx_b + ): # Current tokens are identical and both character offsets are the # start of a token (either at the beginning of the document or the # previous character belongs to a different token) diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx index 1524589fa..3f0cf5ade 100644 --- a/spacy/training/example.pyx +++ b/spacy/training/example.pyx @@ -1,4 +1,3 @@ -import warnings from collections.abc import Iterable as IterableInstance import numpy @@ -31,9 +30,9 @@ cpdef Doc annotations_to_doc(vocab, tok_annot, doc_annot): attrs, array = _annot2array(vocab, tok_annot, doc_annot) output = Doc(vocab, words=tok_annot["ORTH"], spaces=tok_annot["SPACY"]) if "entities" in doc_annot: - _add_entities_to_doc(output, doc_annot["entities"]) + _add_entities_to_doc(output, doc_annot["entities"]) if "spans" in doc_annot: - _add_spans_to_doc(output, doc_annot["spans"]) + _add_spans_to_doc(output, doc_annot["spans"]) if array.size: output = output.from_array(attrs, array) # links are currently added with ENT_KB_ID on the token level @@ -161,7 +160,6 @@ cdef class Example: self._y_sig = y_sig return self._cached_alignment - def _get_aligned_vectorized(self, align, gold_values): # Fast path for Doc attributes/fields that are predominantly a single value, # i.e., TAG, POS, MORPH. @@ -204,7 +202,6 @@ cdef class Example: return output.tolist() - def _get_aligned_non_vectorized(self, align, gold_values): # Slower path for fields that return multiple values (resulting # in ragged arrays that cannot be vectorized trivially). @@ -221,7 +218,6 @@ cdef class Example: return output - def get_aligned(self, field, as_string=False): """Return an aligned array for a token attribute.""" align = self.alignment.x2y @@ -330,7 +326,7 @@ cdef class Example: missing=None ) # Now fill the tokens we can align to O. - O = 2 # I=1, O=2, B=3 + O = 2 # I=1, O=2, B=3 # no-cython-lint: E741 for i, ent_iob in enumerate(self.get_aligned("ENT_IOB")): if x_tags[i] is None: if ent_iob == O: @@ -340,7 +336,7 @@ cdef class Example: return x_ents, x_tags def get_aligned_ner(self): - x_ents, x_tags = self.get_aligned_ents_and_ner() + _x_ents, x_tags = self.get_aligned_ents_and_ner() return x_tags def get_matching_ents(self, check_label=True): @@ -398,7 +394,6 @@ cdef class Example: return span_dict - def _links_to_dict(self): links = {} for ent in self.reference.ents: