Fix cython-lint issues in training/. Ignore trailing whitespace and overly long lines.

2025-07-14 18:22:27 +03:00 · 2023-07-03 11:39:28 +02:00 · 2023-07-03 11:39:28 +02:00 · 9de2257e7a
commit 9de2257e7a
parent 50dac51dc8
3 changed files with 13 additions and 14 deletions
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -47,7 +47,7 @@ jobs:
          python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
      - name: cython-lint
        run: |
          python -m pip install cython-lint -c requirements.txt
-          cython-lint spacy
+          cython-lint spacy --ignore E501,W291
  tests:
--- a/spacy/training/align.pyx
+++ b/spacy/training/align.pyx
@@ -37,10 +37,14 @@ def get_alignments(A: List[str], B: List[str]) -> Tuple[List[List[int]], List[Li
            b2a.append(set())
        # Process the alignment at the current position
        if A[token_idx_a] == B[token_idx_b] and \
-                (char_idx_a == 0 or \
-                    char_to_token_a[char_idx_a - 1] < token_idx_a) and \
-                (char_idx_b == 0 or \
-                    char_to_token_b[char_idx_b - 1] < token_idx_b):
+                (
+                    char_idx_a == 0 or
+                    char_to_token_a[char_idx_a - 1] < token_idx_a
+                ) and \
+                (
+                    char_idx_b == 0 or
+                    char_to_token_b[char_idx_b - 1] < token_idx_b
+                ):
            # Current tokens are identical and both character offsets are the
            # start of a token (either at the beginning of the document or the
            # previous character belongs to a different token)
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -1,4 +1,3 @@
 import warnings
 from collections.abc import Iterable as IterableInstance
 import numpy
@@ -31,9 +30,9 @@ cpdef Doc annotations_to_doc(vocab, tok_annot, doc_annot):
    attrs, array = _annot2array(vocab, tok_annot, doc_annot)
    output = Doc(vocab, words=tok_annot["ORTH"], spaces=tok_annot["SPACY"])
    if "entities" in doc_annot:
-       _add_entities_to_doc(output, doc_annot["entities"])
+        _add_entities_to_doc(output, doc_annot["entities"])
    if "spans" in doc_annot:
-       _add_spans_to_doc(output, doc_annot["spans"])
+        _add_spans_to_doc(output, doc_annot["spans"])
    if array.size:
        output = output.from_array(attrs, array)
    # links are currently added with ENT_KB_ID on the token level
@@ -161,7 +160,6 @@ cdef class Example:
                self._y_sig = y_sig
                return self._cached_alignment
    def _get_aligned_vectorized(self, align, gold_values):
        # Fast path for Doc attributes/fields that are predominantly a single value,
        # i.e., TAG, POS, MORPH.
@@ -204,7 +202,6 @@ cdef class Example:
        return output.tolist()
    def _get_aligned_non_vectorized(self, align, gold_values):
        # Slower path for fields that return multiple values (resulting
        # in ragged arrays that cannot be vectorized trivially).
@@ -221,7 +218,6 @@ cdef class Example:
        return output
    def get_aligned(self, field, as_string=False):
        """Return an aligned array for a token attribute."""
        align = self.alignment.x2y
@@ -330,7 +326,7 @@ cdef class Example:
            missing=None
        )
        # Now fill the tokens we can align to O.
-        O = 2 # I=1, O=2, B=3
+        O = 2 # I=1, O=2, B=3  # no-cython-lint: E741
        for i, ent_iob in enumerate(self.get_aligned("ENT_IOB")):
            if x_tags[i] is None:
                if ent_iob == O:
@@ -340,7 +336,7 @@ cdef class Example:
        return x_ents, x_tags
    def get_aligned_ner(self):
-        x_ents, x_tags = self.get_aligned_ents_and_ner()
+        _x_ents, x_tags = self.get_aligned_ents_and_ner()
        return x_tags
    def get_matching_ents(self, check_label=True):
@@ -398,7 +394,6 @@ cdef class Example:
        return span_dict
    def _links_to_dict(self):
        links = {}
        for ent in self.reference.ents: