Fix training/. Ignore trailing whitespace and overly long lines.

This commit is contained in:
Raphael Mitsch 2023-07-03 11:39:28 +02:00
parent 50dac51dc8
commit 9de2257e7a
3 changed files with 13 additions and 14 deletions

View File

@ -47,7 +47,7 @@ jobs:
python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
- name: cython-lint
run: |
python -m pip install cython-lint -c requirements.txt
python -m pip install cython-lint -c requirements.txt --ignore E501,W291
cython-lint spacy
tests:

View File

@ -37,10 +37,14 @@ def get_alignments(A: List[str], B: List[str]) -> Tuple[List[List[int]], List[Li
b2a.append(set())
# Process the alignment at the current position
if A[token_idx_a] == B[token_idx_b] and \
(char_idx_a == 0 or \
char_to_token_a[char_idx_a - 1] < token_idx_a) and \
(char_idx_b == 0 or \
char_to_token_b[char_idx_b - 1] < token_idx_b):
(
char_idx_a == 0 or
char_to_token_a[char_idx_a - 1] < token_idx_a
) and \
(
char_idx_b == 0 or
char_to_token_b[char_idx_b - 1] < token_idx_b
):
# Current tokens are identical and both character offsets are the
# start of a token (either at the beginning of the document or the
# previous character belongs to a different token)

View File

@ -1,4 +1,3 @@
import warnings
from collections.abc import Iterable as IterableInstance
import numpy
@ -31,9 +30,9 @@ cpdef Doc annotations_to_doc(vocab, tok_annot, doc_annot):
attrs, array = _annot2array(vocab, tok_annot, doc_annot)
output = Doc(vocab, words=tok_annot["ORTH"], spaces=tok_annot["SPACY"])
if "entities" in doc_annot:
_add_entities_to_doc(output, doc_annot["entities"])
_add_entities_to_doc(output, doc_annot["entities"])
if "spans" in doc_annot:
_add_spans_to_doc(output, doc_annot["spans"])
_add_spans_to_doc(output, doc_annot["spans"])
if array.size:
output = output.from_array(attrs, array)
# links are currently added with ENT_KB_ID on the token level
@ -161,7 +160,6 @@ cdef class Example:
self._y_sig = y_sig
return self._cached_alignment
def _get_aligned_vectorized(self, align, gold_values):
# Fast path for Doc attributes/fields that are predominantly a single value,
# i.e., TAG, POS, MORPH.
@ -204,7 +202,6 @@ cdef class Example:
return output.tolist()
def _get_aligned_non_vectorized(self, align, gold_values):
# Slower path for fields that return multiple values (resulting
# in ragged arrays that cannot be vectorized trivially).
@ -221,7 +218,6 @@ cdef class Example:
return output
def get_aligned(self, field, as_string=False):
"""Return an aligned array for a token attribute."""
align = self.alignment.x2y
@ -330,7 +326,7 @@ cdef class Example:
missing=None
)
# Now fill the tokens we can align to O.
O = 2 # I=1, O=2, B=3
O = 2 # I=1, O=2, B=3 # no-cython-lint: E741
for i, ent_iob in enumerate(self.get_aligned("ENT_IOB")):
if x_tags[i] is None:
if ent_iob == O:
@ -340,7 +336,7 @@ cdef class Example:
return x_ents, x_tags
def get_aligned_ner(self):
x_ents, x_tags = self.get_aligned_ents_and_ner()
_x_ents, x_tags = self.get_aligned_ents_and_ner()
return x_tags
def get_matching_ents(self, check_label=True):
@ -398,7 +394,6 @@ cdef class Example:
return span_dict
def _links_to_dict(self):
links = {}
for ent in self.reference.ents: