mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-21 09:31:59 +03:00
Fix lint errors in training/. Ignore trailing whitespace and overly long lines.
This commit is contained in:
parent
50dac51dc8
commit
9de2257e7a
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
|
@ -47,7 +47,7 @@ jobs:
|
|||
python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
|
||||
- name: cython-lint
|
||||
run: |
|
||||
python -m pip install cython-lint -c requirements.txt
|
||||
python -m pip install cython-lint -c requirements.txt --ignore E501,W291
|
||||
cython-lint spacy
|
||||
|
||||
tests:
|
||||
|
|
|
@ -37,10 +37,14 @@ def get_alignments(A: List[str], B: List[str]) -> Tuple[List[List[int]], List[Li
|
|||
b2a.append(set())
|
||||
# Process the alignment at the current position
|
||||
if A[token_idx_a] == B[token_idx_b] and \
|
||||
(char_idx_a == 0 or \
|
||||
char_to_token_a[char_idx_a - 1] < token_idx_a) and \
|
||||
(char_idx_b == 0 or \
|
||||
char_to_token_b[char_idx_b - 1] < token_idx_b):
|
||||
(
|
||||
char_idx_a == 0 or
|
||||
char_to_token_a[char_idx_a - 1] < token_idx_a
|
||||
) and \
|
||||
(
|
||||
char_idx_b == 0 or
|
||||
char_to_token_b[char_idx_b - 1] < token_idx_b
|
||||
):
|
||||
# Current tokens are identical and both character offsets are the
|
||||
# start of a token (either at the beginning of the document or the
|
||||
# previous character belongs to a different token)
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import warnings
|
||||
from collections.abc import Iterable as IterableInstance
|
||||
|
||||
import numpy
|
||||
|
@ -31,9 +30,9 @@ cpdef Doc annotations_to_doc(vocab, tok_annot, doc_annot):
|
|||
attrs, array = _annot2array(vocab, tok_annot, doc_annot)
|
||||
output = Doc(vocab, words=tok_annot["ORTH"], spaces=tok_annot["SPACY"])
|
||||
if "entities" in doc_annot:
|
||||
_add_entities_to_doc(output, doc_annot["entities"])
|
||||
_add_entities_to_doc(output, doc_annot["entities"])
|
||||
if "spans" in doc_annot:
|
||||
_add_spans_to_doc(output, doc_annot["spans"])
|
||||
_add_spans_to_doc(output, doc_annot["spans"])
|
||||
if array.size:
|
||||
output = output.from_array(attrs, array)
|
||||
# links are currently added with ENT_KB_ID on the token level
|
||||
|
@ -161,7 +160,6 @@ cdef class Example:
|
|||
self._y_sig = y_sig
|
||||
return self._cached_alignment
|
||||
|
||||
|
||||
def _get_aligned_vectorized(self, align, gold_values):
|
||||
# Fast path for Doc attributes/fields that are predominantly a single value,
|
||||
# i.e., TAG, POS, MORPH.
|
||||
|
@ -204,7 +202,6 @@ cdef class Example:
|
|||
|
||||
return output.tolist()
|
||||
|
||||
|
||||
def _get_aligned_non_vectorized(self, align, gold_values):
|
||||
# Slower path for fields that return multiple values (resulting
|
||||
# in ragged arrays that cannot be vectorized trivially).
|
||||
|
@ -221,7 +218,6 @@ cdef class Example:
|
|||
|
||||
return output
|
||||
|
||||
|
||||
def get_aligned(self, field, as_string=False):
|
||||
"""Return an aligned array for a token attribute."""
|
||||
align = self.alignment.x2y
|
||||
|
@ -330,7 +326,7 @@ cdef class Example:
|
|||
missing=None
|
||||
)
|
||||
# Now fill the tokens we can align to O.
|
||||
O = 2 # I=1, O=2, B=3
|
||||
O = 2 # I=1, O=2, B=3 # no-cython-lint: E741
|
||||
for i, ent_iob in enumerate(self.get_aligned("ENT_IOB")):
|
||||
if x_tags[i] is None:
|
||||
if ent_iob == O:
|
||||
|
@ -340,7 +336,7 @@ cdef class Example:
|
|||
return x_ents, x_tags
|
||||
|
||||
def get_aligned_ner(self):
|
||||
x_ents, x_tags = self.get_aligned_ents_and_ner()
|
||||
_x_ents, x_tags = self.get_aligned_ents_and_ner()
|
||||
return x_tags
|
||||
|
||||
def get_matching_ents(self, check_label=True):
|
||||
|
@ -398,7 +394,6 @@ cdef class Example:
|
|||
|
||||
return span_dict
|
||||
|
||||
|
||||
def _links_to_dict(self):
|
||||
links = {}
|
||||
for ent in self.reference.ents:
|
||||
|
|
Loading…
Reference in New Issue
Block a user