mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-03 19:08:06 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
e3e9fe18d4
|
@ -12,7 +12,7 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||||
from ..norm_exceptions import BASE_NORMS
|
from ..norm_exceptions import BASE_NORMS
|
||||||
from ...util import update_exc, add_lookups
|
from ...util import update_exc, add_lookups
|
||||||
from ...language import Language
|
from ...language import Language
|
||||||
from ...attrs import LANG, LIKE_NUM, NORM
|
from ...attrs import LANG, NORM
|
||||||
|
|
||||||
|
|
||||||
class RussianDefaults(Language.Defaults):
|
class RussianDefaults(Language.Defaults):
|
||||||
|
|
|
@ -5,10 +5,26 @@ import pytest
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
from spacy.displacy import render
|
from spacy.displacy import render
|
||||||
from spacy.gold import iob_to_biluo
|
from spacy.gold import iob_to_biluo
|
||||||
|
from spacy.lang.it import Italian
|
||||||
|
|
||||||
from ..util import add_vecs_to_vocab
|
from ..util import add_vecs_to_vocab
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail
|
||||||
|
def test_issue2179():
|
||||||
|
"""Test that spurious 'extra_labels' aren't created when initializing NER."""
|
||||||
|
nlp = Italian()
|
||||||
|
ner = nlp.create_pipe('ner')
|
||||||
|
ner.add_label('CITIZENSHIP')
|
||||||
|
nlp.add_pipe(ner)
|
||||||
|
nlp.begin_training()
|
||||||
|
nlp2 = Italian()
|
||||||
|
nlp2.add_pipe(nlp2.create_pipe('ner'))
|
||||||
|
nlp2.from_bytes(nlp.to_bytes())
|
||||||
|
assert 'extra_labels' not in nlp2.get_pipe('ner').cfg
|
||||||
|
assert nlp2.get_pipe('ner').labels == ['CITIZENSHIP']
|
||||||
|
|
||||||
|
|
||||||
def test_issue2219(en_vocab):
|
def test_issue2219(en_vocab):
|
||||||
vectors = [("a", [1, 2, 3]), ("letter", [4, 5, 6])]
|
vectors = [("a", [1, 2, 3]), ("letter", [4, 5, 6])]
|
||||||
add_vecs_to_vocab(en_vocab, vectors)
|
add_vecs_to_vocab(en_vocab, vectors)
|
||||||
|
|
|
@ -1,16 +0,0 @@
|
||||||
'''Test that spurious 'extra_labels' aren't created when initializing NER.'''
|
|
||||||
import pytest
|
|
||||||
from ... import blank
|
|
||||||
|
|
||||||
@pytest.mark.xfail
|
|
||||||
def test_issue2179():
|
|
||||||
nlp = blank('it')
|
|
||||||
ner = nlp.create_pipe('ner')
|
|
||||||
ner.add_label('CITIZENSHIP')
|
|
||||||
nlp.add_pipe(ner)
|
|
||||||
nlp.begin_training()
|
|
||||||
nlp2 = blank('it')
|
|
||||||
nlp2.add_pipe(nlp2.create_pipe('ner'))
|
|
||||||
nlp2.from_bytes(nlp.to_bytes())
|
|
||||||
assert 'extra_labels' not in nlp2.get_pipe('ner').cfg
|
|
||||||
assert nlp2.get_pipe('ner').labels == ['CITIZENSHIP']
|
|
|
@ -1,11 +1,10 @@
|
||||||
|
# coding: utf8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import spacy
|
|
||||||
|
|
||||||
def test_issue2626():
|
|
||||||
'''Check that this sentence doesn't cause an infinite loop in the tokenizer.'''
|
def test_issue2626(en_tokenizer):
|
||||||
nlp = spacy.blank('en')
|
"""Check that sentence doesn't cause an infinite loop in the tokenizer."""
|
||||||
text = """
|
text = """
|
||||||
ABLEItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume TABLE ItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume ItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume
|
ABLEItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume TABLE ItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume ItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume
|
||||||
"""
|
"""
|
||||||
doc = nlp.make_doc(text)
|
doc = en_tokenizer(text)
|
||||||
|
|
||||||
|
|
|
@ -1,29 +1,30 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import pytest
|
|
||||||
from ...lang.en import English
|
|
||||||
from ...matcher import Matcher
|
|
||||||
|
|
||||||
def get_rule_id(nlp, matcher, doc):
|
import pytest
|
||||||
matches = matcher(doc)
|
from spacy.lang.en import English
|
||||||
for match_id, start, end in matches:
|
from spacy.matcher import Matcher
|
||||||
rule_id = nlp.vocab.strings[match_id]
|
|
||||||
span = doc[start:end]
|
|
||||||
return rule_id
|
|
||||||
|
|
||||||
|
|
||||||
def test_issue2671():
|
def test_issue2671():
|
||||||
'''Ensure the correct entity ID is returned for matches with quantifiers.
|
"""Ensure the correct entity ID is returned for matches with quantifiers.
|
||||||
See also #2675
|
See also #2675
|
||||||
'''
|
"""
|
||||||
|
def get_rule_id(nlp, matcher, doc):
|
||||||
|
matches = matcher(doc)
|
||||||
|
for match_id, start, end in matches:
|
||||||
|
rule_id = nlp.vocab.strings[match_id]
|
||||||
|
span = doc[start:end]
|
||||||
|
return rule_id
|
||||||
|
|
||||||
nlp = English()
|
nlp = English()
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
|
pattern_id = 'test_pattern'
|
||||||
pattern = [{'LOWER': 'high'}, {'IS_PUNCT': True, 'OP': '?'}, {'LOWER': 'adrenaline'}]
|
pattern = [{'LOWER': 'high'},
|
||||||
matcher.add("test_pattern", None, pattern)
|
{'IS_PUNCT': True, 'OP': '?'},
|
||||||
|
{'LOWER': 'adrenaline'}]
|
||||||
|
matcher.add(pattern_id, None, pattern)
|
||||||
doc1 = nlp("This is a high-adrenaline situation.")
|
doc1 = nlp("This is a high-adrenaline situation.")
|
||||||
doc2 = nlp("This is a high adrenaline situation.")
|
doc2 = nlp("This is a high adrenaline situation.")
|
||||||
# Works correctly
|
assert get_rule_id(nlp, matcher, doc1) == pattern_id
|
||||||
assert get_rule_id(nlp, matcher, doc1) == 'test_pattern'
|
assert get_rule_id(nlp, matcher, doc2) == pattern_id
|
||||||
assert get_rule_id(nlp, matcher, doc2) == 'test_pattern'
|
|
||||||
|
|
|
@ -1,8 +1,13 @@
|
||||||
'''Test that deprojectivization doesn't mess up sentence boundaries.'''
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from ..util import get_doc
|
from ..util import get_doc
|
||||||
|
|
||||||
|
|
||||||
def test_issue2772(en_vocab):
|
def test_issue2772(en_vocab):
|
||||||
|
"""Test that deprojectivization doesn't mess up sentence boundaries."""
|
||||||
words = 'When we write or communicate virtually , we can hide our true feelings .'.split()
|
words = 'When we write or communicate virtually , we can hide our true feelings .'.split()
|
||||||
# A tree with a non-projective (i.e. crossing) arc
|
# A tree with a non-projective (i.e. crossing) arc
|
||||||
# The arcs (0, 4) and (2, 9) cross.
|
# The arcs (0, 4) and (2, 9) cross.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user