From 1724a4f75b3a1ee5ceec39bbaf14b82051c11e90 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 9 Mar 2020 18:08:18 +0100 Subject: [PATCH 1/2] additional information if doc is empty --- spacy/tests/matcher/test_matcher_api.py | 9 ++++++++- spacy/tokens/doc.pyx | 2 +- website/docs/api/doc.md | 8 ++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py index a826a0a0e..74d4b8b00 100644 --- a/spacy/tests/matcher/test_matcher_api.py +++ b/spacy/tests/matcher/test_matcher_api.py @@ -5,7 +5,7 @@ import pytest import re from mock import Mock from spacy.matcher import Matcher, DependencyMatcher -from spacy.tokens import Doc, Token +from spacy.tokens import Doc, Token, Span from ..doc.test_underscore import clean_underscore @@ -458,3 +458,10 @@ def test_matcher_callback(en_vocab): doc = Doc(en_vocab, words=["This", "is", "a", "test", "."]) matches = matcher(doc) mock.assert_called_once_with(matcher, doc, 0, matches) + +def test_matcher_span(matcher): + text = "JavaScript is good but Java is better" + doc = Doc(matcher.vocab, words=text.split()) + span = Span(doc, 0, 3) + matches = matcher(span.as_doc()) + assert len(matches) == 1 \ No newline at end of file diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 0c90929c3..ec0cd66b8 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -260,7 +260,7 @@ cdef class Doc: def is_nered(self): """Check if the document has named entities set. Will return True if *any* of the tokens has a named entity tag set (even if the others are - unknown values). + unknown values), or if the document is empty. """ if len(self) == 0: return True diff --git a/website/docs/api/doc.md b/website/docs/api/doc.md index 87b854a8c..ab85c1deb 100644 --- a/website/docs/api/doc.md +++ b/website/docs/api/doc.md @@ -657,10 +657,10 @@ The L2 norm of the document's vector representation. | `user_data` | - | A generic storage area, for user custom data. | | `lang` 2.1 | int | Language of the document's vocabulary. | | `lang_` 2.1 | unicode | Language of the document's vocabulary. | -| `is_tagged` | bool | A flag indicating that the document has been part-of-speech tagged. | -| `is_parsed` | bool | A flag indicating that the document has been syntactically parsed. | -| `is_sentenced` | bool | A flag indicating that sentence boundaries have been applied to the document. | -| `is_nered` 2.1 | bool | A flag indicating that named entities have been set. Will return `True` if _any_ of the tokens has an entity tag set, even if the others are unknown. | +| `is_tagged` | bool | A flag indicating that the document has been part-of-speech tagged. Returns `True` if the `Doc` is empty. | +| `is_parsed` | bool | A flag indicating that the document has been syntactically parsed. Returns `True` if the `Doc` is empty. | +| `is_sentenced` | bool | A flag indicating that sentence boundaries have been applied to the document. Returns `True` if the `Doc` is empty. | +| `is_nered` 2.1 | bool | A flag indicating that named entities have been set. Will return `True` if the `Doc` is empty, or if _any_ of the tokens has an entity tag set, even if the others are unknown. | | `sentiment` | float | The document's positivity/negativity score, if available. | | `user_hooks` | dict | A dictionary that allows customization of the `Doc`'s properties. | | `user_token_hooks` | dict | A dictionary that allows customization of properties of `Token` children. | From c4d030dbf68990e7af6b6a87d6add829906806bf Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 9 Mar 2020 18:10:54 +0100 Subject: [PATCH 2/2] remove accidental commit --- spacy/tests/matcher/test_matcher_api.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py index 74d4b8b00..a826a0a0e 100644 --- a/spacy/tests/matcher/test_matcher_api.py +++ b/spacy/tests/matcher/test_matcher_api.py @@ -5,7 +5,7 @@ import pytest import re from mock import Mock from spacy.matcher import Matcher, DependencyMatcher -from spacy.tokens import Doc, Token, Span +from spacy.tokens import Doc, Token from ..doc.test_underscore import clean_underscore @@ -458,10 +458,3 @@ def test_matcher_callback(en_vocab): doc = Doc(en_vocab, words=["This", "is", "a", "test", "."]) matches = matcher(doc) mock.assert_called_once_with(matcher, doc, 0, matches) - -def test_matcher_span(matcher): - text = "JavaScript is good but Java is better" - doc = Doc(matcher.vocab, words=text.split()) - span = Span(doc, 0, 3) - matches = matcher(span.as_doc()) - assert len(matches) == 1 \ No newline at end of file