Deprecate (Phrase)Matcher.pipe

parent db9f8896f5
commit add9de5487
spacy/errors.py
@@ -112,6 +112,9 @@ class Warnings:
             "word segmenters: {supported}. Defaulting to {default}.")
     W104 = ("Skipping modifications for '{target}' segmenter. The current "
             "segmenter is '{current}'.")
+    W105 = ("As of spaCy v3.0, the {matcher}.pipe method is deprecated. If you "
+            "need to match on a stream of documents, you can use nlp.pipe and "
+            "call the {matcher} on each Doc object.")


 @add_codes
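The new W105 message describes the migration path. A minimal sketch of what that looks like in practice, assuming spaCy v3.0; the `texts` list and the `"HELLO"` pattern are illustrative, not part of this commit:

```python
import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)
matcher.add("HELLO", [[{"LOWER": "hello"}]])  # illustrative pattern

texts = ["hello world", "goodbye world"]

# Instead of the deprecated matcher.pipe(...), stream texts through nlp.pipe
# and call the matcher on each Doc yourself:
for doc in nlp.pipe(texts):
    matches = matcher(doc)  # list of (match_id, start, end) tuples
    print([doc[start:end].text for _, start, end in matches])
```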
spacy/matcher/matcher.pyx
@@ -176,18 +176,10 @@ cdef class Matcher:
         return (self._callbacks[key], self._patterns[key])

     def pipe(self, docs, batch_size=1000, return_matches=False, as_tuples=False):
-        """Match a stream of documents, yielding them in turn.
-
-        docs (Iterable[Union[Doc, Span]]): A stream of documents or spans.
-        batch_size (int): Number of documents to accumulate into a working set.
-        return_matches (bool): Yield the match lists along with the docs, making
-            results (doc, matches) tuples.
-        as_tuples (bool): Interpret the input stream as (doc, context) tuples,
-            and yield (result, context) tuples out.
-            If both return_matches and as_tuples are True, the output will
-            be a sequence of ((doc, matches), context) tuples.
-        YIELDS (Doc): Documents, in order.
+        """Match a stream of documents, yielding them in turn. Deprecated as of
+        spaCy v3.0.
         """
+        warnings.warn(Warnings.W105.format(matcher="Matcher"), DeprecationWarning)
         if as_tuples:
             for doc, context in docs:
                 matches = self(doc)
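Since Python filters `DeprecationWarning` by default outside `__main__` and test runners, the new warning may go unseen. A sketch of surfacing it explicitly, assuming spaCy v3.0 with this change applied:

```python
import warnings

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)
doc = nlp("hello world")

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", DeprecationWarning)
    for _ in matcher.pipe([doc]):  # deprecated but still functional
        pass

# The W105 text, formatted with matcher="Matcher"
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
print(caught[0].message)
```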
spacy/matcher/phrasematcher.pyx
@@ -292,20 +292,10 @@ cdef class PhraseMatcher:
                 idx += 1

     def pipe(self, stream, batch_size=1000, return_matches=False, as_tuples=False):
-        """Match a stream of documents, yielding them in turn.
-
-        docs (iterable): A stream of documents.
-        batch_size (int): Number of documents to accumulate into a working set.
-        return_matches (bool): Yield the match lists along with the docs, making
-            results (doc, matches) tuples.
-        as_tuples (bool): Interpret the input stream as (doc, context) tuples,
-            and yield (result, context) tuples out.
-            If both return_matches and as_tuples are True, the output will
-            be a sequence of ((doc, matches), context) tuples.
-        YIELDS (Doc): Documents, in order.
-
-        DOCS: https://spacy.io/api/phrasematcher#pipe
+        """Match a stream of documents, yielding them in turn. Deprecated as of
+        spaCy v3.0.
         """
+        warnings.warn(Warnings.W105.format(matcher="PhraseMatcher"), DeprecationWarning)
         if as_tuples:
             for doc, context in stream:
                 matches = self(doc)
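The same migration applies to `PhraseMatcher`. A sketch with an illustrative terminology list; the names below are examples, not from this commit:

```python
import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.blank("en")
matcher = PhraseMatcher(nlp.vocab)
terms = ["machine learning", "spaCy"]
matcher.add("TERMS", [nlp.make_doc(term) for term in terms])

texts = ["I love spaCy", "machine learning is fun"]

# Deprecated: for doc in matcher.pipe(nlp.pipe(texts)): ...
for doc in nlp.pipe(texts):
    for match_id, start, end in matcher(doc):
        print(doc[start:end].text)
```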
spacy/tests/matcher/test_matcher_api.py
@@ -484,3 +484,12 @@ def test_matcher_as_spans(matcher):
     assert isinstance(matches[1], Span)
     assert matches[1].text == "Java"
     assert matches[1].label_ == "Java"
+
+
+def test_matcher_deprecated(matcher):
+    doc = Doc(matcher.vocab, words=["hello", "world"])
+    with pytest.warns(DeprecationWarning) as record:
+        for _ in matcher.pipe([doc]):
+            pass
+        assert record.list
+        assert "spaCy v3.0" in str(record.list[0].message)
spacy/tests/matcher/test_phrase_matcher.py
@@ -303,3 +303,14 @@ def test_phrase_matcher_as_spans(en_vocab):
     assert isinstance(matches[1], Span)
     assert matches[1].text == "test"
     assert matches[1].label_ == "B"
+
+
+def test_phrase_matcher_deprecated(en_vocab):
+    matcher = PhraseMatcher(en_vocab)
+    matcher.add("TEST", [Doc(en_vocab, words=["helllo"])])
+    doc = Doc(en_vocab, words=["hello", "world"])
+    with pytest.warns(DeprecationWarning) as record:
+        for _ in matcher.pipe([doc]):
+            pass
+        assert record.list
+        assert "spaCy v3.0" in str(record.list[0].message)
website/docs/api/matcher.md
@@ -123,27 +123,6 @@ Find all token sequences matching the supplied patterns on the `Doc` or `Span`.
 | `as_spans` <Tag variant="new">3</Tag> | Instead of tuples, return a list of [`Span`](/api/span) objects of the matches, with the `match_id` assigned as the span label. Defaults to `False`. ~~bool~~ |
 | **RETURNS** | A list of `(match_id, start, end)` tuples, describing the matches. A match tuple describes a span `doc[start:end]`. The `match_id` is the ID of the added match pattern. If `as_spans` is set to `True`, a list of `Span` objects is returned instead. ~~Union[List[Tuple[int, int, int]], List[Span]]~~ |

-## Matcher.pipe {#pipe tag="method"}
-
-Match a stream of documents, yielding them in turn.
-
-> #### Example
->
-> ```python
-> from spacy.matcher import Matcher
-> matcher = Matcher(nlp.vocab)
-> for doc in matcher.pipe(docs, batch_size=50):
->     pass
-> ```
-
-| Name | Description |
-| --------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `docs` | A stream of documents or spans. ~~Iterable[Union[Doc, Span]]~~ |
-| `batch_size` | The number of documents to accumulate into a working set. ~~int~~ |
-| `return_matches` <Tag variant="new">2.1</Tag> | Yield the match lists along with the docs, making results `(doc, matches)` tuples. ~~bool~~ |
-| `as_tuples` | Interpret the input stream as `(doc, context)` tuples, and yield `(result, context)` tuples out. If both `return_matches` and `as_tuples` are `True`, the output will be a sequence of `((doc, matches), context)` tuples. ~~bool~~ |
-| **YIELDS** | Documents, in order. ~~Union[Doc, Tuple[Doc, Any], Tuple[Tuple[Doc, Any], Any]]~~ |
-
 ## Matcher.\_\_len\_\_ {#len tag="method" new="2"}

 Get the number of rules added to the matcher. Note that this only returns the
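For code that used the removed `return_matches` and `as_tuples` options, both behaviors fall out of plain iteration, and `nlp.pipe` already threads context through via its own `as_tuples=True`. A sketch with illustrative data:

```python
import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)
matcher.add("GREETING", [[{"LOWER": "hello"}], [{"LOWER": "hi"}]])

# return_matches=True equivalent: pair each doc with its matches
texts = ["hello world", "hi there"]
docs_and_matches = [(doc, matcher(doc)) for doc in nlp.pipe(texts)]

# as_tuples=True equivalent: nlp.pipe carries (text, context) pairs through
data = [("hello world", {"post_id": 1}), ("hi there", {"post_id": 2})]
for doc, context in nlp.pipe(data, as_tuples=True):
    result = ((doc, matcher(doc)), context)  # mirrors the old combined output
```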
website/docs/api/phrasematcher.md
@@ -76,27 +76,6 @@ match_id_string = nlp.vocab.strings[match_id]

 </Infobox>

-## PhraseMatcher.pipe {#pipe tag="method"}
-
-Match a stream of documents, yielding them in turn.
-
-> #### Example
->
-> ```python
-> from spacy.matcher import PhraseMatcher
-> matcher = PhraseMatcher(nlp.vocab)
-> for doc in matcher.pipe(docs, batch_size=50):
->     pass
-> ```
-
-| Name | Description |
-| --------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `docs` | A stream of documents. ~~Iterable[Doc]~~ |
-| `batch_size` | The number of documents to accumulate into a working set. ~~int~~ |
-| `return_matches` <Tag variant="new">2.1</Tag> | Yield the match lists along with the docs, making results `(doc, matches)` tuples. ~~bool~~ |
-| `as_tuples` | Interpret the input stream as `(doc, context)` tuples, and yield `(result, context)` tuples out. If both `return_matches` and `as_tuples` are `True`, the output will be a sequence of `((doc, matches), context)` tuples. ~~bool~~ |
-| **YIELDS** | Documents and optional matches or context in order. ~~Union[Doc, Tuple[Doc, Any], Tuple[Tuple[Doc, Any], Any]]~~ |
-
 ## PhraseMatcher.\_\_len\_\_ {#len tag="method"}

 Get the number of rules added to the matcher. Note that this only returns the
website/docs/usage/rule-based-matching.md
@@ -856,15 +856,6 @@ for token in doc:
     print(token.text, token._.is_hashtag)
 ```

-To process a stream of social media posts, we can use
-[`Language.pipe`](/api/language#pipe), which will return a stream of `Doc`
-objects that we can pass to [`Matcher.pipe`](/api/matcher#pipe).
-
-```python
-docs = nlp.pipe(LOTS_OF_TWEETS)
-matches = matcher.pipe(docs)
-```
-
 ## Efficient phrase matching {#phrasematcher}

 If you need to match large terminology lists, you can also use the
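The removed usage snippet reduces to a short replacement in the new style; `LOTS_OF_TWEETS`, `nlp` and `matcher` are the placeholder names the surrounding guide already uses:

```python
# Assumes nlp and matcher are set up as earlier in the usage guide.
docs = nlp.pipe(LOTS_OF_TWEETS)
matches = [matcher(doc) for doc in docs]  # one match list per post
```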
website/docs/usage/v3.md
@@ -389,6 +389,7 @@ Note that spaCy v3.0 now requires **Python 3.6+**.
 | `GoldParse` | [`Example`](/api/example) |
 | `GoldCorpus` | [`Corpus`](/api/corpus) |
 | `KnowledgeBase.load_bulk`, `KnowledgeBase.dump` | [`KnowledgeBase.from_disk`](/api/kb#from_disk), [`KnowledgeBase.to_disk`](/api/kb#to_disk) |
+| `Matcher.pipe`, `PhraseMatcher.pipe` | not needed |
 | `spacy init-model` | [`spacy init model`](/api/cli#init-model) |
 | `spacy debug-data` | [`spacy debug data`](/api/cli#debug-data) |
 | `spacy profile` | [`spacy debug profile`](/api/cli#debug-profile) |