Deprecate (Phrase)Matcher.pipe

Ines Montani 2020-08-31 17:01:24 +02:00
parent db9f8896f5
commit add9de5487
9 changed files with 30 additions and 75 deletions

spacy/errors.py

@@ -112,6 +112,9 @@ class Warnings:
             "word segmenters: {supported}. Defaulting to {default}.")
     W104 = ("Skipping modifications for '{target}' segmenter. The current "
             "segmenter is '{current}'.")
+    W105 = ("As of spaCy v3.0, the {matcher}.pipe method is deprecated. If you "
+            "need to match on a stream of documents, you can use nlp.pipe and "
+            "call the {matcher} on each Doc object.")
 
 
 @add_codes
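The new warning text points users at `nlp.pipe`. A minimal sketch of that replacement pattern (assuming a loaded pipeline `nlp`, an example pattern, and a `texts` iterable — none of which appear in this commit):

```python
import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")  # assumed model; any pipeline works
matcher = Matcher(nlp.vocab)
# Illustrative pattern, not part of this commit
matcher.add("HELLO_WORLD", [[{"LOWER": "hello"}, {"LOWER": "world"}]])

texts = ["Hello world!", "hello there"]
# Instead of matcher.pipe(docs): stream Docs with nlp.pipe and call
# the matcher on each Doc, as the W105 message suggests.
for doc in nlp.pipe(texts):
    matches = matcher(doc)
```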

spacy/matcher/matcher.pyx

@@ -176,18 +176,10 @@ cdef class Matcher:
         return (self._callbacks[key], self._patterns[key])
 
     def pipe(self, docs, batch_size=1000, return_matches=False, as_tuples=False):
-        """Match a stream of documents, yielding them in turn.
-
-        docs (Iterable[Union[Doc, Span]]): A stream of documents or spans.
-        batch_size (int): Number of documents to accumulate into a working set.
-        return_matches (bool): Yield the match lists along with the docs, making
-            results (doc, matches) tuples.
-        as_tuples (bool): Interpret the input stream as (doc, context) tuples,
-            and yield (result, context) tuples out.
-            If both return_matches and as_tuples are True, the output will
-            be a sequence of ((doc, matches), context) tuples.
-        YIELDS (Doc): Documents, in order.
+        """Match a stream of documents, yielding them in turn. Deprecated as of
+        spaCy v3.0.
         """
+        warnings.warn(Warnings.W105.format(matcher="Matcher"), DeprecationWarning)
         if as_tuples:
            for doc, context in docs:
                matches = self(doc)
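For code that relied on the removed `return_matches=True` behavior, the same `(doc, matches)` tuples can be rebuilt in one line. A sketch, reusing the hypothetical `nlp`, `matcher`, and `texts` from above:

```python
# Equivalent of the removed matcher.pipe(docs, return_matches=True)
results = ((doc, matcher(doc)) for doc in nlp.pipe(texts))
for doc, matches in results:
    for match_id, start, end in matches:
        print(doc[start:end].text)
```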

spacy/matcher/phrasematcher.pyx

@@ -292,20 +292,10 @@ cdef class PhraseMatcher:
             idx += 1
 
     def pipe(self, stream, batch_size=1000, return_matches=False, as_tuples=False):
-        """Match a stream of documents, yielding them in turn.
-
-        docs (iterable): A stream of documents.
-        batch_size (int): Number of documents to accumulate into a working set.
-        return_matches (bool): Yield the match lists along with the docs, making
-            results (doc, matches) tuples.
-        as_tuples (bool): Interpret the input stream as (doc, context) tuples,
-            and yield (result, context) tuples out.
-            If both return_matches and as_tuples are True, the output will
-            be a sequence of ((doc, matches), context) tuples.
-        YIELDS (Doc): Documents, in order.
-
-        DOCS: https://spacy.io/api/phrasematcher#pipe
+        """Match a stream of documents, yielding them in turn. Deprecated as of
+        spaCy v3.0.
         """
+        warnings.warn(Warnings.W105.format(matcher="PhraseMatcher"), DeprecationWarning)
         if as_tuples:
            for doc, context in stream:
                matches = self(doc)
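The removed `as_tuples` option has a direct counterpart on `nlp.pipe`, which also accepts `(text, context)` tuples and yields `(doc, context)` pairs. A sketch with made-up data, again assuming the `nlp` pipeline from above:

```python
from spacy.matcher import PhraseMatcher

matcher = PhraseMatcher(nlp.vocab)
matcher.add("TECH", [nlp.make_doc("machine learning")])

data = [("I like machine learning.", {"id": 1}), ("Nothing here.", {"id": 2})]
# Replaces matcher.pipe(stream, as_tuples=True): nlp.pipe carries the
# (text, context) tuples through, and the matcher runs on each Doc.
for doc, context in nlp.pipe(data, as_tuples=True):
    matches = matcher(doc)
```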

spacy/tests/matcher/test_matcher_api.py

@@ -484,3 +484,12 @@ def test_matcher_as_spans(matcher):
     assert isinstance(matches[1], Span)
     assert matches[1].text == "Java"
     assert matches[1].label_ == "Java"
+
+
+def test_matcher_deprecated(matcher):
+    doc = Doc(matcher.vocab, words=["hello", "world"])
+    with pytest.warns(DeprecationWarning) as record:
+        for _ in matcher.pipe([doc]):
+            pass
+        assert record.list
+        assert "spaCy v3.0" in str(record.list[0].message)

spacy/tests/matcher/test_phrase_matcher.py

@@ -303,3 +303,14 @@ def test_phrase_matcher_as_spans(en_vocab):
     assert isinstance(matches[1], Span)
     assert matches[1].text == "test"
     assert matches[1].label_ == "B"
+
+
+def test_phrase_matcher_deprecated(en_vocab):
+    matcher = PhraseMatcher(en_vocab)
+    matcher.add("TEST", [Doc(en_vocab, words=["helllo"])])
+    doc = Doc(en_vocab, words=["hello", "world"])
+    with pytest.warns(DeprecationWarning) as record:
+        for _ in matcher.pipe([doc]):
+            pass
+        assert record.list
+        assert "spaCy v3.0" in str(record.list[0].message)

website/docs/api/matcher.md

@@ -123,27 +123,6 @@ Find all token sequences matching the supplied patterns on the `Doc` or `Span`.
 | `as_spans` <Tag variant="new">3</Tag> | Instead of tuples, return a list of [`Span`](/api/span) objects of the matches, with the `match_id` assigned as the span label. Defaults to `False`. ~~bool~~ |
 | **RETURNS** | A list of `(match_id, start, end)` tuples, describing the matches. A match tuple describes a span `doc[start:end]`. The `match_id` is the ID of the added match pattern. If `as_spans` is set to `True`, a list of `Span` objects is returned instead. ~~Union[List[Tuple[int, int, int]], List[Span]]~~ |
-
-## Matcher.pipe {#pipe tag="method"}
-
-Match a stream of documents, yielding them in turn.
-
-> #### Example
->
-> ```python
-> from spacy.matcher import Matcher
-> matcher = Matcher(nlp.vocab)
-> for doc in matcher.pipe(docs, batch_size=50):
->     pass
-> ```
-
-| Name | Description |
-| ---- | ----------- |
-| `docs` | A stream of documents or spans. ~~Iterable[Union[Doc, Span]]~~ |
-| `batch_size` | The number of documents to accumulate into a working set. ~~int~~ |
-| `return_matches` <Tag variant="new">2.1</Tag> | Yield the match lists along with the docs, making results `(doc, matches)` tuples. ~~bool~~ |
-| `as_tuples` | Interpret the input stream as `(doc, context)` tuples, and yield `(result, context)` tuples out. If both `return_matches` and `as_tuples` are `True`, the output will be a sequence of `((doc, matches), context)` tuples. ~~bool~~ |
-| **YIELDS** | Documents, in order. ~~Union[Doc, Tuple[Doc, Any], Tuple[Tuple[Doc, Any], Any]]~~ |
 
 ## Matcher.\_\_len\_\_ {#len tag="method" new="2"}
 
 Get the number of rules added to the matcher. Note that this only returns the
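If the removed `batch_size` argument mattered for throughput, `nlp.pipe` takes the same keyword. A one-line sketch under the earlier assumptions (`nlp`, `matcher`, `texts`):

```python
# batch_size moves from Matcher.pipe to nlp.pipe
for doc in nlp.pipe(texts, batch_size=50):
    matches = matcher(doc)
```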

website/docs/api/phrasematcher.md

@@ -76,27 +76,6 @@ match_id_string = nlp.vocab.strings[match_id]
 </Infobox>
-
-## PhraseMatcher.pipe {#pipe tag="method"}
-
-Match a stream of documents, yielding them in turn.
-
-> #### Example
->
-> ```python
-> from spacy.matcher import PhraseMatcher
-> matcher = PhraseMatcher(nlp.vocab)
-> for doc in matcher.pipe(docs, batch_size=50):
->     pass
-> ```
-
-| Name | Description |
-| ---- | ----------- |
-| `docs` | A stream of documents. ~~Iterable[Doc]~~ |
-| `batch_size` | The number of documents to accumulate into a working set. ~~int~~ |
-| `return_matches` <Tag variant="new">2.1</Tag> | Yield the match lists along with the docs, making results `(doc, matches)` tuples. ~~bool~~ |
-| `as_tuples` | Interpret the input stream as `(doc, context)` tuples, and yield `(result, context)` tuples out. If both `return_matches` and `as_tuples` are `True`, the output will be a sequence of `((doc, matches), context)` tuples. ~~bool~~ |
-| **YIELDS** | Documents and optional matches or context in order. ~~Union[Doc, Tuple[Doc, Any], Tuple[Tuple[Doc, Any], Any]]~~ |
 
 ## PhraseMatcher.\_\_len\_\_ {#len tag="method"}
 
 Get the number of rules added to the matcher. Note that this only returns the
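The fully general case, `return_matches=True` combined with `as_tuples=True`, can be wrapped in a small helper. This is an illustrative sketch, not an API shipped by spaCy:

```python
def match_stream(matcher, data, nlp):
    """Yield ((doc, matches), context) tuples, mirroring the removed
    PhraseMatcher.pipe(..., return_matches=True, as_tuples=True)."""
    for doc, context in nlp.pipe(data, as_tuples=True):
        yield (doc, matcher(doc)), context
```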

website/docs/usage/rule-based-matching.md

@@ -856,15 +856,6 @@ for token in doc:
     print(token.text, token._.is_hashtag)
 ```
-
-To process a stream of social media posts, we can use
-[`Language.pipe`](/api/language#pipe), which will return a stream of `Doc`
-objects that we can pass to [`Matcher.pipe`](/api/matcher#pipe).
-
-```python
-docs = nlp.pipe(LOTS_OF_TWEETS)
-matches = matcher.pipe(docs)
-```
 
 ## Efficient phrase matching {#phrasematcher}
 
 If you need to match large terminology lists, you can also use the
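A post-v3.0 version of the removed usage snippet might read as follows, keeping the page's `LOTS_OF_TWEETS` placeholder and assuming `matcher` is set up as earlier on that page:

```python
# nlp.pipe still streams Docs efficiently; the matcher is now called
# on each Doc instead of wrapping the stream.
for doc in nlp.pipe(LOTS_OF_TWEETS):
    matches = matcher(doc)
```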

website/docs/usage/v3.md

@@ -389,6 +389,7 @@ Note that spaCy v3.0 now requires **Python 3.6+**.
 | `GoldParse` | [`Example`](/api/example) |
 | `GoldCorpus` | [`Corpus`](/api/corpus) |
 | `KnowledgeBase.load_bulk`, `KnowledgeBase.dump` | [`KnowledgeBase.from_disk`](/api/kb#from_disk), [`KnowledgeBase.to_disk`](/api/kb#to_disk) |
+| `Matcher.pipe`, `PhraseMatcher.pipe` | not needed |
 | `spacy init-model` | [`spacy init model`](/api/cli#init-model) |
 | `spacy debug-data` | [`spacy debug data`](/api/cli#debug-data) |
 | `spacy profile` | [`spacy debug profile`](/api/cli#debug-profile) |