Mirror of https://github.com/explosion/spaCy.git (synced 2025-11-04 09:57:26 +03:00)
Rename argument: doc_or_span/obj -> doclike (#5463)

* doc_or_span -> obj
* Revert "doc_or_span -> obj"
  This reverts commit 78bb9ff5e0.
* obj -> doclike
* Refer to correct object
commit a9cb2882cb
parent d8f3190c0a

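Each of the language-specific noun_chunks diffs below applies the same mechanical rename: the parameter `obj` becomes `doclike`, while the body keeps normalizing to the underlying `Doc` and iterating over whatever was passed in. A minimal sketch of that shared pattern, with the surrounding spaCy module internals (such as `Errors`) assumed rather than shown:

    def noun_chunks(doclike):
        # `doclike` may be a Doc or a Span: Span.doc is the parent Doc,
        # and Doc.doc is the Doc itself, so this normalization is safe.
        doc = doclike.doc
        if not doc.is_parsed:
            raise ValueError(Errors.E029)  # chunking needs a dependency parse
        for i, word in enumerate(doclike):  # iterate the object passed in,
            ...                             # not the whole underlying Doc
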
@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -28,7 +28,7 @@ def noun_chunks(obj):
         "og",
         "app",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
 
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
@@ -38,7 +38,7 @@ def noun_chunks(obj):
     close_app = doc.vocab.strings.add("nk")
 
     rbracket = 0
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if i < rbracket:
             continue
         if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps:

@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases. Works on both Doc and Span.
     """
@@ -14,7 +14,7 @@ def noun_chunks(obj):
     # obj tag corrects some DEP tagger mistakes.
     # Further improvement of the models will eliminate the need for this tag.
     labels = ["nsubj", "obj", "iobj", "appos", "ROOT", "obl"]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
 
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
@@ -24,7 +24,7 @@ def noun_chunks(obj):
     nmod = doc.vocab.strings.add("nmod")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced

@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -20,7 +20,7 @@ def noun_chunks(obj):
         "attr",
         "ROOT",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
 
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
@@ -29,7 +29,7 @@ def noun_chunks(obj):
     conj = doc.vocab.strings.add("conj")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced

@@ -5,8 +5,8 @@ from ...symbols import NOUN, PROPN, PRON, VERB, AUX
 from ...errors import Errors
 
 
-def noun_chunks(obj):
-    doc = obj.doc
+def noun_chunks(doclike):
+    doc = doclike.doc
 
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
@@ -21,7 +21,7 @@ def noun_chunks(obj):
     np_right_deps = [doc.vocab.strings.add(label) for label in right_labels]
     stop_deps = [doc.vocab.strings.add(label) for label in stop_labels]
     token = doc[0]
-    while token and token.i < len(doc):
+    while token and token.i < len(doclike):
         if token.pos in [PROPN, NOUN, PRON]:
             left, right = noun_bounds(
                 doc, token, np_left_deps, np_right_deps, stop_deps

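Note that in the file above the rename is not purely cosmetic: the loop bound changes from `len(doc)` to `len(doclike)`, which appears to be the "Refer to correct object" fix from the commit message. For a Span input, `len(doclike)` is the token count of the slice, whereas `len(doc)` is the length of the whole document.
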
@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -20,7 +20,7 @@ def noun_chunks(obj):
         "attr",
         "ROOT",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
 
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
@@ -29,7 +29,7 @@ def noun_chunks(obj):
     conj = doc.vocab.strings.add("conj")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced

@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -19,7 +19,7 @@ def noun_chunks(obj):
         "nmod",
         "nmod:poss",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
 
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
@@ -28,7 +28,7 @@ def noun_chunks(obj):
     conj = doc.vocab.strings.add("conj")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced

@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -19,7 +19,7 @@ def noun_chunks(obj):
         "nmod",
         "nmod:poss",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
 
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
@@ -28,7 +28,7 @@ def noun_chunks(obj):
     conj = doc.vocab.strings.add("conj")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced

@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -19,7 +19,7 @@ def noun_chunks(obj):
         "nmod",
         "nmod:poss",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
 
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
@@ -28,7 +28,7 @@ def noun_chunks(obj):
     conj = doc.vocab.strings.add("conj")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced

@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -20,7 +20,7 @@ def noun_chunks(obj):
         "nmod",
         "nmod:poss",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
 
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
@@ -29,7 +29,7 @@ def noun_chunks(obj):
     conj = doc.vocab.strings.add("conj")
    np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced

@@ -213,28 +213,28 @@ cdef class Matcher:
                 else:
                     yield doc
 
-    def __call__(self, object doc_or_span):
+    def __call__(self, object doclike):
         """Find all token sequences matching the supplied pattern.
 
-        doc_or_span (Doc or Span): The document to match over.
+        doclike (Doc or Span): The document to match over.
         RETURNS (list): A list of `(key, start, end)` tuples,
             describing the matches. A match tuple describes a span
             `doc[start:end]`. The `label_id` and `key` are both integers.
         """
-        if isinstance(doc_or_span, Doc):
-            doc = doc_or_span
+        if isinstance(doclike, Doc):
+            doc = doclike
             length = len(doc)
-        elif isinstance(doc_or_span, Span):
-            doc = doc_or_span.doc
-            length = doc_or_span.end - doc_or_span.start
+        elif isinstance(doclike, Span):
+            doc = doclike.doc
+            length = doclike.end - doclike.start
         else:
-            raise ValueError(Errors.E195.format(good="Doc or Span", got=type(doc_or_span).__name__))
+            raise ValueError(Errors.E195.format(good="Doc or Span", got=type(doclike).__name__))
         if len(set([LEMMA, POS, TAG]) & self._seen_attrs) > 0 \
           and not doc.is_tagged:
             raise ValueError(Errors.E155.format())
         if DEP in self._seen_attrs and not doc.is_parsed:
             raise ValueError(Errors.E156.format())
-        matches = find_matches(&self.patterns[0], self.patterns.size(), doc_or_span, length,
+        matches = find_matches(&self.patterns[0], self.patterns.size(), doclike, length,
                                 extensions=self._extensions, predicates=self._extra_predicates)
         for i, (key, start, end) in enumerate(matches):
             on_match = self._callbacks.get(key, None)
@@ -257,7 +257,7 @@ def unpickle_matcher(vocab, patterns, callbacks):
     return matcher
 
 
-cdef find_matches(TokenPatternC** patterns, int n, object doc_or_span, int length, extensions=None, predicates=tuple()):
+cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, extensions=None, predicates=tuple()):
     """Find matches in a doc, with a compiled array of patterns. Matches are
     returned as a list of (id, start, end) tuples.
 
@@ -286,7 +286,7 @@ cdef find_matches(TokenPatternC** patterns, int n, object doc_or_span, int lengt
     else:
         nr_extra_attr = 0
         extra_attr_values = <attr_t*>mem.alloc(length, sizeof(attr_t))
-    for i, token in enumerate(doc_or_span):
+    for i, token in enumerate(doclike):
         for name, index in extensions.items():
             value = token._.get(name)
             if isinstance(value, basestring):
@@ -298,7 +298,7 @@ cdef find_matches(TokenPatternC** patterns, int n, object doc_or_span, int lengt
         for j in range(n):
            states.push_back(PatternStateC(patterns[j], i, 0))
         transition_states(states, matches, predicate_cache,
-            doc_or_span[i], extra_attr_values, predicates)
+            doclike[i], extra_attr_values, predicates)
         extra_attr_values += nr_extra_attr
         predicate_cache += len(predicates)
     # Handle matches that end in 0-width patterns
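
With the rename, `Matcher.__call__` accepts either a `Doc` or a `Span` under the `doclike` parameter, and anything else raises `Errors.E195`. A hedged usage sketch, assuming the list-of-patterns `add` API and an installed `en_core_web_sm` pipeline (both illustrative, not part of this commit):

    import spacy
    from spacy.matcher import Matcher

    nlp = spacy.load("en_core_web_sm")  # any parsed/tagged pipeline works
    matcher = Matcher(nlp.vocab)
    matcher.add("HELLO_WORLD", [[{"LOWER": "hello"}, {"LOWER": "world"}]])

    doc = nlp("Hello world! Hello world again.")
    matches = matcher(doc)          # match over the full Doc
    span_matches = matcher(doc[3:]) # a Span is accepted as well
    # matcher("not a doc")          # would raise ValueError (Errors.E195)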