mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-03 19:08:06 +03:00
Fix HTML merger examples (see #2390)
This commit is contained in:
parent
9732988951
commit
605c663a4c
|
@ -349,9 +349,10 @@ p
|
||||||
for match_id, start, end in matches:
|
for match_id, start, end in matches:
|
||||||
spans.append(doc[start:end])
|
spans.append(doc[start:end])
|
||||||
for span in spans:
|
for span in spans:
|
||||||
span.merge(is_stop=True) # merge (and mark it as a stop word)
|
span.merge() # merge
|
||||||
for token in span:
|
for token in span:
|
||||||
token._.bad_html = True # mark token as bad HTML
|
token._.bad_html = True # mark token as bad HTML
|
||||||
|
doc.vocab[token.text].is_stop = True # mark lexeme as stop word
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
nlp = spacy.load('en_core_web_sm')
|
nlp = spacy.load('en_core_web_sm')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user