Fix HTML merger examples (see #2390)

This commit is contained in:
ines 2018-05-30 12:22:32 +02:00
parent 9732988951
commit 605c663a4c

View File

@@ -349,9 +349,10 @@ p
for match_id, start, end in matches: for match_id, start, end in matches:
spans.append(doc[start:end]) spans.append(doc[start:end])
for span in spans: for span in spans:
span.merge(is_stop=True) # merge (and mark it as a stop word) span.merge() # merge
for token in span: for token in span:
token._.bad_html = True # mark token as bad HTML token._.bad_html = True # mark token as bad HTML
doc.vocab[token.text].is_stop = True # mark lexeme as stop word
return doc return doc
nlp = spacy.load('en_core_web_sm') nlp = spacy.load('en_core_web_sm')