Merge branch 'master' into spacy.io

This commit is contained in:
Ines Montani 2019-06-01 11:35:12 +02:00
commit 2b8bfd6cc7

View File

@@ -405,7 +405,7 @@ class BadHTMLMerger(object):
for match_id, start, end in matches: for match_id, start, end in matches:
spans.append(doc[start:end]) spans.append(doc[start:end])
with doc.retokenize() as retokenizer: with doc.retokenize() as retokenizer:
for span in hashtags: for span in spans:
retokenizer.merge(span) retokenizer.merge(span)
for token in span: for token in span:
token._.bad_html = True # Mark token as bad HTML token._.bad_html = True # Mark token as bad HTML
@@ -679,7 +679,7 @@ for match_id, start, end in matches:
if doc.vocab.strings[match_id] == "HASHTAG": if doc.vocab.strings[match_id] == "HASHTAG":
hashtags.append(doc[start:end]) hashtags.append(doc[start:end])
with doc.retokenize() as retokenizer: with doc.retokenize() as retokenizer:
for span in spans: for span in hashtags:
retokenizer.merge(span) retokenizer.merge(span)
for token in span: for token in span:
token._.is_hashtag = True token._.is_hashtag = True