mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-05 14:10:34 +03:00
Fix lightning tour example
This commit is contained in:
parent
88ca82bfa6
commit
ab83dd5d25
|
@ -148,24 +148,20 @@ p
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
def put_spans_around_tokens(doc, get_classes):
|
def put_spans_around_tokens(doc, get_classes):
|
||||||
'''Given some function to compute class names, put each token in a
|
"""Given some function to compute class names, put each token in a
|
||||||
span element, with the appropriate classes computed.
|
span element, with the appropriate classes computed. All whitespace is
|
||||||
|
preserved, outside of the spans. (Of course, HTML won't display more than
|
||||||
All whitespace is preserved, outside of the spans. (Yes, I know HTML
|
one whitespace character – but the point is, no information is lost
|
||||||
won't display it. But the point is no information is lost, so you can
|
and you can calculate what you need, e.g. <br />, <p> etc.)
|
||||||
calculate what you need, e.g. <br /> tags, <p> tags, etc.)
|
"""
|
||||||
'''
|
|
||||||
output = []
|
output = []
|
||||||
template = '<span classes="{classes}">{word}</span>{space}'
|
html = '<span class="{classes}">{word}</span>{space}'
|
||||||
for token in doc:
|
for token in doc:
|
||||||
if token.is_space:
|
if token.is_space:
|
||||||
output.append(token.orth_)
|
output.append(token.text)
|
||||||
else:
|
else:
|
||||||
output.append(
|
classes = ' '.join(get_classes(token))
|
||||||
template.format(
|
output.append(html.format(classes=classes, word=token.text, space=token.whitespace_))
|
||||||
classes=' '.join(get_classes(token)),
|
|
||||||
word=token.orth_,
|
|
||||||
space=token.whitespace_))
|
|
||||||
string = ''.join(output)
|
string = ''.join(output)
|
||||||
string = string.replace('\n', '')
|
string = string.replace('\n', '')
|
||||||
string = string.replace('\t', ' ')
|
string = string.replace('\t', ' ')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user