mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Fix formatting
This commit is contained in:
parent
2e4db1beb9
commit
293ee359c5
|
@ -638,7 +638,7 @@ cdef class Doc:
|
||||||
orth_ = text[start:end]
|
orth_ = text[start:end]
|
||||||
lex = self.vocab.get(self.mem, orth_)
|
lex = self.vocab.get(self.mem, orth_)
|
||||||
self.push_back(lex, has_space)
|
self.push_back(lex, has_space)
|
||||||
|
|
||||||
start = end + has_space
|
start = end + has_space
|
||||||
self.from_array([TAG,LEMMA,HEAD,DEP,ENT_IOB,ENT_TYPE],
|
self.from_array([TAG,LEMMA,HEAD,DEP,ENT_IOB,ENT_TYPE],
|
||||||
attrs[:, 2:])
|
attrs[:, 2:])
|
||||||
|
|
|
@ -1,13 +1,20 @@
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
def merge_ents(doc):
    """
    Helper: merge each entity of the doc into a single token. The doc is
    modified in place and is also returned.
    """
    for entity in doc.ents:
        # The merged token takes the tag of the entity's root token, the
        # entity's text, and the entity's label.
        tag, text, label = entity.root.tag_, entity.text, entity.label_
        entity.merge(tag, text, label)
    return doc
|
||||||
|
|
||||||
|
|
||||||
def format_POS(token, light, flat):
|
def format_POS(token, light, flat):
|
||||||
'''helper: form the POS output for a token'''
|
"""
|
||||||
|
Helper: form the POS output for a token.
|
||||||
|
"""
|
||||||
subtree = dict([
|
subtree = dict([
|
||||||
("word", token.text),
|
("word", token.text),
|
||||||
("lemma", token.lemma_), # trigger
|
("lemma", token.lemma_), # trigger
|
||||||
|
@ -26,16 +33,21 @@ def format_POS(token, light, flat):
|
||||||
return subtree
|
return subtree
|
||||||
|
|
||||||
def POS_tree(root, light, flat):
    """
    Helper: generate a POS tree for a root token. The doc must have had
    merge_ents(doc) run on it.

    root: The token to use as the root of the (sub)tree.
    light, flat: Formatting flags; passed unchanged to format_POS for the
        root and for every descendant.

    Returns the structure produced by format_POS for the root, with one
    nested subtree appended to its "modifiers" entry per child of the root.
    """
    subtree = format_POS(root, light=light, flat=flat)
    for child in root.children:
        # light and flat are required parameters of POS_tree, so the
        # recursive call must forward them; calling POS_tree(child) alone
        # raises TypeError as soon as a token has a child.
        subtree["modifiers"].append(POS_tree(child, light=light, flat=flat))
    return subtree
|
||||||
|
|
||||||
|
|
||||||
def parse_tree(doc, light=False, flat=False):
|
def parse_tree(doc, light=False, flat=False):
|
||||||
"""Makes a copy of the doc, then construct a syntactic parse tree, similar to the one used in displaCy. Generates the POS tree for all sentences in a doc
|
"""
|
||||||
|
Makes a copy of the doc, then construct a syntactic parse tree, similar to
|
||||||
|
the one used in displaCy. Generates the POS tree for all sentences in a doc.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
doc: The doc for parsing.
|
doc: The doc for parsing.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user