mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-28 02:04:07 +03:00
Improve efficiency of deprojectivization
This commit is contained in:
parent
8fec7268eb
commit
db5c714ad2
|
@ -1,4 +1,6 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
# cython: profile=True
|
||||||
|
# cython: infer_types=True
|
||||||
"""Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005
|
"""Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005
|
||||||
for doing pseudo-projective parsing. The implementation uses the HEAD decoration
|
for doing pseudo-projective parsing. The implementation uses the HEAD decoration
|
||||||
scheme.
|
scheme.
|
||||||
|
@ -7,6 +9,8 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from copy import copy
|
from copy import copy
|
||||||
|
|
||||||
|
from ..tokens.doc cimport Doc
|
||||||
|
|
||||||
|
|
||||||
DELIMITER = '||'
|
DELIMITER = '||'
|
||||||
|
|
||||||
|
@ -111,17 +115,18 @@ def projectivize(heads, labels):
|
||||||
return proj_heads, deco_labels
|
return proj_heads, deco_labels
|
||||||
|
|
||||||
|
|
||||||
def deprojectivize(tokens):
|
cpdef deprojectivize(Doc doc):
|
||||||
# Reattach arcs with decorated labels (following HEAD scheme). For each
|
# Reattach arcs with decorated labels (following HEAD scheme). For each
|
||||||
# decorated arc X||Y, search top-down, left-to-right, breadth-first until
|
# decorated arc X||Y, search top-down, left-to-right, breadth-first until
|
||||||
# hitting a Y then make this the new head.
|
# hitting a Y then make this the new head.
|
||||||
for token in tokens:
|
for i in range(doc.length):
|
||||||
if is_decorated(token.dep_):
|
label = doc.vocab.strings[doc.c[i].dep]
|
||||||
newlabel, headlabel = decompose(token.dep_)
|
if DELIMITER in label:
|
||||||
newhead = _find_new_head(token, headlabel)
|
new_label, head_label = label.split(DELIMITER)
|
||||||
token.head = newhead
|
new_head = _find_new_head(doc[i], head_label)
|
||||||
token.dep_ = newlabel
|
doc[i].head = new_head
|
||||||
return tokens
|
doc.c[i].dep = new_label
|
||||||
|
return doc
|
||||||
|
|
||||||
|
|
||||||
def _decorate(heads, proj_heads, labels):
|
def _decorate(heads, proj_heads, labels):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user