* Work around get_lex_attr bug introduced during German parsing
This commit is contained in:
parent bc3c8d8adf
commit 35214053fd
@@ -14,6 +14,7 @@ from spacy.syntax.parser import Parser
 from spacy.syntax.arc_eager import ArcEager
 from spacy.syntax.parser import get_templates
 from spacy.scorer import Scorer
+import spacy.attrs
 
 from spacy.language import Language
 
@@ -47,6 +48,7 @@ class TreebankParser(object):
     @classmethod
     def from_dir(cls, tag_map, model_dir):
         vocab = Vocab(tag_map=tag_map, get_lex_attr=Language.default_lex_attrs())
+        vocab.get_lex_attr[spacy.attrs.LANG] = lambda _: 0
         tokenizer = Tokenizer(vocab, {}, None, None, None)
         tagger = Tagger.blank(vocab, TAGGER_TEMPLATES)
 
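The workaround in this hunk swaps the LANG getter for a constant function, so every lexeme receives language id 0 instead of going through the getter that the German parsing work broke. Below is a minimal standalone sketch of that pattern, not the actual spaCy internals: DemoVocab, default_lex_attrs, and the LANG = 1 constant are made-up stand-ins for Vocab, Language.default_lex_attrs(), and spacy.attrs.LANG.

# Standalone sketch of the workaround pattern (hypothetical names, not the
# spaCy API): a vocab-like object keeps {attribute id: getter function}, and
# the fix replaces the LANG getter with a constant so every word gets 0.

LANG = 1  # stand-in for spacy.attrs.LANG


def default_lex_attrs():
    # Each getter derives one lexical attribute from the word string;
    # this placeholder mimics a getter that can misbehave.
    return {LANG: lambda string: hash(string) % 7}


class DemoVocab:
    def __init__(self, get_lex_attr):
        self.get_lex_attr = get_lex_attr

    def lex_attr(self, attr_id, string):
        return self.get_lex_attr[attr_id](string)


vocab = DemoVocab(get_lex_attr=default_lex_attrs())
vocab.get_lex_attr[LANG] = lambda _: 0  # the workaround: constant language id
assert vocab.lex_attr(LANG, 'Haus') == 0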
@@ -99,7 +101,7 @@ def read_conllx(loc):
     for sent in text.strip().split('\n\n'):
         lines = sent.strip().split('\n')
         if lines:
-            if lines[0].startswith('#'):
+            while lines[0].startswith('#'):
                 lines.pop(0)
             tokens = []
             for line in lines:
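The read_conllx change replaces if with while so that all leading comment lines in a CoNLL-X sentence block are dropped, not just the first one. The following self-contained sketch shows the difference; the sample block below is invented, not taken from the repository's data.

# Invented CoNLL-X-style block with two leading '#' comment lines.
sent = ("# sent_id = 1\n"
        "# text = Das ist ein Haus\n"
        "1\tDas\t_\tPRON\n"
        "2\tist\t_\tVERB\n")

lines = sent.strip().split('\n')
if lines:
    # The old code used `if lines[0].startswith('#')`, which removes only the
    # first comment and leaves '# text = ...' to be parsed as a token row.
    while lines[0].startswith('#'):
        lines.pop(0)
    tokens = [line.split('\t') for line in lines]
    print(tokens)  # only the two real token rows remain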