Merge branch 'develop' of https://github.com/honnibal/spaCy into develop

This commit is contained in:
Matthew Honnibal 2015-09-09 14:33:38 +02:00
commit 31ccf494e6
4 changed files with 14 additions and 9 deletions

View File

@@ -1,4 +1,5 @@
from os import path from os import path
from warnings import warn
try: try:
import ujson as json import ujson as json
@@ -184,7 +185,10 @@ class Language(object):
return None return None
def __init__(self, data_dir=None, vocab=None, tokenizer=None, tagger=None, def __init__(self, data_dir=None, vocab=None, tokenizer=None, tagger=None,
parser=None, entity=None, matcher=None, serializer=None): parser=None, entity=None, matcher=None, serializer=None,
load_vectors=True):
if load_vectors is not True:
warn("load_vectors is deprecated", DeprecationWarning)
if data_dir is None: if data_dir is None:
data_dir = self.default_data_dir() data_dir = self.default_data_dir()
if vocab is None: if vocab is None:

View File

@@ -60,19 +60,19 @@ cdef class Lexeme:
def __set__(self, int x): self.c.suffix = x def __set__(self, int x): self.c.suffix = x
property cluster: property cluster:
def __get__(self): return self.c.suffix def __get__(self): return self.c.cluster
def __set__(self, int x): self.c.suffix = x def __set__(self, int x): self.c.cluster = x
property prob: property prob:
def __get__(self): return self.c.suffix def __get__(self): return self.c.prob
def __set__(self, int x): self.c.suffix = x def __set__(self, float x): self.c.prob = x
property lower_: property lower_:
def __get__(self): return self.vocab.strings[self.c.lower] def __get__(self): return self.vocab.strings[self.c.lower]
def __set__(self, unicode x): self.c.lower = self.vocab.strings[x] def __set__(self, unicode x): self.c.lower = self.vocab.strings[x]
property norm_: property norm_:
def __get__(self): return self.c.norm def __get__(self): return self.vocab.strings[self.c.norm]
def __set__(self, unicode x): self.c.norm = self.vocab.strings[x] def __set__(self, unicode x): self.c.norm = self.vocab.strings[x]
property shape_: property shape_:
@@ -80,11 +80,11 @@ cdef class Lexeme:
def __set__(self, unicode x): self.c.shape = self.vocab.strings[x] def __set__(self, unicode x): self.c.shape = self.vocab.strings[x]
property prefix_: property prefix_:
def __get__(self): return self.c.prefix def __get__(self): return self.vocab.strings[self.c.prefix]
def __set__(self, unicode x): self.c.prefix = self.vocab.strings[x] def __set__(self, unicode x): self.c.prefix = self.vocab.strings[x]
property suffix_: property suffix_:
def __get__(self): return self.c.suffix def __get__(self): return self.vocab.strings[self.c.suffix]
def __set__(self, unicode x): self.c.suffix = self.vocab.strings[x] def __set__(self, unicode x): self.c.suffix = self.vocab.strings[x]
property flags: property flags:

View File

@@ -24,6 +24,7 @@ cdef class Morphology:
self.rich_tags[i].id = i self.rich_tags[i].id = i
self.rich_tags[i].name = self.strings[tag_str] self.rich_tags[i].name = self.strings[tag_str]
self.rich_tags[i].morph = 0 self.rich_tags[i].morph = 0
self.rich_tags[i].pos = UNIV_POS_NAMES[props['pos'].upper()]
self.reverse_index[self.rich_tags[i].name] = i self.reverse_index[self.rich_tags[i].name] = i
self._cache = PreshMapArray(self.n_tags) self._cache = PreshMapArray(self.n_tags)

View File

@@ -188,7 +188,7 @@ cdef class Doc:
def noun_chunks(self): def noun_chunks(self):
"""Yield spans for base noun phrases.""" """Yield spans for base noun phrases."""
cdef const TokenC* word cdef const TokenC* word
labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr'] labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr', 'conj']
np_deps = [self.vocab.strings[label] for label in labels] np_deps = [self.vocab.strings[label] for label in labels]
np_label = self.vocab.strings['NP'] np_label = self.vocab.strings['NP']
for i in range(self.length): for i in range(self.length):