mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Merge branch 'develop' of https://github.com/honnibal/spaCy into develop
This commit is contained in:
commit
31ccf494e6
|
@ -1,4 +1,5 @@
|
||||||
from os import path
|
from os import path
|
||||||
|
from warnings import warn
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import ujson as json
|
import ujson as json
|
||||||
|
@ -184,7 +185,10 @@ class Language(object):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def __init__(self, data_dir=None, vocab=None, tokenizer=None, tagger=None,
|
def __init__(self, data_dir=None, vocab=None, tokenizer=None, tagger=None,
|
||||||
parser=None, entity=None, matcher=None, serializer=None):
|
parser=None, entity=None, matcher=None, serializer=None,
|
||||||
|
load_vectors=True):
|
||||||
|
if load_vectors is not True:
|
||||||
|
warn("load_vectors is deprecated", DeprecationWarning)
|
||||||
if data_dir is None:
|
if data_dir is None:
|
||||||
data_dir = self.default_data_dir()
|
data_dir = self.default_data_dir()
|
||||||
if vocab is None:
|
if vocab is None:
|
||||||
|
|
|
@ -60,19 +60,19 @@ cdef class Lexeme:
|
||||||
def __set__(self, int x): self.c.suffix = x
|
def __set__(self, int x): self.c.suffix = x
|
||||||
|
|
||||||
property cluster:
|
property cluster:
|
||||||
def __get__(self): return self.c.suffix
|
def __get__(self): return self.c.cluster
|
||||||
def __set__(self, int x): self.c.suffix = x
|
def __set__(self, int x): self.c.cluster = x
|
||||||
|
|
||||||
property prob:
|
property prob:
|
||||||
def __get__(self): return self.c.suffix
|
def __get__(self): return self.c.prob
|
||||||
def __set__(self, int x): self.c.suffix = x
|
def __set__(self, float x): self.c.prob = x
|
||||||
|
|
||||||
property lower_:
|
property lower_:
|
||||||
def __get__(self): return self.vocab.strings[self.c.lower]
|
def __get__(self): return self.vocab.strings[self.c.lower]
|
||||||
def __set__(self, unicode x): self.c.lower = self.vocab.strings[x]
|
def __set__(self, unicode x): self.c.lower = self.vocab.strings[x]
|
||||||
|
|
||||||
property norm_:
|
property norm_:
|
||||||
def __get__(self): return self.c.norm
|
def __get__(self): return self.vocab.strings[self.c.norm]
|
||||||
def __set__(self, unicode x): self.c.norm = self.vocab.strings[x]
|
def __set__(self, unicode x): self.c.norm = self.vocab.strings[x]
|
||||||
|
|
||||||
property shape_:
|
property shape_:
|
||||||
|
@ -80,11 +80,11 @@ cdef class Lexeme:
|
||||||
def __set__(self, unicode x): self.c.shape = self.vocab.strings[x]
|
def __set__(self, unicode x): self.c.shape = self.vocab.strings[x]
|
||||||
|
|
||||||
property prefix_:
|
property prefix_:
|
||||||
def __get__(self): return self.c.prefix
|
def __get__(self): return self.vocab.strings[self.c.prefix]
|
||||||
def __set__(self, unicode x): self.c.prefix = self.vocab.strings[x]
|
def __set__(self, unicode x): self.c.prefix = self.vocab.strings[x]
|
||||||
|
|
||||||
property suffix_:
|
property suffix_:
|
||||||
def __get__(self): return self.c.suffix
|
def __get__(self): return self.vocab.strings[self.c.suffix]
|
||||||
def __set__(self, unicode x): self.c.suffix = self.vocab.strings[x]
|
def __set__(self, unicode x): self.c.suffix = self.vocab.strings[x]
|
||||||
|
|
||||||
property flags:
|
property flags:
|
||||||
|
|
|
@ -24,6 +24,7 @@ cdef class Morphology:
|
||||||
self.rich_tags[i].id = i
|
self.rich_tags[i].id = i
|
||||||
self.rich_tags[i].name = self.strings[tag_str]
|
self.rich_tags[i].name = self.strings[tag_str]
|
||||||
self.rich_tags[i].morph = 0
|
self.rich_tags[i].morph = 0
|
||||||
|
self.rich_tags[i].pos = UNIV_POS_NAMES[props['pos'].upper()]
|
||||||
self.reverse_index[self.rich_tags[i].name] = i
|
self.reverse_index[self.rich_tags[i].name] = i
|
||||||
self._cache = PreshMapArray(self.n_tags)
|
self._cache = PreshMapArray(self.n_tags)
|
||||||
|
|
||||||
|
|
|
@ -188,7 +188,7 @@ cdef class Doc:
|
||||||
def noun_chunks(self):
|
def noun_chunks(self):
|
||||||
"""Yield spans for base noun phrases."""
|
"""Yield spans for base noun phrases."""
|
||||||
cdef const TokenC* word
|
cdef const TokenC* word
|
||||||
labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr']
|
labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr', 'conj']
|
||||||
np_deps = [self.vocab.strings[label] for label in labels]
|
np_deps = [self.vocab.strings[label] for label in labels]
|
||||||
np_label = self.vocab.strings['NP']
|
np_label = self.vocab.strings['NP']
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user