Merge branch 'master' of ssh://github.com/explosion/spaCy
Commit: bd6e24fe0e
@@ -4,7 +4,7 @@
 # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py

 __title__ = 'spacy'
-__version__ = '1.1.2'
+__version__ = '1.1.3'
 __summary__ = 'Industrial-strength NLP'
 __uri__ = 'https://spacy.io'
 __author__ = 'Matthew Honnibal'
@@ -35,7 +35,7 @@ cdef class Morphology:

     cdef int assign_tag(self, TokenC* token, tag) except -1

-    cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1
+    cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1

     cdef int assign_feature(self, uint64_t* morph, feature, value) except -1
@@ -39,9 +39,9 @@ cdef class Morphology:
             tag_id = self.reverse_index[self.strings[tag]]
         else:
             tag_id = self.reverse_index[tag]
-        self._assign_tag_id(token, tag_id)
+        self.assign_tag_id(token, tag_id)

-    cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1:
+    cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1:
         if tag_id >= self.n_tags:
             raise ValueError("Unknown tag ID: %s" % tag_id)
         # TODO: It's pretty arbitrary to put this logic here. I guess the justification
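The private `_assign_tag_id` helper becomes the public `assign_tag_id`, so callers that already hold a row index into the tag table (such as the Tagger below) can skip the name lookup, while `assign_tag` keeps resolving a tag given by name or by string-store ID through `reverse_index`. A minimal pure-Python sketch of that split, with a toy string store standing in for spaCy's StringStore (illustrative only, not the actual Cython API):

class MorphologySketch(object):
    """Toy stand-in for Morphology: shows assign_tag vs assign_tag_id."""
    def __init__(self, tag_names):
        # toy "string store": tag name -> arbitrary string ID
        self.strings = {name: 1000 + i for i, name in enumerate(tag_names)}
        # string ID -> row index in the tag table
        self.reverse_index = {self.strings[name]: i for i, name in enumerate(tag_names)}
        self.n_tags = len(tag_names)

    def assign_tag(self, token, tag):
        # accepts a tag name or a string-store ID and resolves it to a row index
        if isinstance(tag, str):
            tag_id = self.reverse_index[self.strings[tag]]
        else:
            tag_id = self.reverse_index[tag]
        self.assign_tag_id(token, tag_id)

    def assign_tag_id(self, token, tag_id):
        # takes the row index directly, as the Tagger does with its argmax guess
        if tag_id >= self.n_tags:
            raise ValueError("Unknown tag ID: %s" % tag_id)
        token['tag'] = tag_id

morph = MorphologySketch(['DT', 'NN', 'VBZ'])
token = {}
morph.assign_tag(token, 'NN')     # by name
assert token['tag'] == 1
morph.assign_tag_id(token, 0)     # by row index
assert token['tag'] == 0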
@@ -196,7 +196,7 @@ cdef class Tagger:
             self.model.set_scoresC(eg.c.scores,
                 eg.c.features, eg.c.nr_feat)
             guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class)
-            self.vocab.morphology.assign_tag(&tokens.c[i], guess)
+            self.vocab.morphology.assign_tag_id(&tokens.c[i], guess)
             eg.fill_scores(0, eg.c.nr_class)
         tokens.is_tagged = True
         tokens._py_tokens = [None] * tokens.length
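The guess produced by `VecVec.arg_max_if_true` is the index of the best-scoring valid tag class, i.e. a row into the tag table rather than a string-store ID, which is why it now goes through `assign_tag_id`. A small sketch of what such a masked argmax computes (an illustration, not spaCy's C implementation):

def arg_max_if_true(scores, is_valid):
    """Return the index of the highest score whose is_valid flag is set."""
    best, best_score = -1, float('-inf')
    for i, (score, valid) in enumerate(zip(scores, is_valid)):
        if valid and score > best_score:
            best, best_score = i, score
    return best

scores = [0.2, 3.1, 1.7]
is_valid = [True, False, True]
guess = arg_max_if_true(scores, is_valid)
assert guess == 2   # an index into the tag table, hence assign_tag_id(&token, guess)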
@@ -107,10 +107,11 @@ cdef class Tokenizer:
         return (self.__class__, args, None, None)

     cpdef Doc tokens_from_list(self, list strings):
-        raise NotImplementedError(
-            "Method deprecated in 1.0.\n"
-            "Old: tokenizer.tokens_from_list(strings)\n"
-            "New: Doc(tokenizer.vocab, words=strings)")
+        return Doc(self.vocab, words=strings)
+        #raise NotImplementedError(
+        #    "Method deprecated in 1.0.\n"
+        #    "Old: tokenizer.tokens_from_list(strings)\n"
+        #    "New: Doc(tokenizer.vocab, words=strings)")

     @cython.boundscheck(False)
     def __call__(self, unicode string):
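This reinstates `tokens_from_list` as a thin wrapper that builds a `Doc` from pre-tokenized words, keeping the 1.0 deprecation message around as a comment. Both spellings below should produce the same tokens; the sketch assumes an installed English model and the spaCy 1.x loading API used elsewhere in this commit:

import spacy
from spacy.tokens import Doc

nlp = spacy.load('en')
words = [u'Hello', u'world', u'!']

doc_old = nlp.tokenizer.tokens_from_list(words)   # method reinstated here
doc_new = Doc(nlp.vocab, words=words)             # replacement suggested by the old message

assert [t.text for t in doc_old] == [t.text for t in doc_new]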
@@ -577,8 +577,7 @@ cdef class Doc:
         elif attr_id == TAG:
             for i in range(length):
                 if values[i] != 0:
-                    self.vocab.morphology.assign_tag(&tokens[i],
-                        self.vocab.morphology.reverse_index[values[i]])
+                    self.vocab.morphology.assign_tag(&tokens[i], values[i])
         elif attr_id == POS:
             for i in range(length):
                 tokens[i].pos = <univ_pos_t>values[i]
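Here `values[i]` holds the string-store ID of the tag, and the reworked `Morphology.assign_tag` above now performs the `reverse_index` lookup itself, so `from_array` can pass the value straight through. A hedged round-trip sketch of the code path this touches, again assuming an installed English model and the 1.x `to_array`/`from_array` signatures:

import spacy
from spacy.attrs import TAG
from spacy.tokens import Doc

nlp = spacy.load('en')
doc = nlp(u'The dog barks')

tag_array = doc.to_array([TAG])                      # string-store IDs, one row per token
doc2 = Doc(nlp.vocab, words=[t.text for t in doc])   # untagged copy of the same words
doc2.from_array([TAG], tag_array)                    # goes through assign_tag above

assert [t.tag_ for t in doc2] == [t.tag_ for t in doc]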
@@ -43,7 +43,7 @@ p
     | #[code token.dep_].

 +aside-code("Example").
-    from spacy.symbols import DET
+    from spacy.symbols import det
     the, dog = nlp(u'the dog')
     assert the.dep == det
     assert the.dep_ == 'det'
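The doc fix reflects the naming convention in `spacy.symbols`: dependency-label symbols such as `det` are lower-case, while the upper-case `DET` is the coarse part-of-speech symbol, so comparing `token.dep` against `DET` would never match. A brief illustration (hedged: assumes the English model tags 'the' as a determiner):

import spacy
from spacy.symbols import DET, det

nlp = spacy.load('en')
the = nlp(u'the dog')[0]
assert the.dep == det      # dependency relation, lower-case symbol
assert the.dep_ == 'det'
assert the.pos == DET      # coarse part of speech, upper-case symbol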
@@ -96,14 +96,14 @@ p
     print([w.text for w in apples.rights])
     # ['on']
     assert apples.n_lefts == 2
-    assert apples.n_rights == 3
+    assert apples.n_rights == 1

     from spacy.symbols import nsubj
     doc = nlp(u'Credit and mortgage account holders must submit their requests within 30 days.')
     root = [w for w in doc if w.head is w][0]
     subject = list(root.lefts)[0]
     for descendant in subject.subtree:
-        assert subject.is_ancestor(descendant)
+        assert subject.is_ancestor_of(descendant)

     from spacy.symbols import nsubj
     doc = nlp(u'Credit and mortgage account holders must submit their requests.')
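Both edits correct the example against the 1.x API: `apples.rights` yields a single right-attached child (the `['on']` printed just above), and the Token method is spelled `is_ancestor_of`. A quick way to sanity-check such counts, shown here on the 'Credit and mortgage' sentence from the same example (assumes an installed English model):

import spacy

nlp = spacy.load('en')
doc = nlp(u'Credit and mortgage account holders must submit their requests within 30 days.')
holders = [t for t in doc if t.text == 'holders'][0]

# n_lefts / n_rights always agree with the corresponding child iterators
assert holders.n_lefts == len(list(holders.lefts))
assert holders.n_rights == len(list(holders.rights))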
@@ -131,7 +131,7 @@ p

 p
     | Finally, I often find the #[code .left_edge] and #[code right_edge]
-    | attributes especially useful. They give you the first and right tokens
+    | attributes especially useful. They give you the first and last token
     | of the subtree. This is the easiest way to create a #[code Span] object
     | for a syntactic phrase — a useful operation.
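As the corrected prose says, `.left_edge` and `.right_edge` point at the first and last token of a token's subtree, so slicing the `Doc` between their indices is a convenient way to get a `Span` for the whole phrase. A short sketch (hedged: assumes an installed English model and the 1.x slicing API):

import spacy

nlp = spacy.load('en')
doc = nlp(u'Credit and mortgage account holders must submit their requests')
holders = [t for t in doc if t.text == 'holders'][0]

# slice from the leftmost to the rightmost token of the subtree, inclusive
phrase = doc[holders.left_edge.i : holders.right_edge.i + 1]
print(phrase.text)   # e.g. 'Credit and mortgage account holders'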
@@ -160,4 +160,4 @@ p
 +code.
     nlp = spacy.load('en')
     doc1 = nlp(u'Text I do want parsed.')
-    doc2 = nlp(u'Text I don't want parsed', parser=False)
+    doc2 = nlp(u"Text I don't want parsed", parse=False)
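The corrected call uses double quotes so the apostrophe in "don't" does not terminate the string, and `parse=False` rather than `parser=False`, matching the keyword shown in the fixed example. Following on from the snippet above, a brief hedged check that parsing was actually skipped (assumes `doc.is_parsed` as in the 1.x API):

doc2 = nlp(u"Text I don't want parsed", parse=False)
assert not doc2.is_parsed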