mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-05 20:33:10 +03:00
Merge branch 'master' of ssh://github.com/explosion/spaCy
This commit is contained in:
commit
bd6e24fe0e
|
@ -4,7 +4,7 @@
|
||||||
# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
|
# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
|
||||||
|
|
||||||
__title__ = 'spacy'
|
__title__ = 'spacy'
|
||||||
__version__ = '1.1.2'
|
__version__ = '1.1.3'
|
||||||
__summary__ = 'Industrial-strength NLP'
|
__summary__ = 'Industrial-strength NLP'
|
||||||
__uri__ = 'https://spacy.io'
|
__uri__ = 'https://spacy.io'
|
||||||
__author__ = 'Matthew Honnibal'
|
__author__ = 'Matthew Honnibal'
|
||||||
|
|
|
@ -35,7 +35,7 @@ cdef class Morphology:
|
||||||
|
|
||||||
cdef int assign_tag(self, TokenC* token, tag) except -1
|
cdef int assign_tag(self, TokenC* token, tag) except -1
|
||||||
|
|
||||||
cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1
|
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
|
||||||
|
|
||||||
cdef int assign_feature(self, uint64_t* morph, feature, value) except -1
|
cdef int assign_feature(self, uint64_t* morph, feature, value) except -1
|
||||||
|
|
||||||
|
|
|
@ -39,9 +39,9 @@ cdef class Morphology:
|
||||||
tag_id = self.reverse_index[self.strings[tag]]
|
tag_id = self.reverse_index[self.strings[tag]]
|
||||||
else:
|
else:
|
||||||
tag_id = self.reverse_index[tag]
|
tag_id = self.reverse_index[tag]
|
||||||
self._assign_tag_id(token, tag_id)
|
self.assign_tag_id(token, tag_id)
|
||||||
|
|
||||||
cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1:
|
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1:
|
||||||
if tag_id >= self.n_tags:
|
if tag_id >= self.n_tags:
|
||||||
raise ValueError("Unknown tag ID: %s" % tag_id)
|
raise ValueError("Unknown tag ID: %s" % tag_id)
|
||||||
# TODO: It's pretty arbitrary to put this logic here. I guess the justification
|
# TODO: It's pretty arbitrary to put this logic here. I guess the justification
|
||||||
|
|
|
@ -196,7 +196,7 @@ cdef class Tagger:
|
||||||
self.model.set_scoresC(eg.c.scores,
|
self.model.set_scoresC(eg.c.scores,
|
||||||
eg.c.features, eg.c.nr_feat)
|
eg.c.features, eg.c.nr_feat)
|
||||||
guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class)
|
guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class)
|
||||||
self.vocab.morphology.assign_tag(&tokens.c[i], guess)
|
self.vocab.morphology.assign_tag_id(&tokens.c[i], guess)
|
||||||
eg.fill_scores(0, eg.c.nr_class)
|
eg.fill_scores(0, eg.c.nr_class)
|
||||||
tokens.is_tagged = True
|
tokens.is_tagged = True
|
||||||
tokens._py_tokens = [None] * tokens.length
|
tokens._py_tokens = [None] * tokens.length
|
||||||
|
|
|
@ -107,10 +107,11 @@ cdef class Tokenizer:
|
||||||
return (self.__class__, args, None, None)
|
return (self.__class__, args, None, None)
|
||||||
|
|
||||||
cpdef Doc tokens_from_list(self, list strings):
|
cpdef Doc tokens_from_list(self, list strings):
|
||||||
raise NotImplementedError(
|
return Doc(self.vocab, words=strings)
|
||||||
"Method deprecated in 1.0.\n"
|
#raise NotImplementedError(
|
||||||
"Old: tokenizer.tokens_from_list(strings)\n"
|
# "Method deprecated in 1.0.\n"
|
||||||
"New: Doc(tokenizer.vocab, words=strings)")
|
# "Old: tokenizer.tokens_from_list(strings)\n"
|
||||||
|
# "New: Doc(tokenizer.vocab, words=strings)")
|
||||||
|
|
||||||
@cython.boundscheck(False)
|
@cython.boundscheck(False)
|
||||||
def __call__(self, unicode string):
|
def __call__(self, unicode string):
|
||||||
|
|
|
@ -577,8 +577,7 @@ cdef class Doc:
|
||||||
elif attr_id == TAG:
|
elif attr_id == TAG:
|
||||||
for i in range(length):
|
for i in range(length):
|
||||||
if values[i] != 0:
|
if values[i] != 0:
|
||||||
self.vocab.morphology.assign_tag(&tokens[i],
|
self.vocab.morphology.assign_tag(&tokens[i], values[i])
|
||||||
self.vocab.morphology.reverse_index[values[i]])
|
|
||||||
elif attr_id == POS:
|
elif attr_id == POS:
|
||||||
for i in range(length):
|
for i in range(length):
|
||||||
tokens[i].pos = <univ_pos_t>values[i]
|
tokens[i].pos = <univ_pos_t>values[i]
|
||||||
|
|
|
@ -43,7 +43,7 @@ p
|
||||||
| #[code token.dep_].
|
| #[code token.dep_].
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
from spacy.symbols import DET
|
from spacy.symbols import det
|
||||||
the, dog = nlp(u'the dog')
|
the, dog = nlp(u'the dog')
|
||||||
assert the.dep == det
|
assert the.dep == det
|
||||||
assert the.dep_ == 'det'
|
assert the.dep_ == 'det'
|
||||||
|
@ -96,14 +96,14 @@ p
|
||||||
print([w.text for w in apples.rights])
|
print([w.text for w in apples.rights])
|
||||||
# ['on']
|
# ['on']
|
||||||
assert apples.n_lefts == 2
|
assert apples.n_lefts == 2
|
||||||
assert apples.n_rights == 3
|
assert apples.n_rights == 1
|
||||||
|
|
||||||
from spacy.symbols import nsubj
|
from spacy.symbols import nsubj
|
||||||
doc = nlp(u'Credit and mortgage account holders must submit their requests within 30 days.')
|
doc = nlp(u'Credit and mortgage account holders must submit their requests within 30 days.')
|
||||||
root = [w for w in doc if w.head is w][0]
|
root = [w for w in doc if w.head is w][0]
|
||||||
subject = list(root.lefts)[0]
|
subject = list(root.lefts)[0]
|
||||||
for descendant in subject.subtree:
|
for descendant in subject.subtree:
|
||||||
assert subject.is_ancestor(descendant)
|
assert subject.is_ancestor_of(descendant)
|
||||||
|
|
||||||
from spacy.symbols import nsubj
|
from spacy.symbols import nsubj
|
||||||
doc = nlp(u'Credit and mortgage account holders must submit their requests.')
|
doc = nlp(u'Credit and mortgage account holders must submit their requests.')
|
||||||
|
@ -131,7 +131,7 @@ p
|
||||||
|
|
||||||
p
|
p
|
||||||
| Finally, I often find the #[code .left_edge] and #[code .right_edge]
|
| Finally, I often find the #[code .left_edge] and #[code .right_edge]
|
||||||
| attributes especially useful. They give you the first and right tokens
|
| attributes especially useful. They give you the first and last token
|
||||||
| of the subtree. This is the easiest way to create a #[code Span] object
|
| of the subtree. This is the easiest way to create a #[code Span] object
|
||||||
| for a syntactic phrase — a useful operation.
|
| for a syntactic phrase — a useful operation.
|
||||||
|
|
||||||
|
@ -160,4 +160,4 @@ p
|
||||||
+code.
|
+code.
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load('en')
|
||||||
doc1 = nlp(u'Text I do want parsed.')
|
doc1 = nlp(u'Text I do want parsed.')
|
||||||
doc2 = nlp(u'Text I don't want parsed', parser=False)
|
doc2 = nlp(u"Text I don't want parsed", parse=False)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user