diff --git a/spacy/about.py b/spacy/about.py index 45570be81..d35113283 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -4,7 +4,7 @@ # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py __title__ = 'spacy' -__version__ = '1.1.2' +__version__ = '1.1.3' __summary__ = 'Industrial-strength NLP' __uri__ = 'https://spacy.io' __author__ = 'Matthew Honnibal' diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd index ad9d61eab..aa45c47f0 100644 --- a/spacy/morphology.pxd +++ b/spacy/morphology.pxd @@ -35,7 +35,7 @@ cdef class Morphology: cdef int assign_tag(self, TokenC* token, tag) except -1 - cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1 + cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1 cdef int assign_feature(self, uint64_t* morph, feature, value) except -1 diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index ef546e4e6..e5e5e013f 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -39,9 +39,9 @@ cdef class Morphology: tag_id = self.reverse_index[self.strings[tag]] else: tag_id = self.reverse_index[tag] - self._assign_tag_id(token, tag_id) + self.assign_tag_id(token, tag_id) - cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1: + cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1: if tag_id >= self.n_tags: raise ValueError("Unknown tag ID: %s" % tag_id) # TODO: It's pretty arbitrary to put this logic here. I guess the justification diff --git a/spacy/tagger.pyx b/spacy/tagger.pyx index a387ccb12..53e648f24 100644 --- a/spacy/tagger.pyx +++ b/spacy/tagger.pyx @@ -196,7 +196,7 @@ cdef class Tagger: self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat) guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class) - self.vocab.morphology.assign_tag(&tokens.c[i], guess) + self.vocab.morphology.assign_tag_id(&tokens.c[i], guess) eg.fill_scores(0, eg.c.nr_class) tokens.is_tagged = True tokens._py_tokens = [None] * tokens.length diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index e6975abc5..66c93528b 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -107,10 +107,11 @@ cdef class Tokenizer: return (self.__class__, args, None, None) cpdef Doc tokens_from_list(self, list strings): - raise NotImplementedError( - "Method deprecated in 1.0.\n" - "Old: tokenizer.tokens_from_list(strings)\n" - "New: Doc(tokenizer.vocab, words=strings)") + return Doc(self.vocab, words=strings) + #raise NotImplementedError( + # "Method deprecated in 1.0.\n" + # "Old: tokenizer.tokens_from_list(strings)\n" + # "New: Doc(tokenizer.vocab, words=strings)") @cython.boundscheck(False) def __call__(self, unicode string): diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 1200a0517..3d09b7ad0 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -577,8 +577,7 @@ cdef class Doc: elif attr_id == TAG: for i in range(length): if values[i] != 0: - self.vocab.morphology.assign_tag(&tokens[i], - self.vocab.morphology.reverse_index[values[i]]) + self.vocab.morphology.assign_tag(&tokens[i], values[i]) elif attr_id == POS: for i in range(length): tokens[i].pos = values[i] diff --git a/website/docs/usage/dependency-parse.jade b/website/docs/usage/dependency-parse.jade index 694aeef1a..28ab62d77 100644 --- a/website/docs/usage/dependency-parse.jade +++ b/website/docs/usage/dependency-parse.jade @@ -43,7 +43,7 @@ p | #[code token.dep_]. +aside-code("Example"). - from spacy.symbols import DET + from spacy.symbols import det the, dog = nlp(u'the dog') assert the.dep == det assert the.dep_ == 'det' @@ -96,14 +96,14 @@ p print([w.text for w in apples.rights]) # ['on'] assert apples.n_lefts == 2 - assert apples.n_rights == 3 + assert apples.n_rights == 1 from spacy.symbols import nsubj doc = nlp(u'Credit and mortgage account holders must submit their requests within 30 days.') root = [w for w in doc if w.head is w][0] subject = list(root.lefts)[0] for descendant in subject.subtree: - assert subject.is_ancestor(descendant) + assert subject.is_ancestor_of(descendant) from spacy.symbols import nsubj doc = nlp(u'Credit and mortgage account holders must submit their requests.') @@ -131,7 +131,7 @@ p p | Finally, I often find the #[code .left_edge] and #[code right_edge] - | attributes especially useful. They give you the first and right tokens + | attributes especially useful. They give you the first and last token | of the subtree. This is the easiest way to create a #[code Span] object | for a syntactic phrase — a useful operation. @@ -160,4 +160,4 @@ p +code. nlp = spacy.load('en') doc1 = nlp(u'Text I do want parsed.') - doc2 = nlp(u'Text I don't want parsed', parser=False) + doc2 = nlp(u"Text I don't want parsed", parse=False)