From daedf2c1534a369de92b5023eb7898e8716823a5 Mon Sep 17 00:00:00 2001 From: SultanMirza Date: Fri, 4 Nov 2016 20:54:28 +0530 Subject: [PATCH 1/4] Fixing typos and errors!! Fixed some typos and errors on the page. --- website/docs/usage/dependency-parse.jade | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/website/docs/usage/dependency-parse.jade b/website/docs/usage/dependency-parse.jade index 694aeef1a..28ab62d77 100644 --- a/website/docs/usage/dependency-parse.jade +++ b/website/docs/usage/dependency-parse.jade @@ -43,7 +43,7 @@ p | #[code token.dep_]. +aside-code("Example"). - from spacy.symbols import DET + from spacy.symbols import det the, dog = nlp(u'the dog') assert the.dep == det assert the.dep_ == 'det' @@ -96,14 +96,14 @@ p print([w.text for w in apples.rights]) # ['on'] assert apples.n_lefts == 2 - assert apples.n_rights == 3 + assert apples.n_rights == 1 from spacy.symbols import nsubj doc = nlp(u'Credit and mortgage account holders must submit their requests within 30 days.') root = [w for w in doc if w.head is w][0] subject = list(root.lefts)[0] for descendant in subject.subtree: - assert subject.is_ancestor(descendant) + assert subject.is_ancestor_of(descendant) from spacy.symbols import nsubj doc = nlp(u'Credit and mortgage account holders must submit their requests.') @@ -131,7 +131,7 @@ p p | Finally, I often find the #[code .left_edge] and #[code right_edge] - | attributes especially useful. They give you the first and right tokens + | attributes especially useful. They give you the first and last token | of the subtree. This is the easiest way to create a #[code Span] object | for a syntactic phrase — a useful operation. @@ -160,4 +160,4 @@ p +code. 
nlp = spacy.load('en') doc1 = nlp(u'Text I do want parsed.') - doc2 = nlp(u'Text I don't want parsed', parser=False) + doc2 = nlp(u"Text I don't want parsed", parse=False) From a36353df470a3ada700e2758c28cd38f5fbb83e0 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 4 Nov 2016 19:18:07 +0100 Subject: [PATCH 2/4] Temporarily put back the tokens_from_list method, while tests aren't updated yet. --- spacy/tokenizer.pyx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index e6975abc5..66c93528b 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -107,10 +107,11 @@ cdef class Tokenizer: return (self.__class__, args, None, None) cpdef Doc tokens_from_list(self, list strings): - raise NotImplementedError( - "Method deprecated in 1.0.\n" - "Old: tokenizer.tokens_from_list(strings)\n" - "New: Doc(tokenizer.vocab, words=strings)") + return Doc(self.vocab, words=strings) + #raise NotImplementedError( + # "Method deprecated in 1.0.\n" + # "Old: tokenizer.tokens_from_list(strings)\n" + # "New: Doc(tokenizer.vocab, words=strings)") @cython.boundscheck(False) def __call__(self, unicode string): From 1fb09c3dc16d20fdbcc3576efbd020ea93e291e1 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 4 Nov 2016 19:19:09 +0100 Subject: [PATCH 3/4] Fix morphology tagger --- spacy/morphology.pxd | 2 +- spacy/morphology.pyx | 4 ++-- spacy/tagger.pyx | 2 +- spacy/tokens/doc.pyx | 3 +-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd index ad9d61eab..aa45c47f0 100644 --- a/spacy/morphology.pxd +++ b/spacy/morphology.pxd @@ -35,7 +35,7 @@ cdef class Morphology: cdef int assign_tag(self, TokenC* token, tag) except -1 - cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1 + cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1 cdef int assign_feature(self, uint64_t* morph, feature, value) except -1 diff --git 
a/spacy/morphology.pyx b/spacy/morphology.pyx index ef546e4e6..e5e5e013f 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -39,9 +39,9 @@ cdef class Morphology: tag_id = self.reverse_index[self.strings[tag]] else: tag_id = self.reverse_index[tag] - self._assign_tag_id(token, tag_id) + self.assign_tag_id(token, tag_id) - cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1: + cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1: if tag_id >= self.n_tags: raise ValueError("Unknown tag ID: %s" % tag_id) # TODO: It's pretty arbitrary to put this logic here. I guess the justification diff --git a/spacy/tagger.pyx b/spacy/tagger.pyx index a387ccb12..53e648f24 100644 --- a/spacy/tagger.pyx +++ b/spacy/tagger.pyx @@ -196,7 +196,7 @@ cdef class Tagger: self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat) guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class) - self.vocab.morphology.assign_tag(&tokens.c[i], guess) + self.vocab.morphology.assign_tag_id(&tokens.c[i], guess) eg.fill_scores(0, eg.c.nr_class) tokens.is_tagged = True tokens._py_tokens = [None] * tokens.length diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 1200a0517..3d09b7ad0 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -577,8 +577,7 @@ cdef class Doc: elif attr_id == TAG: for i in range(length): if values[i] != 0: - self.vocab.morphology.assign_tag(&tokens[i], - self.vocab.morphology.reverse_index[values[i]]) + self.vocab.morphology.assign_tag(&tokens[i], values[i]) elif attr_id == POS: for i in range(length): tokens[i].pos = values[i] From 9f933869947f4f7ca22b87e1249d64f8252e0e9c Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 4 Nov 2016 19:28:16 +0100 Subject: [PATCH 4/4] Update version --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index 45570be81..d35113283 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -4,7 +4,7 @@ # 
https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py __title__ = 'spacy' -__version__ = '1.1.2' +__version__ = '1.1.3' __summary__ = 'Industrial-strength NLP' __uri__ = 'https://spacy.io' __author__ = 'Matthew Honnibal'