Merge branch 'master' of ssh://github.com/explosion/spaCy

Matthew Honnibal 2016-11-04 20:03:07 +01:00
commit bd6e24fe0e
7 changed files with 16 additions and 16 deletions

View File

@@ -4,7 +4,7 @@
# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
__title__ = 'spacy'
-__version__ = '1.1.2'
+__version__ = '1.1.3'
__summary__ = 'Industrial-strength NLP'
__uri__ = 'https://spacy.io'
__author__ = 'Matthew Honnibal'

View File

@@ -35,7 +35,7 @@ cdef class Morphology:
cdef int assign_tag(self, TokenC* token, tag) except -1
-cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1
+cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
cdef int assign_feature(self, uint64_t* morph, feature, value) except -1

View File

@@ -39,9 +39,9 @@ cdef class Morphology:
tag_id = self.reverse_index[self.strings[tag]]
else:
tag_id = self.reverse_index[tag]
-self._assign_tag_id(token, tag_id)
+self.assign_tag_id(token, tag_id)
-cdef int _assign_tag_id(self, TokenC* token, int tag_id) except -1:
+cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1:
if tag_id >= self.n_tags:
raise ValueError("Unknown tag ID: %s" % tag_id)
# TODO: It's pretty arbitrary to put this logic here. I guess the justification

View File

@@ -196,7 +196,7 @@ cdef class Tagger:
self.model.set_scoresC(eg.c.scores,
eg.c.features, eg.c.nr_feat)
guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class)
-self.vocab.morphology.assign_tag(&tokens.c[i], guess)
+self.vocab.morphology.assign_tag_id(&tokens.c[i], guess)
eg.fill_scores(0, eg.c.nr_class)
tokens.is_tagged = True
tokens._py_tokens = [None] * tokens.length
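
The morphology and tagger hunks above rename the private _assign_tag_id to a public assign_tag_id and have the tagger call it directly: the guess coming out of arg_max_if_true is already a tag index, while assign_tag treats its argument as a tag name or string ID and resolves it through reverse_index first. Below is a simplified, purely illustrative Python model of those two entry points (the real methods are Cython and take a TokenC*; the class and token representation here are the editor's sketch, mirroring the branches shown in the morphology hunk):

    class MorphologySketch(object):
        def __init__(self, tag_names):
            self.strings = {}        # fake string store: tag name -> string ID
            self.reverse_index = {}  # string ID -> tag index
            for i, name in enumerate(tag_names):
                sid = 1000 + i
                self.strings[name] = sid
                self.reverse_index[sid] = i
            self.n_tags = len(tag_names)

        def assign_tag(self, token, tag):
            # Accepts a tag name or a string ID; resolves it to a tag index first.
            if isinstance(tag, str):
                tag_id = self.reverse_index[self.strings[tag]]
            else:
                tag_id = self.reverse_index[tag]
            self.assign_tag_id(token, tag_id)

        def assign_tag_id(self, token, tag_id):
            # Takes the tag index directly, which is what the tagger's argmax produces.
            if tag_id >= self.n_tags:
                raise ValueError("Unknown tag ID: %s" % tag_id)
            token['tag_id'] = tag_id

    morph = MorphologySketch(['DT', 'NN', 'VBD'])
    token = {}
    morph.assign_tag(token, 'NN')   # by name, resolved via reverse_index
    morph.assign_tag_id(token, 1)   # by index, as the tagger now does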

View File

@@ -107,10 +107,11 @@ cdef class Tokenizer:
return (self.__class__, args, None, None)
cpdef Doc tokens_from_list(self, list strings):
-raise NotImplementedError(
-    "Method deprecated in 1.0.\n"
-    "Old: tokenizer.tokens_from_list(strings)\n"
-    "New: Doc(tokenizer.vocab, words=strings)")
+return Doc(self.vocab, words=strings)
+#raise NotImplementedError(
+#    "Method deprecated in 1.0.\n"
+#    "Old: tokenizer.tokens_from_list(strings)\n"
+#    "New: Doc(tokenizer.vocab, words=strings)")
@cython.boundscheck(False)
def __call__(self, unicode string):
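
This hunk makes tokens_from_list return a Doc again instead of raising, while the commented-out message still records the preferred replacement. A minimal sketch of that replacement, assuming an English model is installed under the name 'en':

    import spacy
    from spacy.tokens import Doc

    nlp = spacy.load('en')
    words = [u'Hello', u'world', u'!']

    # Deprecated since 1.0 (kept working by this commit):
    # doc = nlp.tokenizer.tokens_from_list(words)

    # Preferred construction per the deprecation message:
    doc = Doc(nlp.vocab, words=words)
    assert [w.text for w in doc] == words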

View File

@@ -577,8 +577,7 @@ cdef class Doc:
elif attr_id == TAG:
for i in range(length):
if values[i] != 0:
-self.vocab.morphology.assign_tag(&tokens[i],
-    self.vocab.morphology.reverse_index[values[i]])
+self.vocab.morphology.assign_tag(&tokens[i], values[i])
elif attr_id == POS:
for i in range(length):
tokens[i].pos = <univ_pos_t>values[i]
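
Since assign_tag now resolves names and string IDs itself (see the morphology hunk above), this code path can pass the raw TAG value straight through instead of pre-resolving it via reverse_index. A hedged sketch of the round trip this serves, assuming the 'en' model is installed:

    import spacy
    from spacy.attrs import TAG
    from spacy.tokens import Doc

    nlp = spacy.load('en')
    doc = nlp(u'the dog barked')
    tag_array = doc.to_array([TAG])

    # Rebuild a Doc from the same words and restore the tags from the array.
    doc2 = Doc(nlp.vocab, words=[w.text for w in doc])
    doc2.from_array([TAG], tag_array)
    assert [w.tag_ for w in doc2] == [w.tag_ for w in doc]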

View File

@@ -43,7 +43,7 @@ p
| #[code token.dep_].
+aside-code("Example").
-from spacy.symbols import DET
+from spacy.symbols import det
the, dog = nlp(u'the dog')
assert the.dep == det
assert the.dep_ == 'det'
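
The import is corrected because spacy.symbols exposes both the upper-case universal POS tag DET and the lower-case dependency label det, and token.dep holds the dependency label. A short sketch of the distinction, assuming the 'en' model tags 'the' this way:

    import spacy
    from spacy.symbols import DET, det

    nlp = spacy.load('en')
    the = nlp(u'the dog')[0]

    assert the.pos == DET      # coarse part-of-speech: determiner
    assert the.dep == det      # dependency relation to its head
    assert the.dep_ == 'det'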
@@ -96,14 +96,14 @@ p
print([w.text for w in apples.rights])
# ['on']
assert apples.n_lefts == 2
-assert apples.n_rights == 3
+assert apples.n_rights == 1
from spacy.symbols import nsubj
doc = nlp(u'Credit and mortgage account holders must submit their requests within 30 days.')
root = [w for w in doc if w.head is w][0]
subject = list(root.lefts)[0]
for descendant in subject.subtree:
-assert subject.is_ancestor(descendant)
+assert subject.is_ancestor_of(descendant)
from spacy.symbols import nsubj
doc = nlp(u'Credit and mortgage account holders must submit their requests.')
@@ -131,7 +131,7 @@ p
p
| Finally, I often find the #[code .left_edge] and #[code right_edge]
-| attributes especially useful. They give you the first and right tokens
+| attributes especially useful. They give you the first and last token
| of the subtree. This is the easiest way to create a #[code Span] object
| for a syntactic phrase — a useful operation.
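
As the corrected paragraph says, left_edge and right_edge give the first and last token of the subtree, so a Span for the whole phrase is just a slice between their indices. A sketch using the same sentence and root/subject idiom as the example above; the slice itself is the editor's illustration, not taken from the docs:

    import spacy

    nlp = spacy.load('en')
    doc = nlp(u'Credit and mortgage account holders must submit their requests.')

    root = [w for w in doc if w.head is w][0]
    subject = list(root.lefts)[0]

    # Slice from the subtree's first token to its last token (inclusive).
    span = doc[subject.left_edge.i : subject.right_edge.i + 1]
    print(span.text)   # e.g. 'Credit and mortgage account holders'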
@@ -160,4 +160,4 @@ p
+code.
nlp = spacy.load('en')
doc1 = nlp(u'Text I do want parsed.')
-doc2 = nlp(u'Text I don't want parsed', parser=False)
+doc2 = nlp(u"Text I don't want parsed", parse=False)
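
The corrected line fixes both the quoting (the apostrophe in "don't" breaks a single-quoted literal) and the keyword, which in spaCy 1.x is parse=False. A small sketch of the effect, assuming Doc.is_parsed reflects whether the parse was run:

    import spacy

    nlp = spacy.load('en')
    doc1 = nlp(u'Text I do want parsed.')
    doc2 = nlp(u"Text I don't want parsed", parse=False)

    assert doc1.is_parsed
    assert not doc2.is_parsed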