Merge branch 'master' of https://github.com/explosion/spaCy

2025-11-06 02:47:29 +03:00 · 2016-11-16 05:49:42 -06:00 · 2016-11-16 05:49:42 -06:00 · 3a31c3a961
commit 3a31c3a961
parent 389e8b700e e6c5321fbb
5 changed files with 14 additions and 3 deletions
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@ -12,13 +12,16 @@ This is a list of everyone who has made significant contributions to spaCy, in a
 * J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
 * Jordan Suchow, [@suchow](https://github.com/suchow)
 * Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
+* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson)
 * Liling Tan, [@alvations](https://github.com/alvations)
 * Matthew Honnibal, [@honnibal](https://github.com/honnibal)
 * Maxim Samsonov, [@maxirmx](https://github.com/maxirmx)
 * Oleg Zd, [@olegzd](https://github.com/olegzd)
 * Sam Bozek, [@sambozek](https://github.com/sambozek)
+* Sasho Savkov, [@savkov](https://github.com/savkov)
 * Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues)
 * Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov)
 * Wah Loon Keng, [@kengz](https://github.com/kengz)
 * Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker)
+* Yanhao Yang, [@YanhaoYang](https://github.com/YanhaoYang)
 * Yubing Dong, [@tomtung](https://github.com/tomtung)
--- a/examples/nn_text_class.py
+++ b/examples/nn_text_class.py
@ -1,3 +1,11 @@
+"""This script expects something like a binary sentiment data set, such as
+ that available here: `http://www.cs.cornell.edu/people/pabo/movie-review-data/`
+
+It expects a directory structure like: `data_dir/train/{pos|neg}`
+ and `data_dir/test/{pos|neg}`. Put (say) 90% of the files in the former
+ and the remainder in the latter.
+"""
+
 from __future__ import unicode_literals
 from __future__ import print_function
 from __future__ import division
@ -56,7 +64,7 @@ class Extractor(object):
        self.vector.fill(0)
        n = 0
        for orth_id, freq in bow.items():
-            self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].repvec * freq
+            self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].vector * freq
            # Apply the fine-tuning we've learned
            if orth_id < E.shape[0]:
                self.vector += E[orth_id] * freq
--- a/spacy/matcher.pyx
+++ b/spacy/matcher.pyx
@ -210,7 +210,6 @@ cdef class Matcher:
        self._callbacks = {}
        self.vocab = vocab
        self.mem = Pool()
-        self.vocab = vocab
        for entity_key, (etype, attrs, specs) in sorted(patterns.items()):
            self.add_entity(entity_key, attrs)
            for spec in specs:
--- a/spacy/tagger.pyx
+++ b/spacy/tagger.pyx
@ -250,7 +250,7 @@ cdef class Tagger:
                eg.c.features, eg.c.nr_feat)
            self.model.updateC(&eg.c)

-            self.vocab.morphology.assign_tag(&tokens.c[i], eg.guess)
+            self.vocab.morphology.assign_tag_id(&tokens.c[i], eg.guess)
            
            correct += eg.cost == 0
            self.freqs[TAG][tokens.c[i].tag] += 1
--- a/website/docs/usage/rule-based-matching.jade
+++ b/website/docs/usage/rule-based-matching.jade
@ -21,6 +21,7 @@ p
    |  callable, to receive a list of #[code (ent_id, start, end)] tuples:

 +code.
+    from spacy.matcher import Matcher
    matcher = Matcher(nlp.vocab)
    matcher.add_pattern("HelloWorld", [{LOWER: "hello"}, {IS_PUNCT: True}, {LOWER: "world"}])