diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 63ddf26e6..23c9387e1 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -12,13 +12,16 @@ This is a list of everyone who has made significant contributions to spaCy, in a * J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading) * Jordan Suchow, [@suchow](https://github.com/suchow) * Kendrick Tan, [@kendricktan](https://github.com/kendricktan) +* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson) * Liling Tan, [@alvations](https://github.com/alvations) * Matthew Honnibal, [@honnibal](https://github.com/honnibal) * Maxim Samsonov, [@maxirmx](https://github.com/maxirmx) * Oleg Zd, [@olegzd](https://github.com/olegzd) * Sam Bozek, [@sambozek](https://github.com/sambozek) +* Sasho Savkov, [@savkov](https://github.com/savkov) * Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues) * Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov) * Wah Loon Keng, [@kengz](https://github.com/kengz) * Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker) +* Yanhao Yang, [@YanhaoYang](https://github.com/YanhaoYang) * Yubing Dong, [@tomtung](https://github.com/tomtung) diff --git a/examples/nn_text_class.py b/examples/nn_text_class.py index 4a19e5780..7b4a2fd57 100644 --- a/examples/nn_text_class.py +++ b/examples/nn_text_class.py @@ -1,3 +1,11 @@ +"""This script expects something like a binary sentiment data set, such as + that available here: `http://www.cs.cornell.edu/people/pabo/movie-review-data/` + +It expects a directory structure like: `data_dir/train/{pos|neg}` + and `data_dir/test/{pos|neg}`. Put (say) 90% of the files in the former + and the remainder in the latter. 
+""" + from __future__ import unicode_literals from __future__ import print_function from __future__ import division @@ -56,7 +64,7 @@ class Extractor(object): self.vector.fill(0) n = 0 for orth_id, freq in bow.items(): - self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].repvec * freq + self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].vector * freq # Apply the fine-tuning we've learned if orth_id < E.shape[0]: self.vector += E[orth_id] * freq diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx index 72a4a97d6..a45597b28 100644 --- a/spacy/matcher.pyx +++ b/spacy/matcher.pyx @@ -210,7 +210,6 @@ cdef class Matcher: self._callbacks = {} self.vocab = vocab self.mem = Pool() - self.vocab = vocab for entity_key, (etype, attrs, specs) in sorted(patterns.items()): self.add_entity(entity_key, attrs) for spec in specs: diff --git a/spacy/tagger.pyx b/spacy/tagger.pyx index 53e648f24..7903c44fb 100644 --- a/spacy/tagger.pyx +++ b/spacy/tagger.pyx @@ -250,7 +250,7 @@ cdef class Tagger: eg.c.features, eg.c.nr_feat) self.model.updateC(&eg.c) - self.vocab.morphology.assign_tag(&tokens.c[i], eg.guess) + self.vocab.morphology.assign_tag_id(&tokens.c[i], eg.guess) correct += eg.cost == 0 self.freqs[TAG][tokens.c[i].tag] += 1 diff --git a/website/docs/usage/rule-based-matching.jade b/website/docs/usage/rule-based-matching.jade index 41fa0bdd3..bedadb0d3 100644 --- a/website/docs/usage/rule-based-matching.jade +++ b/website/docs/usage/rule-based-matching.jade @@ -21,6 +21,7 @@ p | callable, to receive a list of #[code (ent_id, start, end)] tuples: +code. + from spacy.matcher import Matcher matcher = Matcher(nlp.vocab) matcher.add_pattern("HelloWorld", [{LOWER: "hello"}, {IS_PUNCT: True}, {LOWER: "world"}])