This commit is contained in:
Matthew Honnibal 2016-11-16 05:49:42 -06:00
commit 3a31c3a961
5 changed files with 14 additions and 3 deletions

View File

@ -12,13 +12,16 @@ This is a list of everyone who has made significant contributions to spaCy, in a
* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
* Jordan Suchow, [@suchow](https://github.com/suchow)
* Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson)
* Liling Tan, [@alvations](https://github.com/alvations)
* Matthew Honnibal, [@honnibal](https://github.com/honnibal)
* Maxim Samsonov, [@maxirmx](https://github.com/maxirmx)
* Oleg Zd, [@olegzd](https://github.com/olegzd)
* Sam Bozek, [@sambozek](https://github.com/sambozek)
* Sasho Savkov, [@savkov](https://github.com/savkov)
* Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues)
* Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov)
* Wah Loon Keng, [@kengz](https://github.com/kengz)
* Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker)
* Yanhao Yang, [@YanhaoYang](https://github.com/YanhaoYang)
* Yubing Dong, [@tomtung](https://github.com/tomtung)

View File

@ -1,3 +1,11 @@
"""This script expects something like a binary sentiment data set, such as
that available here: `http://www.cs.cornell.edu/people/pabo/movie-review-data/`
It expects a directory structure like: `data_dir/train/{pos|neg}`
and `data_dir/test/{pos|neg}`. Put (say) 90% of the files in the former
and the remainder in the latter.
"""
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
@ -56,7 +64,7 @@ class Extractor(object):
self.vector.fill(0)
n = 0
for orth_id, freq in bow.items():
self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].repvec * freq
self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].vector * freq
# Apply the fine-tuning we've learned
if orth_id < E.shape[0]:
self.vector += E[orth_id] * freq

View File

@ -210,7 +210,6 @@ cdef class Matcher:
self._callbacks = {}
self.vocab = vocab
self.mem = Pool()
self.vocab = vocab
for entity_key, (etype, attrs, specs) in sorted(patterns.items()):
self.add_entity(entity_key, attrs)
for spec in specs:

View File

@ -250,7 +250,7 @@ cdef class Tagger:
eg.c.features, eg.c.nr_feat)
self.model.updateC(&eg.c)
self.vocab.morphology.assign_tag(&tokens.c[i], eg.guess)
self.vocab.morphology.assign_tag_id(&tokens.c[i], eg.guess)
correct += eg.cost == 0
self.freqs[TAG][tokens.c[i].tag] += 1

View File

@ -21,6 +21,7 @@ p
| callable, to receive a list of #[code (ent_id, start, end)] tuples:
+code.
from spacy.matcher import Matcher
matcher = Matcher(nlp.vocab)
matcher.add_pattern("HelloWorld", [{LOWER: "hello"}, {IS_PUNCT: True}, {LOWER: "world"}])