mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
commit
3a31c3a961
|
@ -12,13 +12,16 @@ This is a list of everyone who has made significant contributions to spaCy, in a
|
|||
* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
|
||||
* Jordan Suchow, [@suchow](https://github.com/suchow)
|
||||
* Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
|
||||
* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson)
|
||||
* Liling Tan, [@alvations](https://github.com/alvations)
|
||||
* Matthew Honnibal, [@honnibal](https://github.com/honnibal)
|
||||
* Maxim Samsonov, [@maxirmx](https://github.com/maxirmx)
|
||||
* Oleg Zd, [@olegzd](https://github.com/olegzd)
|
||||
* Sam Bozek, [@sambozek](https://github.com/sambozek)
|
||||
* Sasho Savkov [@savkov](https://github.com/savkov)
|
||||
* Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues)
|
||||
* Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov)
|
||||
* Wah Loon Keng, [@kengz](https://github.com/kengz)
|
||||
* Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker)
|
||||
* Yanhao Yang, [@YanhaoYang](https://github.com/YanhaoYang)
|
||||
* Yubing Dong, [@tomtung](https://github.com/tomtung)
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
"""This script expects something like a binary sentiment data set, such as
|
||||
that available here: `http://www.cs.cornell.edu/people/pabo/movie-review-data/`
|
||||
|
||||
It expects a directory structure like: `data_dir/train/{pos|neg}`
|
||||
and `data_dir/test/{pos|neg}`. Put (say) 90% of the files in the former
|
||||
and the remainder in the latter.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
from __future__ import division
|
||||
|
@ -56,7 +64,7 @@ class Extractor(object):
|
|||
self.vector.fill(0)
|
||||
n = 0
|
||||
for orth_id, freq in bow.items():
|
||||
self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].repvec * freq
|
||||
self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].vector * freq
|
||||
# Apply the fine-tuning we've learned
|
||||
if orth_id < E.shape[0]:
|
||||
self.vector += E[orth_id] * freq
|
||||
|
|
|
@ -210,7 +210,6 @@ cdef class Matcher:
|
|||
self._callbacks = {}
|
||||
self.vocab = vocab
|
||||
self.mem = Pool()
|
||||
self.vocab = vocab
|
||||
for entity_key, (etype, attrs, specs) in sorted(patterns.items()):
|
||||
self.add_entity(entity_key, attrs)
|
||||
for spec in specs:
|
||||
|
|
|
@ -250,7 +250,7 @@ cdef class Tagger:
|
|||
eg.c.features, eg.c.nr_feat)
|
||||
self.model.updateC(&eg.c)
|
||||
|
||||
self.vocab.morphology.assign_tag(&tokens.c[i], eg.guess)
|
||||
self.vocab.morphology.assign_tag_id(&tokens.c[i], eg.guess)
|
||||
|
||||
correct += eg.cost == 0
|
||||
self.freqs[TAG][tokens.c[i].tag] += 1
|
||||
|
|
|
@ -21,6 +21,7 @@ p
|
|||
| callable, to receive a list of #[code (ent_id, start, end)] tuples:
|
||||
|
||||
+code.
|
||||
from spacy.matcher import Matcher
|
||||
matcher = Matcher(nlp.vocab)
|
||||
matcher.add_pattern("HelloWorld", [{LOWER: "hello"}, {IS_PUNCT: True}, {LOWER: "world"}])
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user