mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
commit
3a31c3a961
|
@ -12,13 +12,16 @@ This is a list of everyone who has made significant contributions to spaCy, in a
|
||||||
* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
|
* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
|
||||||
* Jordan Suchow, [@suchow](https://github.com/suchow)
|
* Jordan Suchow, [@suchow](https://github.com/suchow)
|
||||||
* Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
|
* Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
|
||||||
|
* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson)
|
||||||
* Liling Tan, [@alvations](https://github.com/alvations)
|
* Liling Tan, [@alvations](https://github.com/alvations)
|
||||||
* Matthew Honnibal, [@honnibal](https://github.com/honnibal)
|
* Matthew Honnibal, [@honnibal](https://github.com/honnibal)
|
||||||
* Maxim Samsonov, [@maxirmx](https://github.com/maxirmx)
|
* Maxim Samsonov, [@maxirmx](https://github.com/maxirmx)
|
||||||
* Oleg Zd, [@olegzd](https://github.com/olegzd)
|
* Oleg Zd, [@olegzd](https://github.com/olegzd)
|
||||||
* Sam Bozek, [@sambozek](https://github.com/sambozek)
|
* Sam Bozek, [@sambozek](https://github.com/sambozek)
|
||||||
|
* Sasho Savkov, [@savkov](https://github.com/savkov)
|
||||||
* Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues)
|
* Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues)
|
||||||
* Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov)
|
* Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov)
|
||||||
* Wah Loon Keng, [@kengz](https://github.com/kengz)
|
* Wah Loon Keng, [@kengz](https://github.com/kengz)
|
||||||
* Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker)
|
* Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker)
|
||||||
|
* Yanhao Yang, [@YanhaoYang](https://github.com/YanhaoYang)
|
||||||
* Yubing Dong, [@tomtung](https://github.com/tomtung)
|
* Yubing Dong, [@tomtung](https://github.com/tomtung)
|
||||||
|
|
|
@ -1,3 +1,11 @@
|
||||||
|
"""This script expects something like a binary sentiment data set, such as
|
||||||
|
that available here: `http://www.cs.cornell.edu/people/pabo/movie-review-data/`
|
||||||
|
|
||||||
|
It expects a directory structure like: `data_dir/train/{pos|neg}`
|
||||||
|
and `data_dir/test/{pos|neg}`. Put (say) 90% of the files in the former
|
||||||
|
and the remainder in the latter.
|
||||||
|
"""
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
|
@ -56,7 +64,7 @@ class Extractor(object):
|
||||||
self.vector.fill(0)
|
self.vector.fill(0)
|
||||||
n = 0
|
n = 0
|
||||||
for orth_id, freq in bow.items():
|
for orth_id, freq in bow.items():
|
||||||
self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].repvec * freq
|
self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].vector * freq
|
||||||
# Apply the fine-tuning we've learned
|
# Apply the fine-tuning we've learned
|
||||||
if orth_id < E.shape[0]:
|
if orth_id < E.shape[0]:
|
||||||
self.vector += E[orth_id] * freq
|
self.vector += E[orth_id] * freq
|
||||||
|
|
|
@ -210,7 +210,6 @@ cdef class Matcher:
|
||||||
self._callbacks = {}
|
self._callbacks = {}
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self.vocab = vocab
|
|
||||||
for entity_key, (etype, attrs, specs) in sorted(patterns.items()):
|
for entity_key, (etype, attrs, specs) in sorted(patterns.items()):
|
||||||
self.add_entity(entity_key, attrs)
|
self.add_entity(entity_key, attrs)
|
||||||
for spec in specs:
|
for spec in specs:
|
||||||
|
|
|
@ -250,7 +250,7 @@ cdef class Tagger:
|
||||||
eg.c.features, eg.c.nr_feat)
|
eg.c.features, eg.c.nr_feat)
|
||||||
self.model.updateC(&eg.c)
|
self.model.updateC(&eg.c)
|
||||||
|
|
||||||
self.vocab.morphology.assign_tag(&tokens.c[i], eg.guess)
|
self.vocab.morphology.assign_tag_id(&tokens.c[i], eg.guess)
|
||||||
|
|
||||||
correct += eg.cost == 0
|
correct += eg.cost == 0
|
||||||
self.freqs[TAG][tokens.c[i].tag] += 1
|
self.freqs[TAG][tokens.c[i].tag] += 1
|
||||||
|
|
|
@ -21,6 +21,7 @@ p
|
||||||
| callable, to receive a list of #[code (ent_id, start, end)] tuples:
|
| callable, to receive a list of #[code (ent_id, start, end)] tuples:
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
|
from spacy.matcher import Matcher
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
matcher.add_pattern("HelloWorld", [{LOWER: "hello"}, {IS_PUNCT: True}, {LOWER: "world"}])
|
matcher.add_pattern("HelloWorld", [{LOWER: "hello"}, {IS_PUNCT: True}, {LOWER: "world"}])
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user