mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
		
						commit
						3a31c3a961
					
				| 
						 | 
					@ -12,13 +12,16 @@ This is a list of everyone who has made significant contributions to spaCy, in a
 | 
				
			||||||
* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
 | 
					* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
 | 
				
			||||||
* Jordan Suchow, [@suchow](https://github.com/suchow)
 | 
					* Jordan Suchow, [@suchow](https://github.com/suchow)
 | 
				
			||||||
* Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
 | 
					* Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
 | 
				
			||||||
 | 
					* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson)
 | 
				
			||||||
* Liling Tan, [@alvations](https://github.com/alvations)
 | 
					* Liling Tan, [@alvations](https://github.com/alvations)
 | 
				
			||||||
* Matthew Honnibal, [@honnibal](https://github.com/honnibal)
 | 
					* Matthew Honnibal, [@honnibal](https://github.com/honnibal)
 | 
				
			||||||
* Maxim Samsonov, [@maxirmx](https://github.com/maxirmx)
 | 
					* Maxim Samsonov, [@maxirmx](https://github.com/maxirmx)
 | 
				
			||||||
* Oleg Zd, [@olegzd](https://github.com/olegzd)
 | 
					* Oleg Zd, [@olegzd](https://github.com/olegzd)
 | 
				
			||||||
* Sam Bozek, [@sambozek](https://github.com/sambozek)
 | 
					* Sam Bozek, [@sambozek](https://github.com/sambozek)
 | 
				
			||||||
 | 
					* Sasho Savkov, [@savkov](https://github.com/savkov)
 | 
				
			||||||
* Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues)
 | 
					* Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues)
 | 
				
			||||||
* Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov)
 | 
					* Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov)
 | 
				
			||||||
* Wah Loon Keng, [@kengz](https://github.com/kengz)
 | 
					* Wah Loon Keng, [@kengz](https://github.com/kengz)
 | 
				
			||||||
* Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker)
 | 
					* Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker)
 | 
				
			||||||
 | 
					* Yanhao Yang, [@YanhaoYang](https://github.com/YanhaoYang)
 | 
				
			||||||
* Yubing Dong, [@tomtung](https://github.com/tomtung)
 | 
					* Yubing Dong, [@tomtung](https://github.com/tomtung)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,3 +1,11 @@
 | 
				
			||||||
 | 
					"""This script expects something like a binary sentiment data set, such as
 | 
				
			||||||
 | 
					 that available here: `http://www.cs.cornell.edu/people/pabo/movie-review-data/`
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					It expects a directory structure like: `data_dir/train/{pos|neg}`
 | 
				
			||||||
 | 
					 and `data_dir/test/{pos|neg}`. Put (say) 90% of the files in the former
 | 
				
			||||||
 | 
					 and the remainder in the latter.
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
from __future__ import print_function
 | 
					from __future__ import print_function
 | 
				
			||||||
from __future__ import division
 | 
					from __future__ import division
 | 
				
			||||||
| 
						 | 
					@ -56,7 +64,7 @@ class Extractor(object):
 | 
				
			||||||
        self.vector.fill(0)
 | 
					        self.vector.fill(0)
 | 
				
			||||||
        n = 0
 | 
					        n = 0
 | 
				
			||||||
        for orth_id, freq in bow.items():
 | 
					        for orth_id, freq in bow.items():
 | 
				
			||||||
            self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].repvec * freq
 | 
					            self.vector += self.nlp.vocab[self.nlp.vocab.strings[orth_id]].vector * freq
 | 
				
			||||||
            # Apply the fine-tuning we've learned
 | 
					            # Apply the fine-tuning we've learned
 | 
				
			||||||
            if orth_id < E.shape[0]:
 | 
					            if orth_id < E.shape[0]:
 | 
				
			||||||
                self.vector += E[orth_id] * freq
 | 
					                self.vector += E[orth_id] * freq
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -210,7 +210,6 @@ cdef class Matcher:
 | 
				
			||||||
        self._callbacks = {}
 | 
					        self._callbacks = {}
 | 
				
			||||||
        self.vocab = vocab
 | 
					        self.vocab = vocab
 | 
				
			||||||
        self.mem = Pool()
 | 
					        self.mem = Pool()
 | 
				
			||||||
        self.vocab = vocab
 | 
					 | 
				
			||||||
        for entity_key, (etype, attrs, specs) in sorted(patterns.items()):
 | 
					        for entity_key, (etype, attrs, specs) in sorted(patterns.items()):
 | 
				
			||||||
            self.add_entity(entity_key, attrs)
 | 
					            self.add_entity(entity_key, attrs)
 | 
				
			||||||
            for spec in specs:
 | 
					            for spec in specs:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -250,7 +250,7 @@ cdef class Tagger:
 | 
				
			||||||
                eg.c.features, eg.c.nr_feat)
 | 
					                eg.c.features, eg.c.nr_feat)
 | 
				
			||||||
            self.model.updateC(&eg.c)
 | 
					            self.model.updateC(&eg.c)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.vocab.morphology.assign_tag(&tokens.c[i], eg.guess)
 | 
					            self.vocab.morphology.assign_tag_id(&tokens.c[i], eg.guess)
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            correct += eg.cost == 0
 | 
					            correct += eg.cost == 0
 | 
				
			||||||
            self.freqs[TAG][tokens.c[i].tag] += 1
 | 
					            self.freqs[TAG][tokens.c[i].tag] += 1
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -21,6 +21,7 @@ p
 | 
				
			||||||
    |  callable, to receive a list of #[code (ent_id, start, end)] tuples:
 | 
					    |  callable, to receive a list of #[code (ent_id, start, end)] tuples:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
+code.
 | 
					+code.
 | 
				
			||||||
 | 
					    from spacy.matcher import Matcher
 | 
				
			||||||
    matcher = Matcher(nlp.vocab)
 | 
					    matcher = Matcher(nlp.vocab)
 | 
				
			||||||
    matcher.add_pattern("HelloWorld", [{LOWER: "hello"}, {IS_PUNCT: True}, {LOWER: "world"}])
 | 
					    matcher.add_pattern("HelloWorld", [{LOWER: "hello"}, {IS_PUNCT: True}, {LOWER: "world"}])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user