mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-01 00:17:44 +03:00 
			
		
		
		
	Merge remote-tracking branch 'refs/remotes/honnibal/master'
This commit is contained in:
		
						commit
						23475360e4
					
				
							
								
								
									
										37
									
								
								examples/_handler.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								examples/_handler.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,37 @@ | ||||||
|  | # encoding: utf8 | ||||||
|  | from __future__ import unicode_literals, print_function | ||||||
|  | 
 | ||||||
|  | from math import sqrt | ||||||
|  | from numpy import dot | ||||||
|  | from numpy.linalg import norm | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def handle_tweet(spacy, tweet_data, query): | ||||||
|  |     text = tweet_data.get('text', u'') | ||||||
|  |     # Twython returns either bytes or unicode, depending on tweet. | ||||||
|  |     # ಠ_ಠ #APIshaming | ||||||
|  |     try: | ||||||
|  |         match_tweet(spacy, text, query) | ||||||
|  |     except TypeError: | ||||||
|  |         match_tweet(spacy, text.decode('utf8'), query) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def match_tweet(spacy, text, query): | ||||||
|  |     def get_vector(word): | ||||||
|  |         return spacy.vocab[word].repvec | ||||||
|  | 
 | ||||||
|  |     tweet = spacy(text) | ||||||
|  |     tweet = [w.repvec for w in tweet if w.is_alpha and w.lower_ != query] | ||||||
|  |     if tweet: | ||||||
|  |         accept = map(get_vector, 'child classroom teach'.split()) | ||||||
|  |         reject = map(get_vector, 'mouth hands giveaway'.split()) | ||||||
|  |          | ||||||
|  |         y = sum(max(cos(w1, w2), 0) for w1 in tweet for w2 in accept) | ||||||
|  |         n = sum(max(cos(w1, w2), 0) for w1 in tweet for w2 in reject) | ||||||
|  |          | ||||||
|  |         if (y / (y + n)) >= 0.5 or True: | ||||||
|  |             print(text) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def cos(v1, v2): | ||||||
|  |     return dot(v1, v2) / (norm(v1) * norm(v2)) | ||||||
|  | @ -248,24 +248,16 @@ cdef class Token: | ||||||
| 
 | 
 | ||||||
|     property conjuncts: |     property conjuncts: | ||||||
|         def __get__(self): |         def __get__(self): | ||||||
|             """Get a list of conjoined words""" |             """Get a list of conjoined words.""" | ||||||
|             cdef Token word |             cdef Token word | ||||||
|             conjs = [] |             conjuncts = [] | ||||||
|             if self.c.pos != CONJ and self.c.pos != PUNCT: |             if self.dep_ != 'conj': | ||||||
|                 seen_conj = False |                 for word in self.rights: | ||||||
|                 for word in reversed(list(self.lefts)): |                     if word.dep_ == 'conj': | ||||||
|                     if word.c.pos == CONJ: |                         yield word | ||||||
|                         seen_conj = True |                         yield from word.conjuncts | ||||||
|                     elif seen_conj and word.c.pos == self.c.pos: |                         conjuncts.append(word) | ||||||
|                         conjs.append(word) |                         conjuncts.extend(word.conjuncts) | ||||||
|             conjs.reverse() |  | ||||||
|             conjs.append(self) |  | ||||||
|             if seen_conj: |  | ||||||
|                 return conjs |  | ||||||
|             elif self is not self.head and self in self.head.conjuncts: |  | ||||||
|                 return self.head.conjuncts |  | ||||||
|             else: |  | ||||||
|                 return [] |  | ||||||
| 
 | 
 | ||||||
|     property ent_type: |     property ent_type: | ||||||
|         def __get__(self): |         def __get__(self): | ||||||
|  |  | ||||||
|  | @ -7,6 +7,6 @@ def test_space_attachment(EN): | ||||||
|     sentence = 'This is a test.\nTo ensure  spaces are attached well.' |     sentence = 'This is a test.\nTo ensure  spaces are attached well.' | ||||||
|     doc = EN(sentence) |     doc = EN(sentence) | ||||||
| 
 | 
 | ||||||
|     for word in doc: |     for sent in doc.sents: | ||||||
|         if word.is_space: |         if len(sent) == 1: | ||||||
|             assert word.head.i == (word.i - 1) |             assert not sent[-1].is_space | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user