mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-24 20:51:30 +03:00 
			
		
		
		
	* Make corrections to example code
This commit is contained in:
		
							parent
							
								
									f0e0588833
								
							
						
					
					
						commit
						a7e4f0a86c
					
				|  | @ -83,10 +83,9 @@ particularly egregious: | ||||||
|     >>> from spacy.parts_of_speech import ADV |     >>> from spacy.parts_of_speech import ADV | ||||||
|     >>> # Load the pipeline, and call it with some text. |     >>> # Load the pipeline, and call it with some text. | ||||||
|     >>> nlp = spacy.en.English() |     >>> nlp = spacy.en.English() | ||||||
|     >>> tokens = nlp("‘Give it back,’ he pleaded abjectly, ‘it’s mine.’", |     >>> tokens = nlp(u"‘Give it back,’ he pleaded abjectly, ‘it’s mine.’", tag=True, parse=False) | ||||||
|                      tag=True, parse=False) |     >>> print u''.join(tok.string.upper() if tok.pos == ADV else tok.string for tok in tokens) | ||||||
|     >>> print(''.join(tok.string.upper() if tok.pos == ADV else tok.string for tok in tokens)) |     ‘Give it BACK,’ he pleaded ABJECTLY, ‘it’s mine.’ | ||||||
|     ‘Give it BACK,’ he pleaded ABJECTLY, ‘it’s mine.’ |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| Easy enough --- but the problem is that we've also highlighted "back". | Easy enough --- but the problem is that we've also highlighted "back". | ||||||
|  | @ -103,11 +102,11 @@ manner adverbs that the style guides are worried about. | ||||||
| The :py:attr:`Lexeme.prob` and :py:attr:`Token.prob` attributes give a | The :py:attr:`Lexeme.prob` and :py:attr:`Token.prob` attributes give a | ||||||
| log probability estimate of the word: | log probability estimate of the word: | ||||||
| 
 | 
 | ||||||
|    >>> nlp.vocab['back'].prob |    >>> nlp.vocab[u'back'].prob | ||||||
|    -7.403977394104004 |    -7.403977394104004 | ||||||
|    >>> nlp.vocab['not'].prob |    >>> nlp.vocab[u'not'].prob | ||||||
|    -5.407193660736084 |    -5.407193660736084 | ||||||
|    >>> nlp.vocab['quietly'].prob |    >>> nlp.vocab[u'quietly'].prob | ||||||
|    -11.07155704498291 |    -11.07155704498291 | ||||||
| 
 | 
 | ||||||
| (The probability estimate is based on counts from a 3 billion word corpus, | (The probability estimate is based on counts from a 3 billion word corpus, | ||||||
|  | @ -125,8 +124,8 @@ marker.  Let's try N=1000 for now: | ||||||
|     >>> probs = [lex.prob for lex in nlp.vocab] |     >>> probs = [lex.prob for lex in nlp.vocab] | ||||||
|     >>> probs.sort() |     >>> probs.sort() | ||||||
|     >>> is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000] |     >>> is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000] | ||||||
|     >>> tokens = nlp("‘Give it back,’ he pleaded abjectly, ‘it’s mine.’") |     >>> tokens = nlp(u"‘Give it back,’ he pleaded abjectly, ‘it’s mine.’") | ||||||
|     >>> print(''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens)) |     >>> print u''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens) | ||||||
|     ‘Give it back,’ he pleaded ABJECTLY, ‘it’s mine.’ |     ‘Give it back,’ he pleaded ABJECTLY, ‘it’s mine.’ | ||||||
| 
 | 
 | ||||||
| There are lots of other ways we could refine the logic, depending on just what | There are lots of other ways we could refine the logic, depending on just what | ||||||
|  | @ -136,7 +135,7 @@ representation for every word (by default, the vectors produced by | ||||||
| `Levy and Goldberg (2014)`_).  Naturally, the vector is provided as a numpy | `Levy and Goldberg (2014)`_).  Naturally, the vector is provided as a numpy | ||||||
| array: | array: | ||||||
| 
 | 
 | ||||||
|     >>> pleaded = tokens[8] |     >>> pleaded = tokens[7] | ||||||
|     >>> pleaded.repvec.shape |     >>> pleaded.repvec.shape | ||||||
|     (300,) |     (300,) | ||||||
|     >>> pleaded.repvec[:5] |     >>> pleaded.repvec[:5] | ||||||
|  | @ -150,9 +149,10 @@ cosine metric: | ||||||
| 
 | 
 | ||||||
|     >>> from numpy import dot |     >>> from numpy import dot | ||||||
|     >>> from numpy.linalg import norm |     >>> from numpy.linalg import norm | ||||||
|     >>> cosine = lambda v1, v2: dot(v1, v2) / (norm(v1), norm(v2)) |   | ||||||
|  |     >>> cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2)) | ||||||
|     >>> words = [w for w in nlp.vocab if w.lower] |     >>> words = [w for w in nlp.vocab if w.lower] | ||||||
|     >>> words.sort(key=lambda w: cosine(w, pleaded)) |     >>> words.sort(key=lambda w: cosine(w.repvec, pleaded.repvec)) | ||||||
|     >>> words.reverse() |     >>> words.reverse() | ||||||
|     >>> print('1-20', ', '.join(w.orth_ for w in words[0:20])) |     >>> print('1-20', ', '.join(w.orth_ for w in words[0:20])) | ||||||
|     1-20 pleaded, pled, plead, confessed, interceded, pleads, testified, conspired, motioned, demurred, countersued, remonstrated, begged, apologised, consented, acquiesced, petitioned, quarreled, appealed, pleading |     1-20 pleaded, pled, plead, confessed, interceded, pleads, testified, conspired, motioned, demurred, countersued, remonstrated, begged, apologised, consented, acquiesced, petitioned, quarreled, appealed, pleading | ||||||
|  | @ -177,7 +177,7 @@ as our target: | ||||||
| 
 | 
 | ||||||
|     >>> say_verbs = ['pleaded', 'confessed', 'remonstrated', 'begged', 'bragged', 'confided', 'requested'] |     >>> say_verbs = ['pleaded', 'confessed', 'remonstrated', 'begged', 'bragged', 'confided', 'requested'] | ||||||
|     >>> say_vector = sum(nlp.vocab[verb].repvec for verb in say_verbs) / len(say_verbs) |     >>> say_vector = sum(nlp.vocab[verb].repvec for verb in say_verbs) / len(say_verbs) | ||||||
|     >>> words.sort(key=lambda w: cosine(w.repvec, say_vector)) |     >>> words.sort(key=lambda w: cosine(w.repvec, say_vector)) | ||||||
|     >>> words.reverse() |     >>> words.reverse() | ||||||
|     >>> print('1-20', ', '.join(w.orth_ for w in words[0:20])) |     >>> print('1-20', ', '.join(w.orth_ for w in words[0:20])) | ||||||
|     1-20 bragged, remonstrated, enquired, demurred, sighed, mused, intimated, retorted, entreated, motioned, ranted, confided, countersued, gestured, implored, interceded, muttered, marvelled, bickered, despaired |     1-20 bragged, remonstrated, enquired, demurred, sighed, mused, intimated, retorted, entreated, motioned, ranted, confided, countersued, gestured, implored, interceded, muttered, marvelled, bickered, despaired | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user