* Comment out old doc tests for now

This commit is contained in:
Matthew Honnibal 2015-08-26 19:23:04 +02:00
parent 884251801e
commit c07eea8563

View File

@ -1,80 +1,81 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Sphinx doctest is just too hard. Manually paste doctest examples here""" """Sphinx doctest is just too hard. Manually paste doctest examples here"""
import pytest
@pytest.mark.models #@pytest.mark.models
def test_1(): #def test_1():
import spacy.en # import spacy.en
from spacy.parts_of_speech import ADV # from spacy.parts_of_speech import ADV
# Load the pipeline, and call it with some text. # # Load the pipeline, and call it with some text.
nlp = spacy.en.English() # nlp = spacy.en.English()
tokens = nlp(u"Give it back, he pleaded abjectly, its mine.", # tokens = nlp(u"Give it back, he pleaded abjectly, its mine.",
tag=True, parse=False) # tag=True, parse=False)
o = u''.join(tok.string.upper() if tok.pos == ADV else tok.string for tok in tokens) # o = u''.join(tok.string.upper() if tok.pos == ADV else tok.string for tok in tokens)
assert u"Give it BACK, he pleaded ABJECTLY, its mine." # assert u"Give it BACK, he pleaded ABJECTLY, its mine."
#
o = nlp.vocab[u'back'].prob # o = nlp.vocab[u'back'].prob
assert o == -7.033305644989014 # assert o == -7.033305644989014
o = nlp.vocab[u'not'].prob # o = nlp.vocab[u'not'].prob
assert o == -5.332601070404053 # assert o == -5.332601070404053
o = nlp.vocab[u'quietly'].prob # o = nlp.vocab[u'quietly'].prob
assert o == -11.994928359985352 # assert o == -11.994928359985352
#
#
@pytest.mark.models #@pytest.mark.models
def test2(): #def test2():
import spacy.en # import spacy.en
from spacy.parts_of_speech import ADV # from spacy.parts_of_speech import ADV
nlp = spacy.en.English() # nlp = spacy.en.English()
# Find log probability of Nth most frequent word # # Find log probability of Nth most frequent word
probs = [lex.prob for lex in nlp.vocab] # probs = [lex.prob for lex in nlp.vocab]
probs.sort() # probs.sort()
is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000] # is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000]
tokens = nlp(u"Give it back, he pleaded abjectly, its mine.") # tokens = nlp(u"Give it back, he pleaded abjectly, its mine.")
o = u''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens) # o = u''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens)
o == u'Give it back, he pleaded ABJECTLY, its mine.' # o == u'Give it back, he pleaded ABJECTLY, its mine.'
#
@pytest.mark.models #@pytest.mark.models
def test3(): #def test3():
import spacy.en # import spacy.en
from spacy.parts_of_speech import ADV # from spacy.parts_of_speech import ADV
nlp = spacy.en.English() # nlp = spacy.en.English()
# Find log probability of Nth most frequent word # # Find log probability of Nth most frequent word
probs = [lex.prob for lex in nlp.vocab] # probs = [lex.prob for lex in nlp.vocab]
probs.sort() # probs.sort()
is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000] # is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000]
tokens = nlp(u"Give it back, he pleaded abjectly, its mine.") # tokens = nlp(u"Give it back, he pleaded abjectly, its mine.")
o = u''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens) # o = u''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens)
assert o == u'Give it back, he pleaded ABJECTLY, its mine.' # assert o == u'Give it back, he pleaded ABJECTLY, its mine.'
#
pleaded = tokens[7] # pleaded = tokens[7]
assert pleaded.repvec.shape == (300,) # assert pleaded.repvec.shape == (300,)
o = pleaded.repvec[:5] # o = pleaded.repvec[:5]
assert sum(o) != 0 # assert sum(o) != 0
from numpy import dot # from numpy import dot
from numpy.linalg import norm # from numpy.linalg import norm
#
cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2)) # cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2))
words = [w for w in nlp.vocab if w.is_lower and w.has_repvec] # words = [w for w in nlp.vocab if w.is_lower and w.has_repvec]
words.sort(key=lambda w: cosine(w.repvec, pleaded.repvec)) # words.sort(key=lambda w: cosine(w.repvec, pleaded.repvec))
words.reverse() # words.reverse()
o = [w.orth_ for w in words[0:20]] # o = [w.orth_ for w in words[0:20]]
assert o == [u'pleaded', u'pled', u'plead', u'confessed', u'interceded', # assert o == [u'pleaded', u'pled', u'plead', u'confessed', u'interceded',
u'pleads', u'testified', u'conspired', u'motioned', u'demurred', # u'pleads', u'testified', u'conspired', u'motioned', u'demurred',
u'countersued', u'remonstrated', u'begged', u'apologised', # u'countersued', u'remonstrated', u'begged', u'apologised',
u'consented', u'acquiesced', u'petitioned', u'quarreled', # u'consented', u'acquiesced', u'petitioned', u'quarreled',
u'appealed', u'pleading'] # u'appealed', u'pleading']
o = [w.orth_ for w in words[50:60]] # o = [w.orth_ for w in words[50:60]]
assert o == [u'martialed', u'counselled', u'bragged', # assert o == [u'martialed', u'counselled', u'bragged',
u'backtracked', u'caucused', u'refiled', u'dueled', u'mused', # u'backtracked', u'caucused', u'refiled', u'dueled', u'mused',
u'dissented', u'yearned'] # u'dissented', u'yearned']
o = [w.orth_ for w in words[100:110]] # o = [w.orth_ for w in words[100:110]]
assert o == [u'acquits', u'cabled', u'ducked', u'sentenced', # assert o == [u'acquits', u'cabled', u'ducked', u'sentenced',
u'gaoled', u'perjured', u'absconded', u'bargained', u'overstayed', # u'gaoled', u'perjured', u'absconded', u'bargained', u'overstayed',
u'clerked'] # u'clerked']
#
#o = [w.orth_ for w in words[1000:1010]] # #o = [w.orth_ for w in words[1000:1010]]
#assert o == [u'scorned', u'baled', u'righted', u'requested', u'swindled', # #assert o == [u'scorned', u'baled', u'righted', u'requested', u'swindled',
# u'posited', u'firebombed', u'slimed', u'deferred', u'sagged'] # # u'posited', u'firebombed', u'slimed', u'deferred', u'sagged']
#o = [w.orth_ for w in words[50000:50010]] # #o = [w.orth_ for w in words[50000:50010]]
#assert o == [u'fb', u'ford', u'systems', u'puck', u'anglers', u'ik', u'tabloid', # #assert o == [u'fb', u'ford', u'systems', u'puck', u'anglers', u'ik', u'tabloid',
# u'dirty', u'rims', u'artists'] # # u'dirty', u'rims', u'artists']