Mirror of https://github.com/explosion/spaCy.git
Synced 2024-12-26 01:46:28 +03:00
* Comment out old doc tests for now

This commit is contained in:
parent 884251801e
commit c07eea8563
@@ -1,80 +1,81 @@
 # -*- coding: utf-8 -*-
 """Sphinx doctest is just too hard. Manually paste doctest examples here"""
 
 import pytest
 
 
-@pytest.mark.models
+#@pytest.mark.models
-def test_1():
+#def test_1():
-    import spacy.en
+#    import spacy.en
-    from spacy.parts_of_speech import ADV
+#    from spacy.parts_of_speech import ADV
-    # Load the pipeline, and call it with some text.
+#    # Load the pipeline, and call it with some text.
-    nlp = spacy.en.English()
+#    nlp = spacy.en.English()
-    tokens = nlp(u"‘Give it back,’ he pleaded abjectly, ‘it’s mine.’",
+#    tokens = nlp(u"‘Give it back,’ he pleaded abjectly, ‘it’s mine.’",
-                 tag=True, parse=False)
+#                 tag=True, parse=False)
-    o = u''.join(tok.string.upper() if tok.pos == ADV else tok.string for tok in tokens)
+#    o = u''.join(tok.string.upper() if tok.pos == ADV else tok.string for tok in tokens)
-    assert u"‘Give it BACK,’ he pleaded ABJECTLY, ‘it’s mine.’"
+#    assert u"‘Give it BACK,’ he pleaded ABJECTLY, ‘it’s mine.’"
-
+#
-    o = nlp.vocab[u'back'].prob
+#    o = nlp.vocab[u'back'].prob
-    assert o == -7.033305644989014
+#    assert o == -7.033305644989014
-    o = nlp.vocab[u'not'].prob
+#    o = nlp.vocab[u'not'].prob
-    assert o == -5.332601070404053
+#    assert o == -5.332601070404053
-    o = nlp.vocab[u'quietly'].prob
+#    o = nlp.vocab[u'quietly'].prob
-    assert o == -11.994928359985352
+#    assert o == -11.994928359985352
-
+#
-
+#
-@pytest.mark.models
+#@pytest.mark.m
-def test2():
+#def test2():
-    import spacy.en
+#    import spacy.en
-    from spacy.parts_of_speech import ADV
+#    from spacy.parts_of_speech import ADV
-    nlp = spacy.en.English()
+#    nlp = spacy.en.English()
-    # Find log probability of Nth most frequent word
+#    # Find log probability of Nth most frequent word
-    probs = [lex.prob for lex in nlp.vocab]
+#    probs = [lex.prob for lex in nlp.vocab]
-    probs.sort()
+#    probs.sort()
-    is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000]
+#    is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000]
-    tokens = nlp(u"‘Give it back,’ he pleaded abjectly, ‘it’s mine.’")
+#    tokens = nlp(u"‘Give it back,’ he pleaded abjectly, ‘it’s mine.’")
-    o = u''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens)
+#    o = u''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens)
-    o == u'‘Give it back,’ he pleaded ABJECTLY, ‘it’s mine.’'
+#    o == u'‘Give it back,’ he pleaded ABJECTLY, ‘it’s mine.’'
-
+#
-@pytest.mark.models
+#@pytest.mark.models
-def test3():
+#def test3():
-    import spacy.en
+#    import spacy.en
-    from spacy.parts_of_speech import ADV
+#    from spacy.parts_of_speech import ADV
-    nlp = spacy.en.English()
+#    nlp = spacy.en.English()
-    # Find log probability of Nth most frequent word
+#    # Find log probability of Nth most frequent word
-    probs = [lex.prob for lex in nlp.vocab]
+#    probs = [lex.prob for lex in nlp.vocab]
-    probs.sort()
+#    probs.sort()
-    is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000]
+#    is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000]
-    tokens = nlp(u"‘Give it back,’ he pleaded abjectly, ‘it’s mine.’")
+#    tokens = nlp(u"‘Give it back,’ he pleaded abjectly, ‘it’s mine.’")
-    o = u''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens)
+#    o = u''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens)
-    assert o == u'‘Give it back,’ he pleaded ABJECTLY, ‘it’s mine.’'
+#    assert o == u'‘Give it back,’ he pleaded ABJECTLY, ‘it’s mine.’'
-
+#
-    pleaded = tokens[7]
+#    pleaded = tokens[7]
-    assert pleaded.repvec.shape == (300,)
+#    assert pleaded.repvec.shape == (300,)
-    o = pleaded.repvec[:5]
+#    o = pleaded.repvec[:5]
-    assert sum(o) != 0
+#    assert sum(o) != 0
-    from numpy import dot
+#    from numpy import dot
-    from numpy.linalg import norm
+#    from numpy.linalg import norm
-
+#
-    cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2))
+#    cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2))
-    words = [w for w in nlp.vocab if w.is_lower and w.has_repvec]
+#    words = [w for w in nlp.vocab if w.is_lower and w.has_repvec]
-    words.sort(key=lambda w: cosine(w.repvec, pleaded.repvec))
+#    words.sort(key=lambda w: cosine(w.repvec, pleaded.repvec))
-    words.reverse()
+#    words.reverse()
-    o = [w.orth_ for w in words[0:20]]
+#    o = [w.orth_ for w in words[0:20]]
-    assert o == [u'pleaded', u'pled', u'plead', u'confessed', u'interceded',
+#    assert o == [u'pleaded', u'pled', u'plead', u'confessed', u'interceded',
-                 u'pleads', u'testified', u'conspired', u'motioned', u'demurred',
+#                 u'pleads', u'testified', u'conspired', u'motioned', u'demurred',
-                 u'countersued', u'remonstrated', u'begged', u'apologised',
+#                 u'countersued', u'remonstrated', u'begged', u'apologised',
-                 u'consented', u'acquiesced', u'petitioned', u'quarreled',
+#                 u'consented', u'acquiesced', u'petitioned', u'quarreled',
-                 u'appealed', u'pleading']
+#                 u'appealed', u'pleading']
-    o = [w.orth_ for w in words[50:60]]
+#    o = [w.orth_ for w in words[50:60]]
-    assert o == [u'martialed', u'counselled', u'bragged',
+#    assert o == [u'martialed', u'counselled', u'bragged',
-                 u'backtracked', u'caucused', u'refiled', u'dueled', u'mused',
+#                 u'backtracked', u'caucused', u'refiled', u'dueled', u'mused',
-                 u'dissented', u'yearned']
+#                 u'dissented', u'yearned']
-    o = [w.orth_ for w in words[100:110]]
+#    o = [w.orth_ for w in words[100:110]]
-    assert o == [u'acquits', u'cabled', u'ducked', u'sentenced',
+#    assert o == [u'acquits', u'cabled', u'ducked', u'sentenced',
-                 u'gaoled', u'perjured', u'absconded', u'bargained', u'overstayed',
+#                 u'gaoled', u'perjured', u'absconded', u'bargained', u'overstayed',
-                 u'clerked']
+#    u'clerked']
-
+#
-    #o = [w.orth_ for w in words[1000:1010]]
+#    #o = [w.orth_ for w in words[1000:1010]]
-    #assert o == [u'scorned', u'baled', u'righted', u'requested', u'swindled',
+#    #assert o == [u'scorned', u'baled', u'righted', u'requested', u'swindled',
-    #    u'posited', u'firebombed', u'slimed', u'deferred', u'sagged']
+#    #    u'posited', u'firebombed', u'slimed', u'deferred', u'sagged']
-    #o = [w.orth_ for w in words[50000:50010]]
+#    #o = [w.orth_ for w in words[50000:50010]]
-    #assert o == [u'fb', u'ford', u'systems', u'puck', u'anglers', u'ik', u'tabloid',
+#    #assert o == [u'fb', u'ford', u'systems', u'puck', u'anglers', u'ik', u'tabloid',
-    #    u'dirty', u'rims', u'artists']
+#    #    u'dirty', u'rims', u'artists']
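Aside, not part of the commit above: a minimal sketch of how one of these model-dependent doc examples could be disabled without commenting it out, using pytest's built-in skip marker instead. The test name and skip reason below are hypothetical; the spacy.en / ADV calls mirror the old test_1 body shown in the diff.

import pytest


@pytest.mark.skip(reason="doc example needs the statistical models")
def test_adverb_doc_example():
    # Hypothetical replacement for test_1: same example, but skipped
    # instead of commented out, so it still shows up in the test report.
    import spacy.en
    from spacy.parts_of_speech import ADV

    nlp = spacy.en.English()
    tokens = nlp(u"‘Give it back,’ he pleaded abjectly, ‘it’s mine.’",
                 tag=True, parse=False)
    # Uppercase every adverb, leave all other tokens untouched.
    o = u''.join(tok.string.upper() if tok.pos == ADV else tok.string
                 for tok in tokens)
    assert o == u"‘Give it BACK,’ he pleaded ABJECTLY, ‘it’s mine.’"

Skipped tests remain visible to the collector, which makes them easier to re-enable later than commented-out blocks.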