Tweak line spacing

This commit is contained in:
Jordan Suchow 2015-04-19 12:39:18 -07:00
parent 85603f5b6a
commit 38ed265b7d
24 changed files with 37 additions and 3 deletions

View File

@ -64,8 +64,6 @@ def clean(ext):
if os.path.exists(html): if os.path.exists(html):
os.unlink(html) os.unlink(html)
HERE = os.path.dirname(__file__) HERE = os.path.dirname(__file__)
virtual_env = os.environ.get('VIRTUAL_ENV', '') virtual_env = os.environ.get('VIRTUAL_ENV', '')
compile_args = [] compile_args = []

View File

@ -7,6 +7,7 @@ from spacy.lexeme import lex_of
from spacy import LEX, NORM, SHAPE, LAST3 from spacy import LEX, NORM, SHAPE, LAST3
def test_group_by_lex(): def test_group_by_lex():
tokens = en.tokenize("I like the red one and I like the blue one") tokens = en.tokenize("I like the red one and I like the blue one")
names, hashes, groups = tokens.group_by(LEX) names, hashes, groups = tokens.group_by(LEX)

View File

@ -40,6 +40,7 @@ def test_begin(state, sentence):
assert not state.is_valid('O') assert not state.is_valid('O')
assert not state.is_valid('U-PER') assert not state.is_valid('U-PER')
def test_in(state, sentence): def test_in(state, sentence):
state.transition('B-PER') state.transition('B-PER')
assert state.n_ents == 0 assert state.n_ents == 0

View File

@ -2,6 +2,7 @@
"""Sphinx doctest is just too hard. Manually paste doctest examples here""" """Sphinx doctest is just too hard. Manually paste doctest examples here"""
from spacy.en.attrs import IS_LOWER from spacy.en.attrs import IS_LOWER
def test_1(): def test_1():
import spacy.en import spacy.en
from spacy.parts_of_speech import ADV from spacy.parts_of_speech import ADV
@ -39,6 +40,7 @@ def test2():
nlp.vocab[u'quietly'].prob nlp.vocab[u'quietly'].prob
-11.07155704498291 -11.07155704498291
def test3(): def test3():
import spacy.en import spacy.en
from spacy.parts_of_speech import ADV from spacy.parts_of_speech import ADV

View File

@ -8,6 +8,7 @@ from spacy.en import English
def EN(): def EN():
return English() return English()
def test_tweebo_challenge(EN): def test_tweebo_challenge(EN):
text = u""":o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| ") :> ....""" text = u""":o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| ") :> ...."""
tokens = EN(text) tokens = EN(text)

View File

@ -16,6 +16,7 @@ def words():
return ["1997", "19.97", "hello9", "Hello", "HELLO", "Hello9", "\n", "!", return ["1997", "19.97", "hello9", "Hello", "HELLO", "Hello9", "\n", "!",
"!d", "\nd"] "!d", "\nd"]
def test_is_alpha(words): def test_is_alpha(words):
assert not is_alpha(words[0]) assert not is_alpha(words[0])
assert not is_alpha(words[1]) assert not is_alpha(words[1])

View File

@ -5,10 +5,12 @@ from spacy.strings import StringStore
import pytest import pytest
@pytest.fixture @pytest.fixture
def sstore(): def sstore():
return StringStore() return StringStore()
def test_save_bytes(sstore): def test_save_bytes(sstore):
Hello_i = sstore[b'Hello'] Hello_i = sstore[b'Hello']
assert Hello_i == 1 assert Hello_i == 1

View File

@ -2,10 +2,12 @@ import pytest
from spacy.en import English from spacy.en import English
@pytest.fixture @pytest.fixture
def EN(): def EN():
return English() return English()
def test_range_iter(EN): def test_range_iter(EN):
for i in range(len(EN.vocab)): for i in range(len(EN.vocab)):
lex = EN.vocab[i] lex = EN.vocab[i]

View File

@ -17,6 +17,7 @@ def morph_exc():
'PRP$': {'his': {'L': '-PRP-', 'person': 3, 'case': 2}}, 'PRP$': {'his': {'L': '-PRP-', 'person': 3, 'case': 2}},
} }
def test_load_exc(EN, morph_exc): def test_load_exc(EN, morph_exc):
EN.tagger.load_morph_exceptions(morph_exc) EN.tagger.load_morph_exceptions(morph_exc)
tokens = EN('I like his style.', tag=True) tokens = EN('I like his style.', tag=True)

View File

@ -3,6 +3,7 @@ from spacy.en import English
nlp = English() nlp = English()
def test_simple_types(): def test_simple_types():
tokens = nlp(u'Mr. Best flew to New York on Saturday morning.') tokens = nlp(u'Mr. Best flew to New York on Saturday morning.')
ents = list(tokens.ents) ents = list(tokens.ents)

View File

@ -3,6 +3,7 @@ import pytest
from spacy.en import English from spacy.en import English
def test_only_pre1(): def test_only_pre1():
EN = English() EN = English()
assert len(EN("(")) == 1 assert len(EN("(")) == 1

View File

@ -3,6 +3,7 @@ from spacy.en import English
import pytest import pytest
@pytest.fixture @pytest.fixture
def EN(): def EN():
return English() return English()

View File

@ -8,20 +8,26 @@ from spacy.orth import word_shape as ws
def test_capitalized(): def test_capitalized():
assert ws('Nasa') == 'Xxxx' assert ws('Nasa') == 'Xxxx'
def test_truncate(): def test_truncate():
assert ws('capitalized') == 'xxxx' assert ws('capitalized') == 'xxxx'
def test_digits(): def test_digits():
assert ws('999999999') == 'dddd' assert ws('999999999') == 'dddd'
def test_mix(): def test_mix():
assert ws('C3P0') == 'XdXd' assert ws('C3P0') == 'XdXd'
def test_punct(): def test_punct():
assert ws(',') == ',' assert ws(',') == ','
def test_space(): def test_space():
assert ws('\n') == '\n' assert ws('\n') == '\n'
def test_punct_seq(): def test_punct_seq():
assert ws('``,-') == '``,-' assert ws('``,-') == '``,-'

View File

@ -13,9 +13,11 @@ def EN():
def test_no_special(EN): def test_no_special(EN):
assert len(EN("(can)")) == 3 assert len(EN("(can)")) == 3
def test_no_punct(EN): def test_no_punct(EN):
assert len(EN("can't")) == 2 assert len(EN("can't")) == 2
def test_prefix(EN): def test_prefix(EN):
assert len(EN("(can't")) == 3 assert len(EN("(can't")) == 3

View File

@ -1,6 +1,7 @@
from spacy.en import English from spacy.en import English
import six import six
def test_tag_names(): def test_tag_names():
nlp = English() nlp = English()
tokens = nlp(u'I ate pizzas with anchovies.', parse=True, tag=True) tokens = nlp(u'I ate pizzas with anchovies.', parse=True, tag=True)

View File

@ -6,6 +6,7 @@ import pytest
NLU = English() NLU = English()
def test_am_pm(): def test_am_pm():
numbers = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'] numbers = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']
variants = ['a.m.', 'am', 'p.m.', 'pm'] variants = ['a.m.', 'am', 'p.m.', 'pm']

View File

@ -4,6 +4,7 @@ import pytest
from spacy.en import English from spacy.en import English
from spacy.parts_of_speech import ADV from spacy.parts_of_speech import ADV
@pytest.fixture @pytest.fixture
def nlp(): def nlp():
return English() return English()

View File

@ -7,6 +7,8 @@ from spacy.en.attrs import IS_STOP
import pytest import pytest
nlp = English() nlp = English()
@pytest.fixture @pytest.fixture
def token(): def token():
tokens = nlp(u'Give it back! He pleaded.') tokens = nlp(u'Give it back! He pleaded.')

View File

@ -31,6 +31,7 @@ def _orphan_from_list(toks):
lst.append(tok) lst.append(tok)
return lst return lst
def test_list_orphans(): def test_list_orphans():
# Test case from NSchrading # Test case from NSchrading
nlp = English() nlp = English()

View File

@ -10,10 +10,12 @@ from spacy.en import English
def EN(): def EN():
return English().tokenizer return English().tokenizer
def test_no_word(EN): def test_no_word(EN):
tokens = EN(u'') tokens = EN(u'')
assert len(tokens) == 0 assert len(tokens) == 0
def test_single_word(EN): def test_single_word(EN):
tokens = EN(u'hello') tokens = EN(u'hello')
assert tokens[0].orth_ == 'hello' assert tokens[0].orth_ == 'hello'
@ -60,6 +62,7 @@ def test_contraction_punct(EN):
tokens = EN("can't!") tokens = EN("can't!")
assert len(tokens) == 3 assert len(tokens) == 3
def test_sample(EN): def test_sample(EN):
text = """Tributes pour in for late British Labour Party leader text = """Tributes pour in for late British Labour Party leader

View File

@ -3,6 +3,7 @@ from spacy.en import English
import pytest import pytest
@pytest.fixture @pytest.fixture
def tokens(): def tokens():
nlp = English() nlp = English()

View File

@ -2,6 +2,7 @@ from __future__ import unicode_literals
from spacy.orth import like_url from spacy.orth import like_url
def test_basic_url(): def test_basic_url():
assert like_url('www.google.com') assert like_url('www.google.com')
assert like_url('google.com') assert like_url('google.com')

View File

@ -4,15 +4,18 @@ from spacy.en import English
import pytest import pytest
@pytest.fixture @pytest.fixture
def EN(): def EN():
return English() return English()
def test_vec(EN): def test_vec(EN):
hype = EN.vocab['hype'] hype = EN.vocab['hype']
assert hype.orth_ == 'hype' assert hype.orth_ == 'hype'
assert 0.08 >= hype.repvec[0] > 0.07 assert 0.08 >= hype.repvec[0] > 0.07
def test_capitalized(EN): def test_capitalized(EN):
hype = EN.vocab['Hype'] hype = EN.vocab['Hype']
assert hype.orth_ == 'Hype' assert hype.orth_ == 'Hype'