* Pass tests. Need to implement more feature functions.

Matthew Honnibal 2014-08-30 20:36:06 +02:00
parent dcab14ede2
commit 8bbfadfced
5 changed files with 21 additions and 22 deletions

View File

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 # Binary string features
 def is_alpha(string, prob, case_stats, tag_stats):
     return False
@@ -41,6 +43,7 @@ def can_tag(name, thresh):
 def canon_case(string, prob, cluster, case_stats, tag_stats):
     return string
 
+
 def word_shape(string, *args):
     length = len(string)
     shape = ""

View File

@@ -11,7 +11,7 @@ cdef class Lexeme:
     cpdef readonly double prob
     cpdef readonly size_t cluster
-    cpdef readonly string
+    cpdef readonly unicode string
     cpdef readonly list views
 
     cdef readonly flag_t flags
 

View File

@@ -54,6 +54,7 @@ cdef class Lexeme:
         self.string = string
 
         self.views = []
+        cdef unicode view
         for string_feature in string_features:
             view = string_feature(string, prob, cluster, case_stats, tag_stats)
             self.views.append(view)
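With the .pxd change above, views holds unicode strings, one per feature function, each produced from the same (string, prob, cluster, case_stats, tag_stats) arguments. A minimal sketch of that convention outside the class (the string_features tuple and argument values here are hypothetical, not the commit's actual module layout):

# Hypothetical wiring: each feature function returns one unicode
# "view" of the word; Lexeme keeps them in a parallel list.
string_features = (canon_case, word_shape)

views = []
for string_feature in string_features:
    views.append(string_feature(u'Hello', 0.0, 0, {}, {}))
# canon_case is still a stub that returns its input, so this would
# give views == [u'Hello', u'Xxxxx']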

View File

@@ -1,37 +1,34 @@
 from __future__ import unicode_literals
 
-from spacy import lex_of
-from spacy.en import lookup
-from spacy.en import unhash
+from spacy.en import EN
 
 
 def test_neq():
-    addr = lookup('Hello')
-    assert lookup('bye') != addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('bye') != addr
 
 
 def test_eq():
-    addr = lookup('Hello')
-    assert lookup('Hello') == addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('Hello') == addr
 
 
 def test_round_trip():
-    hello = lookup('Hello')
-    assert unhash(hello.lex) == 'Hello'
+    hello = EN.lookup('Hello')
+    assert hello.string == 'Hello'
 
 
 def test_case_neq():
-    addr = lookup('Hello')
-    assert lookup('hello') != addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('hello') != addr
 
 
 def test_punct_neq():
-    addr = lookup('Hello')
-    assert lookup('Hello,') != addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('Hello,') != addr
 
 
 def test_short():
-    addr = lookup('I')
-    assert unhash(addr.lex) == 'I'
-    addr = lookup('not')
-    assert unhash(addr.lex) == 'not'
+    addr = EN.lookup('I')
+    assert addr.string == 'I'
+    assert addr.string != 'not'
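Net effect of these test changes: the module-level lookup() plus unhash(hello.lex) round trip is replaced by EN.lookup(), which returns a Lexeme exposing the original text directly via the new unicode string attribute declared above. A usage sketch of just the migrated calls (nothing here beyond what the tests exercise):

from spacy.en import EN

hello = EN.lookup('Hello')
assert hello.string == 'Hello'       # replaces unhash(hello.lex) == 'Hello'
assert EN.lookup('hello') != hello   # lookup stays case-sensitive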

View File

@@ -1,8 +1,6 @@
 from __future__ import unicode_literals
 
-from spacy.en import unhash
-from spacy import lex_of
-from spacy import en
+from spacy.en import EN
 from spacy.util import utf8open
 
 import pytest
@@ -21,5 +19,5 @@ def sun_txt():
 
 def test_tokenize(sun_txt):
     assert len(sun_txt) != 0
-    tokens = en.tokenize(sun_txt)
+    tokens = EN.tokenize(sun_txt)
     assert True