* Pass tests. Need to implement more feature functions.

Matthew Honnibal 2014-08-30 20:36:06 +02:00
parent dcab14ede2
commit 8bbfadfced
5 changed files with 21 additions and 22 deletions

View File

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 # Binary string features
 def is_alpha(string, prob, case_stats, tag_stats):
     return False
@@ -41,6 +43,7 @@ def can_tag(name, thresh):
 def canon_case(string, prob, cluster, case_stats, tag_stats):
     return string
 
+
 def word_shape(string, *args):
     length = len(string)
     shape = ""

View File

@@ -11,7 +11,7 @@ cdef class Lexeme:
     cpdef readonly double prob
     cpdef readonly size_t cluster
-    cpdef readonly string
+    cpdef readonly unicode string
     cpdef readonly list views
 
     cdef readonly flag_t flags
 

View File

@@ -54,6 +54,7 @@ cdef class Lexeme:
         self.string = string
 
         self.views = []
+        cdef unicode view
         for string_feature in string_features:
             view = string_feature(string, prob, cluster, case_stats, tag_stats)
             self.views.append(view)
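With the .pxd change above, views holds unicode strings, one per feature function, each produced from the same (string, prob, cluster, case_stats, tag_stats) arguments. A minimal sketch of that convention outside the class (the string_features tuple and argument values here are hypothetical, not the commit's actual module layout):

# Hypothetical wiring: each feature function returns one unicode
# "view" of the word; Lexeme keeps them in a parallel list.
string_features = (canon_case, word_shape)

views = []
for string_feature in string_features:
    views.append(string_feature(u'Hello', 0.0, 0, {}, {}))
# canon_case is still a stub that returns its input, so this would
# give views == [u'Hello', u'Xxxxx']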

View File

@@ -1,37 +1,34 @@
 from __future__ import unicode_literals
 
-from spacy import lex_of
-from spacy.en import lookup
-from spacy.en import unhash
+from spacy.en import EN
 
 
 def test_neq():
-    addr = lookup('Hello')
-    assert lookup('bye') != addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('bye') != addr
 
 
 def test_eq():
-    addr = lookup('Hello')
-    assert lookup('Hello') == addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('Hello') == addr
 
 
 def test_round_trip():
-    hello = lookup('Hello')
-    assert unhash(hello.lex) == 'Hello'
+    hello = EN.lookup('Hello')
+    assert hello.string == 'Hello'
 
 
 def test_case_neq():
-    addr = lookup('Hello')
-    assert lookup('hello') != addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('hello') != addr
 
 
 def test_punct_neq():
-    addr = lookup('Hello')
-    assert lookup('Hello,') != addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('Hello,') != addr
 
 
 def test_short():
-    addr = lookup('I')
-    assert unhash(addr.lex) == 'I'
-    addr = lookup('not')
-    assert unhash(addr.lex) == 'not'
+    addr = EN.lookup('I')
+    assert addr.string == 'I'
+    assert addr.string != 'not'
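Net effect of these test changes: the module-level lookup() plus unhash(hello.lex) round trip is replaced by EN.lookup(), which returns a Lexeme exposing the original text directly via the new unicode string attribute declared above. A usage sketch of just the migrated calls (nothing here beyond what the tests exercise):

from spacy.en import EN

hello = EN.lookup('Hello')
assert hello.string == 'Hello'       # replaces unhash(hello.lex) == 'Hello'
assert EN.lookup('hello') != hello   # lookup stays case-sensitive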

View File

@@ -1,8 +1,6 @@
 from __future__ import unicode_literals
 
-from spacy.en import unhash
-from spacy import lex_of
-from spacy import en
+from spacy.en import EN
 from spacy.util import utf8open
 
 import pytest
@@ -21,5 +19,5 @@ def sun_txt():
 
 def test_tokenize(sun_txt):
     assert len(sun_txt) != 0
-    tokens = en.tokenize(sun_txt)
+    tokens = EN.tokenize(sun_txt)
     assert True