From 8bbfadfced722dded923aa2478684737bd0ca86c Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 30 Aug 2014 20:36:06 +0200
Subject: [PATCH] * Pass tests. Need to implement more feature functions.

---
 spacy/orth.py          |  3 +++
 spacy/word.pxd         |  2 +-
 spacy/word.pyx         |  1 +
 tests/test_vocab.py    | 31 ++++++++++++++-----------------
 tests/test_wiki_sun.py |  6 ++----
 5 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/spacy/orth.py b/spacy/orth.py
index c574006c8..847d7eae7 100644
--- a/spacy/orth.py
+++ b/spacy/orth.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 # Binary string features
 def is_alpha(string, prob, case_stats, tag_stats):
     return False
@@ -41,6 +43,7 @@ def can_tag(name, thresh):
 def canon_case(string, prob, cluster, case_stats, tag_stats):
     return string
 
+
 def word_shape(string, *args):
     length = len(string)
     shape = ""
diff --git a/spacy/word.pxd b/spacy/word.pxd
index 90c9c941a..c382d91ea 100644
--- a/spacy/word.pxd
+++ b/spacy/word.pxd
@@ -11,7 +11,7 @@ cdef class Lexeme:
     cpdef readonly double prob
     cpdef readonly size_t cluster
 
-    cpdef readonly string
+    cpdef readonly unicode string
     cpdef readonly list views
 
     cdef readonly flag_t flags
diff --git a/spacy/word.pyx b/spacy/word.pyx
index 9427e3397..8824d8a89 100644
--- a/spacy/word.pyx
+++ b/spacy/word.pyx
@@ -54,6 +54,7 @@ cdef class Lexeme:
         self.string = string
 
         self.views = []
+        cdef unicode view
         for string_feature in string_features:
             view = string_feature(string, prob, cluster, case_stats, tag_stats)
             self.views.append(view)
diff --git a/tests/test_vocab.py b/tests/test_vocab.py
index 6128e728a..706a7ee07 100644
--- a/tests/test_vocab.py
+++ b/tests/test_vocab.py
@@ -1,37 +1,34 @@
 from __future__ import unicode_literals
 
-from spacy import lex_of
-from spacy.en import lookup
-from spacy.en import unhash
+from spacy.en import EN
 
 
 def test_neq():
-    addr = lookup('Hello')
-    assert lookup('bye') != addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('bye') != addr
 
 
 def test_eq():
-    addr = lookup('Hello')
-    assert lookup('Hello') == addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('Hello') == addr
 
 
 def test_round_trip():
-    hello = lookup('Hello')
-    assert unhash(hello.lex) == 'Hello'
+    hello = EN.lookup('Hello')
+    assert hello.string == 'Hello'
 
 
 def test_case_neq():
-    addr = lookup('Hello')
-    assert lookup('hello') != addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('hello') != addr
 
 
 def test_punct_neq():
-    addr = lookup('Hello')
-    assert lookup('Hello,') != addr
+    addr = EN.lookup('Hello')
+    assert EN.lookup('Hello,') != addr
 
 
 def test_short():
-    addr = lookup('I')
-    assert unhash(addr.lex) == 'I'
-    addr = lookup('not')
-    assert unhash(addr.lex) == 'not'
+    addr = EN.lookup('I')
+    assert addr.string == 'I'
+    assert addr.string != 'not'
diff --git a/tests/test_wiki_sun.py b/tests/test_wiki_sun.py
index 1329bdffc..75194b7f5 100644
--- a/tests/test_wiki_sun.py
+++ b/tests/test_wiki_sun.py
@@ -1,8 +1,6 @@
 from __future__ import unicode_literals
 
-from spacy.en import unhash
-from spacy import lex_of
-from spacy import en
+from spacy.en import EN
 from spacy.util import utf8open
 
 import pytest
@@ -21,5 +19,5 @@ def sun_txt():
 
 def test_tokenize(sun_txt):
     assert len(sun_txt) != 0
-    tokens = en.tokenize(sun_txt)
+    tokens = EN.tokenize(sun_txt)
     assert True
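
The commit message notes that more feature functions remain to be implemented, and the orth.py hunk above shows only the opening lines of word_shape(). For orientation only, below is a minimal sketch of how a shape feature matching the word_shape(string, *args) signature could be completed; the character classes and the run-capping rule are illustrative assumptions, not taken from this patch.

# Illustrative sketch only (not part of the patch): one plausible way to
# finish word_shape(). Letters map to X/x, digits to d, other characters
# pass through unchanged, and runs of the same class are capped so that
# long words still share a shape with shorter ones.
def word_shape(string, *args):
    shape = ""
    last_class = ""
    run_length = 0
    for c in string:
        if c.isalpha():
            char_class = "X" if c.isupper() else "x"
        elif c.isdigit():
            char_class = "d"
        else:
            char_class = c
        if char_class == last_class:
            run_length += 1
        else:
            run_length = 0
            last_class = char_class
        # Keep at most three consecutive characters of the same class.
        if run_length < 3:
            shape += char_class
    return shape

# e.g. word_shape("Hello,") == "Xxxx,"  and  word_shape("1999") == "ddd"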