diff --git a/spacy/en.pyx b/spacy/en.pyx
index a51349116..f29e45c9c 100644
--- a/spacy/en.pyx
+++ b/spacy/en.pyx
@@ -37,13 +37,7 @@ provides a fully Penn Treebank 3-compliant tokenizer.
 from __future__ import unicode_literals
 
-from libc.stdint cimport uint64_t
-
 cimport lang
-from spacy.lexeme cimport lexeme_check_flag
-from spacy.lexeme cimport lexeme_string_view
-
-from spacy import orth
 
 
 cdef class English(Language):
diff --git a/spacy/lang.pyx b/spacy/lang.pyx
index 35d1838b2..73f5d358a 100644
--- a/spacy/lang.pyx
+++ b/spacy/lang.pyx
@@ -15,7 +15,7 @@ import re
 
 from .util import read_lang_data
 from spacy.tokens import Tokens
-from spacy.lexeme cimport LexemeC, lexeme_init, lexeme_pack, lexeme_unpack
+from spacy.lexeme cimport LexemeC, get_lexeme_dict, lexeme_pack, lexeme_unpack
 from murmurhash.mrmr cimport hash64
 
 from cpython.ref cimport Py_INCREF
@@ -30,99 +30,11 @@ from spacy import orth
 from spacy import util
 
 
-cdef enum Flags:
-    Flag_IsAlpha
-    Flag_IsAscii
-    Flag_IsDigit
-    Flag_IsLower
-    Flag_IsPunct
-    Flag_IsSpace
-    Flag_IsTitle
-    Flag_IsUpper
-
-    Flag_CanAdj
-    Flag_CanAdp
-    Flag_CanAdv
-    Flag_CanConj
-    Flag_CanDet
-    Flag_CanNoun
-    Flag_CanNum
-    Flag_CanPdt
-    Flag_CanPos
-    Flag_CanPron
-    Flag_CanPrt
-    Flag_CanPunct
-    Flag_CanVerb
-
-    Flag_OftLower
-    Flag_OftTitle
-    Flag_OftUpper
-    Flag_N
-
-
-cdef enum Views:
-    View_CanonForm
-    View_WordShape
-    View_NonSparse
-    View_Asciied
-    View_N
-
-
-# Assign the flag and view functions by enum value.
-# This is verbose, but it ensures we don't get nasty order sensitivities.
-STRING_VIEW_FUNCS = [None] * View_N
-STRING_VIEW_FUNCS[View_CanonForm] = orth.canon_case
-STRING_VIEW_FUNCS[View_WordShape] = orth.word_shape
-STRING_VIEW_FUNCS[View_NonSparse] = orth.non_sparse
-STRING_VIEW_FUNCS[View_Asciied] = orth.asciied
-
-FLAG_FUNCS = [None] * Flag_N
-FLAG_FUNCS[Flag_IsAlpha] = orth.is_alpha
-FLAG_FUNCS[Flag_IsAscii] = orth.is_ascii
-FLAG_FUNCS[Flag_IsDigit] = orth.is_digit
-FLAG_FUNCS[Flag_IsLower] = orth.is_lower
-FLAG_FUNCS[Flag_IsPunct] = orth.is_punct
-FLAG_FUNCS[Flag_IsSpace] = orth.is_space
-FLAG_FUNCS[Flag_IsTitle] = orth.is_title
-FLAG_FUNCS[Flag_IsUpper] = orth.is_upper
-
-FLAG_FUNCS[Flag_CanAdj] = orth.can_tag('ADJ')
-FLAG_FUNCS[Flag_CanAdp] = orth.can_tag('ADP')
-FLAG_FUNCS[Flag_CanAdv] = orth.can_tag('ADV')
-FLAG_FUNCS[Flag_CanConj] = orth.can_tag('CONJ')
-FLAG_FUNCS[Flag_CanDet] = orth.can_tag('DET')
-FLAG_FUNCS[Flag_CanNoun] = orth.can_tag('NOUN')
-FLAG_FUNCS[Flag_CanNum] = orth.can_tag('NUM')
-FLAG_FUNCS[Flag_CanPdt] = orth.can_tag('PDT')
-FLAG_FUNCS[Flag_CanPos] = orth.can_tag('POS')
-FLAG_FUNCS[Flag_CanPron] = orth.can_tag('PRON')
-FLAG_FUNCS[Flag_CanPrt] = orth.can_tag('PRT')
-FLAG_FUNCS[Flag_CanPunct] = orth.can_tag('PUNCT')
-FLAG_FUNCS[Flag_CanVerb] = orth.can_tag('VERB')
-
-FLAG_FUNCS[Flag_OftLower] = orth.oft_case('lower', 0.7)
-FLAG_FUNCS[Flag_OftTitle] = orth.oft_case('title', 0.7)
-FLAG_FUNCS[Flag_OftUpper] = orth.oft_case('upper', 0.7)
-
-
 cdef class Language:
     """Base class for language-specific tokenizers.
 
-    Most subclasses will override the _split or _split_one methods, which take
-    a string of non-whitespace characters and output a list of strings. This
-    function is called by _tokenize, which sits behind a cache and turns the
-    list of strings into Lexeme objects via the Lexicon. Most languages will not
-    need to override _tokenize or tokenize.
-
-    The language is supplied a list of boolean functions, used to compute flag
-    features. These are passed to the language's Lexicon object.
-
-    The language's name is used to look up default data-files, found in data/<name>.
 
         * The data/<lang>/tokenization table, which handles special cases like contractions;
-        * The appropriate :py:meth:`find_split` function, which is used to split
-          off punctuation etc.
+        * The data/<lang>/prefix file, used to build a regex to split off prefixes;
+        * The data/<lang>/suffix file, used to build a regex to split off suffixes.
 
         Args:
             string (unicode): The string to be tokenized.
 
         Returns:
-            tokens (Tokens): A Tokens object, giving access to a sequence of LexIDs.
+            tokens (Tokens): A Tokens object, giving access to a sequence of Lexemes.
         """
         cdef size_t length = len(string)
         cdef Tokens tokens = Tokens(length)
@@ -339,10 +249,8 @@ cdef class Language:
 
 
 cdef class Lexicon:
-    def __cinit__(self, lexemes, string_features, flag_features):
+    def __cinit__(self, lexemes):
         self._mem = Pool()
-        self._flag_features = flag_features
-        self._string_features = string_features
         self._dict = PreshMap(2 ** 20)
         self.size = 0
         cdef String string
@@ -351,29 +259,22 @@ cdef class Lexicon:
         for lexeme_dict in lexemes:
             string_from_unicode(&string, lexeme_dict['string'])
             lexeme = <LexemeC*>self._mem.alloc(1, sizeof(LexemeC))
-            lexeme.views = <char**>self._mem.alloc(len(string_features), sizeof(char*))
             lexeme_unpack(lexeme, lexeme_dict)
             self._dict.set(string.key, lexeme)
             self.size += 1
 
     cdef LexemeC* get(self, String* string) except NULL:
-        cdef LexemeC* lexeme
-        lexeme = <LexemeC*>self._dict.get(string.key)
-        if lexeme != NULL:
-            return lexeme
-
-        cdef unicode uni_string = string.chars[:string.n]
-        views = [string_view(uni_string, 0.0, 0, {}, {})
-                 for string_view in self._string_features]
-        flags = set()
-        for i, flag_feature in enumerate(self._flag_features):
-            if flag_feature(uni_string, 0.0, {}, {}):
-                flags.add(i)
-
-        lexeme = lexeme_init(self._mem, self.size, uni_string, 0, 0, views, flags)
-        self._dict.set(string.key, lexeme)
+        cdef LexemeC* lex
+        lex = <LexemeC*>self._dict.get(string.key)
+        if lex != NULL:
+            return lex
+
+        lex = <LexemeC*>self._mem.alloc(1, sizeof(LexemeC))
+        cdef unicode unicode_string = string.chars[:string.n]
+        lexeme_unpack(lex, get_lexeme_dict(self.size, unicode_string))
+        self._dict.set(string.key, lex)
         self.size += 1
-        return lexeme
+        return lex
 
     cpdef Lexeme lookup(self, unicode uni_string):
        """Retrieve (or create, if not found) a Lexeme for a string, and return it.
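Note on the Lexicon change above: `get` no longer calls per-language string-view and flag hooks; the first time a string is seen it builds an entry from `get_lexeme_dict`, then serves the cached struct on every later lookup. A minimal pure-Python sketch of that lookup-or-create flow (`PyLexicon` is an illustrative stand-in, not the Cython class; the real code hashes with murmurhash's hash64 and stores C structs in a PreshMap):

    # Illustrative sketch only: mirrors Lexicon.get's caching logic in plain
    # Python, assuming a built spacy.lexeme module for get_lexeme_dict.
    from spacy.lexeme import get_lexeme_dict

    class PyLexicon(object):
        def __init__(self):
            self._dict = {}   # stands in for the hash64-keyed PreshMap
            self.size = 0

        def get(self, unicode_string):
            lex = self._dict.get(unicode_string)
            if lex is None:
                # Created at most once per distinct string, then cached.
                lex = get_lexeme_dict(self.size, unicode_string)
                self._dict[unicode_string] = lex
                self.size += 1
            return lex
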
diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd
index 09d10d0b7..d7c85619d 100644
--- a/spacy/lexeme.pxd
+++ b/spacy/lexeme.pxd
@@ -70,6 +70,7 @@ cdef struct LexemeC:
     flag_t orth_flags
     flag_t dist_flags
 
+cpdef dict get_lexeme_dict(size_t i, unicode string)
 
 cdef char* intern_and_encode(unicode string, size_t* length) except NULL
diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx
index d09dfb72d..b84ed4a02 100644
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -6,7 +6,7 @@ import orth
 
 OOV_DIST_FLAGS = 0
 
-def get_lexeme_dict(size_t i, unicode string):
+cpdef dict get_lexeme_dict(size_t i, unicode string):
     ints = [None for _ in range(LexInt_N)]
     ints[LexInt_i] = i
     ints[LexInt_length] = len(string)
@@ -18,13 +18,12 @@ def get_lexeme_dict(size_t i, unicode string):
     floats[LexFloat_prob] = 0
     floats[LexFloat_sentiment] = 0
 
-    cdef size_t length
     strings = [None for _ in range(LexStr_N)]
-    strings[LexStr_key] = intern_and_encode(string, &length)
+    strings[LexStr_key] = string
     strings[LexStr_casefix] = strings[LexStr_key]
-    strings[LexStr_shape] = intern_and_encode(orth.word_shape(string), &length)
+    strings[LexStr_shape] = orth.word_shape(string)
     strings[LexStr_unsparse] = strings[LexStr_shape]
-    strings[LexStr_asciied] = intern_and_encode(orth.asciied(string), &length)
+    strings[LexStr_asciied] = orth.asciied(string)
 
     orth_flags = get_orth_flags(string)
     dist_flags = OOV_DIST_FLAGS
@@ -33,8 +32,18 @@ def get_lexeme_dict(size_t i, unicode string):
             'orth_flags': orth_flags,
             'dist_flags': dist_flags}
 
 def get_orth_flags(unicode string):
-    return 0
+    cdef flag_t flags = 0
+    flags |= orth.is_ascii(string) << LexOrth_ascii
+    flags |= orth.is_alpha(string) << LexOrth_alpha
+    flags |= orth.is_digit(string) << LexOrth_digit
+    flags |= orth.is_lower(string) << LexOrth_lower
+    flags |= orth.is_punct(string) << LexOrth_punct
+    flags |= orth.is_space(string) << LexOrth_space
+    flags |= orth.is_title(string) << LexOrth_title
+    flags |= orth.is_upper(string) << LexOrth_upper
+
+    return flags
 
 def get_dist_flags(unicode string):
     return 0
@@ -87,9 +96,9 @@ cdef int lexeme_unpack(LexemeC* lex, dict p) except -1:
     for i, lex_int in enumerate(p['ints']):
         lex.ints[i] = lex_int
     for i, lex_float in enumerate(p['floats']):
-        lex.ints[i] = lex_int
+        lex.floats[i] = lex_float
     cdef size_t _
     for i, lex_string in enumerate(p['strings']):
         lex.strings[i] = intern_and_encode(lex_string, &_)
     lex.orth_flags = p['orth_flags']
-    lex.orth_flags = p['orth_flags']
+    lex.dist_flags = p['dist_flags']
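The new `get_orth_flags` above packs one boolean feature per bit of a `flag_t`, and `lexeme_check_orth_flag` tests a single bit. A self-contained sketch of the same pack/test pattern (the `ORTH_*` bit positions below are illustrative stand-ins for the `LexOrth_*` enum values):

    # Sketch of the bit-field pattern used by get_orth_flags; the real bit
    # positions come from the LexOrth_* enum in lexeme.pxd.
    ORTH_ALPHA, ORTH_DIGIT, ORTH_LOWER = 0, 1, 2

    def pack_orth_flags(string):
        flags = 0
        flags |= string.isalpha() << ORTH_ALPHA   # bool coerces to 0 or 1
        flags |= string.isdigit() << ORTH_DIGIT
        flags |= string.islower() << ORTH_LOWER
        return flags

    def check_orth_flag(flags, flag_id):
        # Mirrors lexeme_check_orth_flag: test one bit of the packed field.
        return bool(flags & (1 << flag_id))

    assert check_orth_flag(pack_orth_flags(u'the'), ORTH_ALPHA)
    assert check_orth_flag(pack_orth_flags(u'1999'), ORTH_DIGIT)
    assert not check_orth_flag(pack_orth_flags(u'1999'), ORTH_ALPHA)
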
diff --git a/spacy/orth.py b/spacy/orth.py
index 685de191c..53dbcf863 100644
--- a/spacy/orth.py
+++ b/spacy/orth.py
@@ -1,6 +1,7 @@
 # -*- coding: utf8 -*-
 from __future__ import unicode_literals
 import unicodedata
+from unidecode import unidecode
 
 import math
 
@@ -9,15 +10,15 @@ TAGS = 'adj adp adv conj det noun num pdt pos pron prt punct verb'.upper().split
 
 
 # Binary string features
-def is_alpha(string, prob, case_stats, tag_stats):
+def is_alpha(string):
     return string.isalpha()
 
 
-def is_digit(string, prob, case_stats, tag_stats):
+def is_digit(string):
     return string.isdigit()
 
 
-def is_punct(string, prob, case_stats, tag_stats):
+def is_punct(string):
     for c in string:
         if not unicodedata.category(c).startswith('P'):
             return False
@@ -25,11 +26,11 @@ def is_punct(string, prob, case_stats, tag_stats):
     return True
 
 
-def is_space(string, prob, case_stats, tag_stats):
+def is_space(string):
     return string.isspace()
 
 
-def is_ascii(string, prob, case_stats, tag_stats):
+def is_ascii(string):
     for c in string:
         if ord(c) >= 128:
             return False
@@ -37,15 +38,15 @@ def is_ascii(string, prob, case_stats, tag_stats):
     return True
 
 
-def is_title(string, prob, case_stats, tag_stats):
+def is_title(string):
     return string.istitle()
 
 
-def is_lower(string, prob, case_stats, tag_stats):
+def is_lower(string):
     return string.islower()
 
 
-def is_upper(string, prob, case_stats, tag_stats):
+def is_upper(string):
     return string.isupper()
 
 
@@ -103,7 +104,7 @@ def word_shape(string, *args):
 
 
 def non_sparse(string, prob, cluster, case_stats, tag_stats):
-    if is_alpha(string, prob, case_stats, tag_stats):
+    if is_alpha(string):
         return canon_case(string, prob, cluster, case_stats, tag_stats)
     elif prob >= math.log(0.0001):
         return string
@@ -112,22 +113,5 @@ def non_sparse(string, prob, cluster, case_stats, tag_stats):
 
 
 def asciied(string, prob=0, cluster=0, case_stats=None, tag_stats=None):
-    '''"ASCIIfy" a Unicode string by stripping all umlauts, tildes, etc.'''
-    # Snippet from
-    # http://www.physic.ut.ee/~kkannike/english/prog/python/util/asciify/index.html
-    # TODO: Rewrite and improve this
-    lookup_table = {
-        u'“': '"',
-        u'”': '"'
-    }
-    temp = u''
-    for char in string:
-        if char in lookup_table:
-            temp += lookup_table[char]
-        else:
-            decomp = unicodedata.decomposition(char)
-            if decomp:  # Not an empty string
-                temp += unichr(int(decomp.split()[0], 16))
-            else:
-                temp += char
-    return temp
+    ascii_string = unidecode(string)
+    return ascii_string.decode('ascii')
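The rewritten `asciied` above delegates transliteration to the `unidecode` package instead of the old decomposition-table snippet. Roughly, on Python 2 (which this codebase targets, given `unichr` and the bytes-to-unicode `.decode('ascii')`):

    # unidecode returns a byte string on Python 2; decoding it as ASCII
    # yields the unicode object the rest of the pipeline expects.
    from unidecode import unidecode
    assert unidecode(u'café') == 'cafe'
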
diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd
index bff4c7742..b138387bf 100644
--- a/spacy/tokens.pxd
+++ b/spacy/tokens.pxd
@@ -5,21 +5,17 @@ from libcpp.vector cimport vector
 
 cdef class Tokens:
     cdef vector[LexemeC*] *v
 
-    cpdef size_t id(self, size_t i) except 0
     cpdef unicode string(self, size_t i)
-    cpdef double prob(self, size_t i) except 1
-    cpdef size_t cluster(self, size_t i) except *
-    cpdef bint check_flag(self, size_t i, size_t flag_id) except *
+    cpdef float prob(self, size_t i) except 1
+    cpdef int cluster(self, size_t i) except *
+    cpdef bint check_orth_flag(self, size_t i, size_t flag_id) except *
+    cpdef bint check_dist_flag(self, size_t i, size_t flag_id) except *
     cpdef unicode string_view(self, size_t i, size_t view_id)
 
-    cpdef size_t canon(self, size_t i) except 0
-    cpdef size_t shape(self, size_t i) except 0
-    cpdef size_t non_sparse(self, size_t i) except 0
-    cpdef size_t asciied(self, size_t i) except 0
-    cpdef unicode canon_string(self, size_t i)
-    cpdef unicode shape_string(self, size_t i)
-    cpdef unicode non_sparse_string(self, size_t i)
-    cpdef unicode asciied_string(self, size_t i)
+    cpdef unicode casefix(self, size_t i)
+    cpdef unicode shape(self, size_t i)
+    cpdef unicode unsparse(self, size_t i)
+    cpdef unicode asciied(self, size_t i)
 
     cpdef bint is_alpha(self, size_t i) except *
     cpdef bint is_ascii(self, size_t i) except *
     cpdef bint is_digit(self, size_t i) except *
diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx
index 64ddf5c29..18f0c1533 100644
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -1,45 +1,7 @@
 # cython: profile=True
 
-from spacy.word cimport Lexeme
-from spacy.lexeme cimport lexeme_check_flag
-from spacy.lexeme cimport lexeme_string_view
+from .word cimport Lexeme
 
-
-cdef enum Flags:
-    Flag_IsAlpha
-    Flag_IsAscii
-    Flag_IsDigit
-    Flag_IsLower
-    Flag_IsPunct
-    Flag_IsSpace
-    Flag_IsTitle
-    Flag_IsUpper
-
-    Flag_CanAdj
-    Flag_CanAdp
-    Flag_CanAdv
-    Flag_CanConj
-    Flag_CanDet
-    Flag_CanNoun
-    Flag_CanNum
-    Flag_CanPdt
-    Flag_CanPos
-    Flag_CanPron
-    Flag_CanPrt
-    Flag_CanPunct
-    Flag_CanVerb
-
-    Flag_OftLower
-    Flag_OftTitle
-    Flag_OftUpper
-    Flag_N
-
-
-cdef enum Views:
-    View_CanonForm
-    View_WordShape
-    View_NonSparse
-    View_Asciied
-    View_N
+from .lexeme cimport *
 
 
 cdef class Tokens:
@@ -79,120 +41,108 @@ cdef class Tokens:
         self.v.push_back(lexeme._c)
 
     cpdef unicode string(self, size_t i):
-        cdef bytes utf8_string = self.v.at(i).string[:self.v.at(i).length]
+        cdef bytes utf8_string = self.v.at(i).strings[LexStr_key]
         cdef unicode string = utf8_string.decode('utf8')
         return string
 
-    cpdef size_t id(self, size_t i) except 0:
-        return <size_t>&self.v.at(i).string
+    cpdef float prob(self, size_t i) except 1:
+        return self.v.at(i).floats[LexFloat_prob]
 
-    cpdef double prob(self, size_t i) except 1:
-        return self.v.at(i).prob
+    cpdef int cluster(self, size_t i) except *:
+        return self.v.at(i).ints[LexInt_cluster]
 
-    cpdef size_t cluster(self, size_t i) except *:
-        return self.v.at(i).cluster
+    cpdef bint check_orth_flag(self, size_t i, size_t flag_id) except *:
+        return lexeme_check_orth_flag(self.v.at(i), flag_id)
 
-    cpdef bint check_flag(self, size_t i, size_t flag_id) except *:
-        return lexeme_check_flag(self.v.at(i), flag_id)
+    cpdef bint check_dist_flag(self, size_t i, size_t flag_id) except *:
+        return lexeme_check_dist_flag(self.v.at(i), flag_id)
 
     cpdef unicode string_view(self, size_t i, size_t view_id):
-        return lexeme_string_view(self.v.at(i), view_id)
+        return lexeme_get_string(self.v.at(i), view_id)
 
     # Provide accessor methods for the features supported by the language.
     # Without these, clients have to use the underlying string_view and check_flag
     # methods, which requires them to know the IDs.
-    cpdef unicode canon_string(self, size_t i):
-        return lexeme_string_view(self.v.at(i), View_CanonForm)
+    cpdef unicode casefix(self, size_t i):
+        return lexeme_get_string(self.v.at(i), LexStr_casefix)
 
-    cpdef unicode shape_string(self, size_t i):
-        return lexeme_string_view(self.v.at(i), View_WordShape)
+    cpdef unicode shape(self, size_t i):
+        return lexeme_get_string(self.v.at(i), LexStr_shape)
 
-    cpdef unicode non_sparse_string(self, size_t i):
-        return lexeme_string_view(self.v.at(i), View_NonSparse)
+    cpdef unicode unsparse(self, size_t i):
+        return lexeme_get_string(self.v.at(i), LexStr_unsparse)
 
-    cpdef unicode asciied_string(self, size_t i):
-        return lexeme_string_view(self.v.at(i), View_Asciied)
+    cpdef unicode asciied(self, size_t i):
+        return lexeme_get_string(self.v.at(i), LexStr_asciied)
 
-    cpdef size_t canon(self, size_t i) except *:
-        return id(self.v.at(i).views[View_CanonForm])
-
-    cpdef size_t shape(self, size_t i) except *:
-        return id(self.v.at(i).views[View_WordShape])
-
-    cpdef size_t non_sparse(self, size_t i) except *:
-        return id(self.v.at(i).views[View_NonSparse])
-
-    cpdef size_t asciied(self, size_t i) except *:
-        return id(self.v.at(i).views[View_Asciied])
-
     cpdef bint is_alpha(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_IsAlpha)
+        return lexeme_check_orth_flag(self.v.at(i), LexOrth_alpha)
 
     cpdef bint is_ascii(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_IsAscii)
+        return lexeme_check_orth_flag(self.v.at(i), LexOrth_ascii)
 
     cpdef bint is_digit(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_IsDigit)
+        return lexeme_check_orth_flag(self.v.at(i), LexOrth_digit)
 
     cpdef bint is_lower(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_IsLower)
+        return lexeme_check_orth_flag(self.v.at(i), LexOrth_lower)
 
     cpdef bint is_punct(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_IsPunct)
+        return lexeme_check_orth_flag(self.v.at(i), LexOrth_punct)
 
     cpdef bint is_space(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_IsSpace)
+        return lexeme_check_orth_flag(self.v.at(i), LexOrth_space)
 
     cpdef bint is_title(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_IsTitle)
+        return lexeme_check_orth_flag(self.v.at(i), LexOrth_title)
 
     cpdef bint is_upper(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_IsUpper)
+        return lexeme_check_orth_flag(self.v.at(i), LexOrth_upper)
 
     cpdef bint can_adj(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanAdj)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_adj)
 
     cpdef bint can_adp(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanAdp)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_adp)
 
     cpdef bint can_adv(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanAdv)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_adv)
 
     cpdef bint can_conj(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanConj)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_conj)
 
     cpdef bint can_det(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanDet)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_det)
 
     cpdef bint can_noun(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanNoun)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_noun)
 
     cpdef bint can_num(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanNum)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_num)
 
     cpdef bint can_pdt(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanPdt)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_pdt)
 
     cpdef bint can_pos(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanPos)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_pos)
 
     cpdef bint can_pron(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanPron)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_pron)
 
     cpdef bint can_prt(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanPrt)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_prt)
 
     cpdef bint can_punct(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanPunct)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_punct)
 
     cpdef bint can_verb(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_CanVerb)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_verb)
 
     cpdef bint oft_lower(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_OftLower)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_lower)
 
     cpdef bint oft_title(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_OftTitle)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_title)
 
     cpdef bint oft_upper(self, size_t i) except *:
-        return lexeme_check_flag(self.v.at(i), Flag_OftUpper)
+        return lexeme_check_dist_flag(self.v.at(i), LexDist_upper)
diff --git a/spacy/word.pxd b/spacy/word.pxd
index 5428d667a..ac9c7bb0e 100644
--- a/spacy/word.pxd
+++ b/spacy/word.pxd
@@ -7,5 +7,6 @@ DEF MAX_FLAG = 64
 
 cdef class Lexeme:
     cdef LexemeC* _c
 
-    cpdef bint check_flag(self, size_t flag_id) except *
+    cpdef bint check_orth_flag(self, size_t flag_id) except *
+    cpdef bint check_dist_flag(self, size_t flag_id) except *
     cpdef unicode string_view(self, size_t view_id)
diff --git a/spacy/word.pyx b/spacy/word.pyx
index 745832775..617e8809f 100644
--- a/spacy/word.pyx
+++ b/spacy/word.pyx
@@ -1,7 +1,10 @@
 # cython: profile=True
 # cython: embedsignature=True
 
-from spacy.lexeme cimport lexeme_check_flag, lexeme_string_view
+from .lexeme cimport lexeme_get_string
+from .lexeme cimport lexeme_check_orth_flag, lexeme_check_dist_flag
+
+from .lexeme cimport *
 
 
 cdef class Lexeme:
@@ -51,49 +54,27 @@ cdef class Lexeme:
 
     property string:
         def __get__(self):
-            cdef bytes utf8_string = self._c.string
+            cdef bytes utf8_string = self._c.strings[LexStr_key]
             cdef unicode string = utf8_string.decode('utf8')
             return string
 
     property prob:
-        def __get__(self): return self._c.prob
+        def __get__(self):
+            return self._c.floats[LexFloat_prob]
+
     property cluster:
-        def __get__(self): return self._c.cluster
+        def __get__(self):
+            return self._c.ints[LexInt_cluster]
+
     property length:
-        def __get__(self): return self._c.length
+        def __get__(self):
+            return self._c.ints[LexInt_length]
 
-    cpdef bint check_flag(self, size_t flag_id) except *:
-        """Lexemes may store language-specific boolean features in a bit-field,
-        with values accessed by providing an ID constant to this function.
+    cpdef bint check_orth_flag(self, size_t flag_id) except *:
+        return lexeme_check_orth_flag(self._c, flag_id)
 
-        The ID constants are exposed as global variables in the language module,
-        e.g.
-
-        >>> from spacy.en import EN
-        >>> lexeme = EN.lookup(u'Nasa')
-        >>> lexeme.check_flag(EN.IS_UPPER)
-        False
-        >>> lexeme.check_flag(EN.OFT_UPPER)
-        True
-        """
-        return lexeme_check_flag(self._c, flag_id)
+    cpdef bint check_dist_flag(self, size_t flag_id) except *:
+        return lexeme_check_dist_flag(self._c, flag_id)
 
     cpdef unicode string_view(self, size_t view_id):
-        """Lexemes may store language-specific string-view features, obtained
-        by transforming the string, possibly in light of distributional information.
-        The string-view features are accessed by providing an ID constant to this
-        function.
-
-        The ID constants are exposed as global variables in the language module,
-        e.g.
-
-        >>> from spacy.en import EN
-        >>> lexeme = EN.lookup(u'Nasa')
-        >>> lexeme.string_view(EN.CANON_CASED)
-        u'NASA'
-        >>> lexeme.string_view(EN.SHAPE)
-        u'Xxxx'
-        >>> lexeme.string_view(EN.NON_SPARSE)
-        u'Xxxx'
-        """
-        return lexeme_string_view(self._c, view_id)
+        return lexeme_get_string(self._c, view_id)
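With the docstring examples above removed, equivalent usage under the new API follows the updated tests: flag and view IDs now come from the `LexOrth_*`/`LexDist_*`/`LexStr_*` enums rather than per-language globals like `EN.IS_UPPER`. A usage sketch, assuming a built spaCy checkout (the expected values are taken from the deleted docstrings and the tests below):

    # Mirrors tests/test_lexeme_flags.py and tests/test_orth.py.
    from spacy.en import EN
    from spacy.lexeme import *

    nasa = EN.lookup(u'Nasa')
    assert nasa.check_orth_flag(LexOrth_alpha)
    assert nasa.string_view(LexStr_shape) == u'Xxxx'
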
diff --git a/tests/test_flag_features.py b/tests/test_flag_features.py
index 61c013e68..2e20ea0e5 100644
--- a/tests/test_flag_features.py
+++ b/tests/test_flag_features.py
@@ -16,26 +16,26 @@ def words():
             "!d", "\nd"]
 
 def test_is_alpha(words):
-    assert is_alpha(words[0], 0, {}, {}) == False
-    assert is_alpha(words[1], 0, {}, {}) == False
-    assert is_alpha(words[2], 0, {}, {}) == False
-    assert is_alpha(words[3], 0, {}, {}) == True
-    assert is_alpha(words[4], 0, {}, {}) == True
-    assert is_alpha(words[5], 0, {}, {}) == False
-    assert is_alpha(words[6], 0, {}, {}) == False
-    assert is_alpha(words[7], 0, {}, {}) == False
-    assert is_alpha(words[8], 0, {}, {}) == False
-    assert is_alpha(words[9], 0, {}, {}) == False
+    assert not is_alpha(words[0])
+    assert not is_alpha(words[1])
+    assert not is_alpha(words[2])
+    assert is_alpha(words[3])
+    assert is_alpha(words[4])
+    assert not is_alpha(words[5])
+    assert not is_alpha(words[6])
+    assert not is_alpha(words[7])
+    assert not is_alpha(words[8])
+    assert not is_alpha(words[9])
 
 
 def test_is_digit(words):
-    assert is_digit(words[0], 0, {}, {}) == True
-    assert is_digit(words[1], 0, {}, {}) == False
-    assert is_digit(words[2], 0, {}, {}) == False
-    assert is_digit(words[3], 0, {}, {}) == False
-    assert is_digit(words[4], 0, {}, {}) == False
-    assert is_digit(words[5], 0, {}, {}) == False
-    assert is_digit(words[6], 0, {}, {}) == False
-    assert is_digit(words[7], 0, {}, {}) == False
-    assert is_digit(words[8], 0, {}, {}) == False
-    assert is_digit(words[9], 0, {}, {}) == False
+    assert is_digit(words[0])
+    assert not is_digit(words[1])
+    assert not is_digit(words[2])
+    assert not is_digit(words[3])
+    assert not is_digit(words[4])
+    assert not is_digit(words[5])
+    assert not is_digit(words[6])
+    assert not is_digit(words[7])
+    assert not is_digit(words[8])
+    assert not is_digit(words[9])
diff --git a/tests/test_is_punct.py b/tests/test_is_punct.py
index 687f5cf31..242e31212 100644
--- a/tests/test_is_punct.py
+++ b/tests/test_is_punct.py
@@ -5,12 +5,12 @@ from spacy.orth import is_punct
 
 
 def test_comma():
-    assert is_punct(',', 0, {}, {}) == True
+    assert is_punct(',')
 
 
 def test_space():
-    assert is_punct(' ', 0, {}, {}) == False
+    assert not is_punct(' ')
 
 
 def test_letter():
-    assert is_punct('a', 0, {}, {}) == False
+    assert not is_punct('a')
diff --git a/tests/test_lexeme_flags.py b/tests/test_lexeme_flags.py
index 4818c33b1..c6ff44757 100644
--- a/tests/test_lexeme_flags.py
+++ b/tests/test_lexeme_flags.py
@@ -3,23 +3,24 @@ from __future__ import unicode_literals
 import pytest
 
 from spacy.en import *
+from spacy.lexeme import *
 
 
 def test_is_alpha():
     the = EN.lookup('the')
-    assert the.check_flag(EN.fl_is_alpha)
+    assert the.check_orth_flag(LexOrth_alpha)
     year = EN.lookup('1999')
-    assert not year.check_flag(EN.fl_is_alpha)
+    assert not year.check_orth_flag(LexOrth_alpha)
     mixed = EN.lookup('hello1')
-    assert not mixed.check_flag(EN.fl_is_alpha)
+    assert not mixed.check_orth_flag(LexOrth_alpha)
 
 
 def test_is_digit():
     the = EN.lookup('the')
-    assert not the.check_flag(EN.fl_is_digit)
+    assert not the.check_orth_flag(LexOrth_digit)
     year = EN.lookup('1999')
-    assert year.check_flag(EN.fl_is_digit)
+    assert year.check_orth_flag(LexOrth_digit)
     mixed = EN.lookup('hello1')
-    assert not mixed.check_flag(EN.fl_is_digit)
+    assert not mixed.check_orth_flag(LexOrth_digit)
diff --git a/tests/test_orth.py b/tests/test_orth.py
index 0840af683..a6be98b05 100644
--- a/tests/test_orth.py
+++ b/tests/test_orth.py
@@ -4,6 +4,7 @@ import pytest
 
 import spacy.word
 from spacy.en import EN
+from spacy.lexeme import *
 
 
 @pytest.fixture
@@ -12,7 +13,7 @@ def C3P0():
 
 
 def test_shape(C3P0):
-    assert C3P0.string_view(EN.v_shape) == "XdXd"
+    assert C3P0.string_view(LexStr_shape) == "XdXd"
 
 
 def test_length():
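For reference, the "XdXd" expectation in test_shape comes from `orth.word_shape`, which maps letters to `x`/`X` by case and digits to `d`. A rough pure-Python approximation (the real function also handles runs of repeated characters, which this sketch omits):

    def word_shape_sketch(string):
        # Approximation of orth.word_shape for short strings like 'C3P0':
        # digits become 'd', letters become 'x' or 'X' by case.
        shape = []
        for c in string:
            if c.isdigit():
                shape.append('d')
            elif c.isalpha():
                shape.append('X' if c.isupper() else 'x')
            else:
                shape.append(c)
        return ''.join(shape)

    assert word_shape_sketch(u'C3P0') == u'XdXd'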