From fda94271afe84f85050ee9d0aa34d75f3079d7c9 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sat, 24 Jan 2015 06:17:03 +1100
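Subject: [PATCH] * Rename NORM1 and NORM2 attrs to lower and norm

Also export TAG from spacy/en/attrs.pxd, and remove Token.check_flag,
Token.is_pos and the docstring of Token.string.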

---
 spacy/en/__init__.py |  4 ++--
 spacy/en/attrs.pxd   | 10 ++++++----
 spacy/lexeme.pxd     | 26 +++++++++++++-------------
 spacy/lexeme.pyx     |  4 ++--
 spacy/structs.pxd    |  4 ++--
 spacy/tokens.pxd     |  4 ++--
 spacy/tokens.pyx     | 29 +++++++++++------------------
 spacy/typedefs.pxd   |  4 ++--
 spacy/vocab.pyx      |  4 ++--
 9 files changed, 42 insertions(+), 47 deletions(-)

diff --git a/spacy/en/__init__.py b/spacy/en/__init__.py
index 131d7e174..bfd521a2a 100644
--- a/spacy/en/__init__.py
+++ b/spacy/en/__init__.py
@@ -20,8 +20,8 @@ def get_lex_props(string):
         'flags': get_flags(string),
         'length': len(string),
         'orth': string,
-        'norm1': string.lower(),
-        'norm2': string,
+        'lower': string.lower(),
+        'norm': string,
         'shape': orth.word_shape(string),
         'prefix': string[0],
         'suffix': string[-3:],
diff --git a/spacy/en/attrs.pxd b/spacy/en/attrs.pxd
index 3582e11ec..34f8e600b 100644
--- a/spacy/en/attrs.pxd
+++ b/spacy/en/attrs.pxd
@@ -2,13 +2,14 @@ from ..attrs cimport FLAG0, FLAG1, FLAG2, FLAG3, FLAG4, FLAG5, FLAG6, FLAG7
 from ..attrs cimport FLAG8, FLAG9, FLAG10
 from ..attrs cimport ORTH as _ORTH
 from ..attrs cimport SHAPE as _SHAPE
-from ..attrs cimport NORM1 as _NORM1
-from ..attrs cimport NORM2 as _NORM2
+from ..attrs cimport LOWER as _LOWER
+from ..attrs cimport NORM as _NORM
 from ..attrs cimport CLUSTER as _CLUSTER
 from ..attrs cimport PREFIX as _PREFIX
 from ..attrs cimport SUFFIX as _SUFFIX
 from ..attrs cimport LEMMA as _LEMMA
 from ..attrs cimport POS as _POS
+from ..attrs cimport TAG as _TAG
 
 
 cpdef enum:
@@ -26,10 +27,11 @@ cpdef enum:
 
     ORTH = _ORTH
     SHAPE = _SHAPE
-    LOWER = _NORM1
-    NORM2 = _NORM2
+    LOWER = _LOWER
+    NORM = _NORM
     PREFIX = _PREFIX
     SUFFIX = _SUFFIX
     CLUSTER = _CLUSTER
     LEMMA = _LEMMA
     POS = _POS
+    TAG = _TAG
diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd
index 4cec661c6..0723ed6fe 100644
--- a/spacy/lexeme.pxd
+++ b/spacy/lexeme.pxd
@@ -1,5 +1,5 @@
 from .typedefs cimport hash_t, flags_t, id_t, len_t, tag_t, attr_t, attr_id_t
-from .typedefs cimport ID, ORTH, NORM1, NORM2, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
+from .typedefs cimport ID, ORTH, LOWER, NORM, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
 from .structs cimport LexemeC
 from .strings cimport StringStore
 
@@ -21,15 +21,15 @@ cdef class Lexeme:
     cdef readonly attr_t length
 
     cdef readonly attr_t orth
-    cdef readonly attr_t norm1
-    cdef readonly attr_t norm2
+    cdef readonly attr_t lower
+    cdef readonly attr_t norm
     cdef readonly attr_t shape
     cdef readonly attr_t prefix
     cdef readonly attr_t suffix
 
     cdef readonly unicode orth_
-    cdef readonly unicode norm1_
-    cdef readonly unicode norm2_
+    cdef readonly unicode lower_
+    cdef readonly unicode norm_
     cdef readonly unicode shape_
     cdef readonly unicode prefix_
     cdef readonly unicode suffix_
@@ -50,15 +50,15 @@ cdef class Lexeme:
         py.length = ptr.length
 
         py.orth = ptr.orth
-        py.norm1 = ptr.norm1
-        py.norm2 = ptr.norm2
+        py.lower = ptr.lower
+        py.norm = ptr.norm
         py.shape = ptr.shape
         py.prefix = ptr.prefix
         py.suffix = ptr.suffix
 
         py.orth_ = strings[ptr.orth]
-        py.norm1_ = strings[ptr.norm1]
-        py.norm2_ = strings[ptr.norm2]
+        py.lower_ = strings[ptr.lower]
+        py.norm_ = strings[ptr.norm]
         py.shape_ = strings[ptr.shape]
         py.prefix_ = strings[ptr.prefix]
         py.suffix_ = strings[ptr.suffix]
@@ -80,10 +80,10 @@ cdef inline attr_t get_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
         return lex.id
     elif feat_name == ORTH:
         return lex.orth
-    elif feat_name == NORM1:
-        return lex.norm1
-    elif feat_name == NORM2:
-        return lex.norm2
+    elif feat_name == LOWER:
+        return lex.lower
+    elif feat_name == NORM:
+        return lex.norm
     elif feat_name == SHAPE:
         return lex.shape
     elif feat_name == PREFIX:
diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx
index 0d6e9f087..59e4741da 100644
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -17,8 +17,8 @@ cdef int set_lex_struct_props(LexemeC* lex, dict props, StringStore string_store
                               const float* empty_vec) except -1:
     lex.length = props['length']
     lex.orth = string_store[props['orth']]
-    lex.norm1 = string_store[props['norm1']] 
-    lex.norm2 = string_store[props['norm2']] 
+    lex.lower = string_store[props['lower']] 
+    lex.norm = string_store[props['norm']] 
     lex.shape = string_store[props['shape']] 
     lex.prefix = string_store[props['prefix']]
     lex.suffix = string_store[props['suffix']]
diff --git a/spacy/structs.pxd b/spacy/structs.pxd
index 1b90abad5..1d6de506c 100644
--- a/spacy/structs.pxd
+++ b/spacy/structs.pxd
@@ -12,8 +12,8 @@ cdef struct LexemeC:
     attr_t length
 
     attr_t orth
-    attr_t norm1
-    attr_t norm2
+    attr_t lower
+    attr_t norm
     attr_t shape
     attr_t prefix
     attr_t suffix
diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd
index 617666bc1..25263db29 100644
--- a/spacy/tokens.pxd
+++ b/spacy/tokens.pxd
@@ -51,8 +51,8 @@ cdef class Token:
     cdef readonly attr_t cluster
     cdef readonly attr_t length
     cdef readonly attr_t orth
-    cdef readonly attr_t norm1
-    cdef readonly attr_t norm2
+    cdef readonly attr_t lower
+    cdef readonly attr_t norm
     cdef readonly attr_t shape
     cdef readonly attr_t prefix
     cdef readonly attr_t suffix
diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx
index 03fa48e8e..19922cf4c 100644
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -7,7 +7,7 @@ from preshed.counter cimport PreshCounter
 from .vocab cimport EMPTY_LEXEME
 from .typedefs cimport attr_id_t, attr_t
 from .typedefs cimport LEMMA
-from .typedefs cimport ID, ORTH, NORM1, NORM2, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
+from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
 from .typedefs cimport POS, LEMMA
 
 from unidecode import unidecode
@@ -44,10 +44,10 @@ cdef attr_t get_lex_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
         return lex.id
     elif feat_name == ORTH:
         return lex.orth
-    elif feat_name == NORM1:
-        return lex.norm1
-    elif feat_name == NORM2:
-        return lex.norm2
+    elif feat_name == LOWER:
+        return lex.lower
+    elif feat_name == NORM:
+        return lex.norm
     elif feat_name == SHAPE:
         return lex.shape
     elif feat_name == PREFIX:
@@ -223,8 +223,8 @@ cdef class Token:
         self.cluster = t.lex.cluster
         self.length = t.lex.length
         self.orth = t.lex.orth
-        self.norm1 = t.lex.norm1
-        self.norm2 = t.lex.norm2
+        self.lower = t.lex.lower
+        self.norm = t.lex.norm
         self.shape = t.lex.shape
         self.prefix = t.lex.prefix
         self.suffix = t.lex.suffix
@@ -254,12 +254,6 @@ cdef class Token:
         """
         return self._seq.data[self.i].lex.length
 
-    def check_flag(self, attr_id_t flag):
-        return self.flags & (1 << flag)
-
-    def is_pos(self, univ_tag_t pos):
-        return self.tag == pos
-
     property head:
         """The token predicted by the parser to be the head of the current token."""
         def __get__(self):
@@ -267,7 +261,6 @@ cdef class Token:
             return Token(self._seq, self.i + t.head)
 
     property string:
-        """The unicode string of the word, with no whitespace padding."""
         def __get__(self):
             cdef const TokenC* t = &self._seq.data[self.i]
             if t.lex.orth == 0:
@@ -279,13 +272,13 @@ cdef class Token:
         def __get__(self):
             return self._seq.vocab.strings[self.orth]
 
-    property norm1_:
+    property lower_:
         def __get__(self):
-            return self._seq.vocab.strings[self.norm1]
+            return self._seq.vocab.strings[self.lower]
 
-    property norm2_:
+    property norm_:
         def __get__(self):
-            return self._seq.vocab.strings[self.norm2]
+            return self._seq.vocab.strings[self.norm]
 
     property shape_:
         def __get__(self):
diff --git a/spacy/typedefs.pxd b/spacy/typedefs.pxd
index 74575f4b7..9d086827a 100644
--- a/spacy/typedefs.pxd
+++ b/spacy/typedefs.pxd
@@ -90,8 +90,8 @@ cpdef enum attr_id_t:
 
     ID
     ORTH
-    NORM1
-    NORM2
+    LOWER
+    NORM
     SHAPE
     PREFIX
     SUFFIX
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index abede0404..8eb38b7ca 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -195,8 +195,8 @@ cdef class Vocab:
         for i in range(self.lexemes.size()):
             # Cast away the const, cos we can modify our lexemes
             lex = <LexemeC*>self.lexemes[i]
-            if lex.norm1 < vectors.size():
-                lex.repvec = vectors[lex.norm1]
+            if lex.lower < vectors.size():
+                lex.repvec = vectors[lex.lower]
             else:
                 lex.repvec = EMPTY_VEC