From 108f1f786e62b1fc713ca20ff9a1aaf32665824b Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Fri, 20 Oct 2017 13:08:44 +0200
Subject: [PATCH] Update symbols and document missing token attributes (see
 #1439)

---
 spacy/symbols.pxd      | 23 ++++++-----------------
 spacy/symbols.pyx      | 13 ++++++++-----
 website/api/token.jade | 20 ++++++++++++++++++++
 3 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/spacy/symbols.pxd b/spacy/symbols.pxd
index 4f1d35cf8..6960681a3 100644
--- a/spacy/symbols.pxd
+++ b/spacy/symbols.pxd
@@ -13,12 +13,12 @@ cdef enum symbol_t:
     LIKE_EMAIL
     IS_STOP
     IS_OOV
+    IS_BRACKET
+    IS_QUOTE
+    IS_LEFT_PUNCT
+    IS_RIGHT_PUNCT
 
-    FLAG14 = 14
-    FLAG15
-    FLAG16
-    FLAG17
-    FLAG18
+    FLAG18 = 18
     FLAG19
     FLAG20
     FLAG21
@@ -455,16 +455,5 @@ cdef enum symbol_t:
     root
     xcomp
 
-# Move these up to FLAG14--FLAG18 once we finish the functionality
-# and are ready to regenerate the model.
-#IS_BRACKET
-#IS_QUOTE
-#IS_LEFT_PUNCT
-#IS_RIGHT_PUNCT
-
-# These symbols are currently missing. However, if we add them currently,
-# we'll throw off the integer index and the model will have to be retrained.
-# We therefore wait until the next data version to add them.
-# acl
-
+    acl
     LAW
diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx
index f64577309..0e0337b6e 100644
--- a/spacy/symbols.pyx
+++ b/spacy/symbols.pyx
@@ -18,10 +18,11 @@ IDS = {
     "LIKE_EMAIL": LIKE_EMAIL,
     "IS_STOP": IS_STOP,
     "IS_OOV": IS_OOV,
-    "FLAG14": FLAG14,
-    "FLAG15": FLAG15,
-    "FLAG16": FLAG16,
-    "FLAG17": FLAG17,
+    "IS_BRACKET": IS_BRACKET,
+    "IS_QUOTE": IS_QUOTE,
+    "IS_LEFT_PUNCT": IS_LEFT_PUNCT,
+    "IS_RIGHT_PUNCT": IS_RIGHT_PUNCT,
+
     "FLAG18": FLAG18,
     "FLAG19": FLAG19,
     "FLAG20": FLAG20,
@@ -457,7 +458,9 @@ IDS = {
     "quantmod": quantmod,
     "rcmod": rcmod,
     "root": root,
-    "xcomp": xcomp
+    "xcomp": xcomp,
+
+    "acl": acl,
     "LAW": LAW
 }
 
diff --git a/website/api/token.jade b/website/api/token.jade
index 465d44c66..4062594b4 100644
--- a/website/api/token.jade
+++ b/website/api/token.jade
@@ -586,6 +586,16 @@ p The L2 norm of the token's vector representation.
         +cell bool
         +cell Is the token punctuation?
 
+    +row
+        +cell #[code is_left_punct]
+        +cell bool
+        +cell Is the token a left punctuation mark, e.g. #[code (]?
+
+    +row
+        +cell #[code is_right_punct]
+        +cell bool
+        +cell Is the token a right punctuation mark, e.g. #[code )]?
+
     +row
         +cell #[code is_space]
         +cell bool
@@ -593,6 +603,16 @@ p The L2 norm of the token's vector representation.
             |  Does the token consist of whitespace characters? Equivalent to
             |  #[code token.text.isspace()].
 
+    +row
+        +cell #[code is_bracket]
+        +cell bool
+        +cell Is the token a bracket, e.g. #[code (] or #[code )]?
+
+    +row
+        +cell #[code is_quote]
+        +cell bool
+        +cell Is the token a quotation mark?
+
     +row
         +cell #[code like_url]
         +cell bool