Merge branch 'develop' of https://github.com/explosion/spaCy into develop

2025-07-09 06:13:08 +03:00 · 2017-10-20 16:28:34 +02:00 · 2017-10-20 16:28:34 +02:00 · fec53f09f7
commit fec53f09f7
parent f111b228e0 108f1f786e
4 changed files with 40 additions and 22 deletions
--- a/spacy/symbols.pxd
+++ b/spacy/symbols.pxd
@@ -13,12 +13,12 @@ cdef enum symbol_t:
    LIKE_EMAIL
    IS_STOP
    IS_OOV
    IS_BRACKET
    IS_QUOTE
    IS_LEFT_PUNCT
    IS_RIGHT_PUNCT
-    FLAG14 = 14
+    FLAG18 = 18
    FLAG15
    FLAG16
    FLAG17
    FLAG18
    FLAG19
    FLAG20
    FLAG21
@@ -455,15 +455,5 @@ cdef enum symbol_t:
    root
    xcomp
-# Move these up to FLAG14--FLAG18 once we finish the functionality
+    acl
-# and are ready to regenerate the model.
+    LAW
 #IS_BRACKET
 #IS_QUOTE
 #IS_LEFT_PUNCT
 #IS_RIGHT_PUNCT
 # These symbols are currently missing. However, if we add them currently,
 # we'll throw off the integer index and the model will have to be retrained.
 # We therefore wait until the next data version to add them.
 # acl
--- a/spacy/symbols.pyx
+++ b/spacy/symbols.pyx
@@ -18,10 +18,11 @@ IDS = {
    "LIKE_EMAIL": LIKE_EMAIL,
    "IS_STOP": IS_STOP,
    "IS_OOV": IS_OOV,
-    "FLAG14": FLAG14,
+    "IS_BRACKET": IS_BRACKET,
-    "FLAG15": FLAG15,
+    "IS_QUOTE": IS_QUOTE,
-    "FLAG16": FLAG16,
+    "IS_LEFT_PUNCT": IS_LEFT_PUNCT,
-    "FLAG17": FLAG17,
+    "IS_RIGHT_PUNCT": IS_RIGHT_PUNCT,
    "FLAG18": FLAG18,
    "FLAG19": FLAG19,
    "FLAG20": FLAG20,
@@ -457,7 +458,10 @@ IDS = {
    "quantmod": quantmod,
    "rcmod": rcmod,
    "root": root,
-    "xcomp": xcomp
+    "xcomp": xcomp,
    "acl": acl,
    "LAW": LAW
 }
 def sort_nums(x):
--- a/website/api/_annotation/_named-entities.jade
+++ b/website/api/_annotation/_named-entities.jade
@@ -37,6 +37,10 @@
        +cell #[code WORK_OF_ART]
        +cell Titles of books, songs, etc.
    +row
        +cell #[code LAW]
        +cell Named documents made into laws.
    +row
        +cell #[code LANGUAGE]
        +cell Any named language.
--- a/website/api/token.jade
+++ b/website/api/token.jade
@@ -586,6 +586,16 @@ p The L2 norm of the token's vector representation.
        +cell bool
        +cell Is the token punctuation?
    +row
        +cell #[code is_left_punct]
        +cell bool
        +cell Is the token a left punctuation mark, e.g. #[code (]?
    +row
        +cell #[code is_right_punct]
        +cell bool
        +cell Is the token a right punctuation mark, e.g. #[code )]?
    +row
        +cell #[code is_space]
        +cell bool
@@ -593,6 +603,16 @@ p The L2 norm of the token's vector representation.
            |  Does the token consist of whitespace characters? Equivalent to
            |  #[code token.text.isspace()].
    +row
        +cell #[code is_bracket]
        +cell bool
        +cell Is the token a bracket?
    +row
        +cell #[code is_quote]
        +cell bool
        +cell Is the token a quotation mark?
    +row
        +cell #[code like_url]
        +cell bool