mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
		
						commit
						fec53f09f7
					
				| 
						 | 
				
			
			@ -13,12 +13,12 @@ cdef enum symbol_t:
 | 
			
		|||
    LIKE_EMAIL
 | 
			
		||||
    IS_STOP
 | 
			
		||||
    IS_OOV
 | 
			
		||||
    IS_BRACKET
 | 
			
		||||
    IS_QUOTE
 | 
			
		||||
    IS_LEFT_PUNCT
 | 
			
		||||
    IS_RIGHT_PUNCT
 | 
			
		||||
 | 
			
		||||
    FLAG14 = 14
 | 
			
		||||
    FLAG15
 | 
			
		||||
    FLAG16
 | 
			
		||||
    FLAG17
 | 
			
		||||
    FLAG18
 | 
			
		||||
    FLAG18 = 18
 | 
			
		||||
    FLAG19
 | 
			
		||||
    FLAG20
 | 
			
		||||
    FLAG21
 | 
			
		||||
| 
						 | 
				
			
			@ -455,15 +455,5 @@ cdef enum symbol_t:
 | 
			
		|||
    root
 | 
			
		||||
    xcomp
 | 
			
		||||
 | 
			
		||||
# Move these up to FLAG14--FLAG18 once we finish the functionality
 | 
			
		||||
# and are ready to regenerate the model.
 | 
			
		||||
#IS_BRACKET
 | 
			
		||||
#IS_QUOTE
 | 
			
		||||
#IS_LEFT_PUNCT
 | 
			
		||||
#IS_RIGHT_PUNCT
 | 
			
		||||
 | 
			
		||||
# These symbols are currently missing. However, if we add them currently,
 | 
			
		||||
# we'll throw off the integer index and the model will have to be retrained.
 | 
			
		||||
# We therefore wait until the next data version to add them.
 | 
			
		||||
# acl
 | 
			
		||||
 | 
			
		||||
    acl
 | 
			
		||||
    LAW
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,10 +18,11 @@ IDS = {
 | 
			
		|||
    "LIKE_EMAIL": LIKE_EMAIL,
 | 
			
		||||
    "IS_STOP": IS_STOP,
 | 
			
		||||
    "IS_OOV": IS_OOV,
 | 
			
		||||
    "FLAG14": FLAG14,
 | 
			
		||||
    "FLAG15": FLAG15,
 | 
			
		||||
    "FLAG16": FLAG16,
 | 
			
		||||
    "FLAG17": FLAG17,
 | 
			
		||||
    "IS_BRACKET": IS_BRACKET,
 | 
			
		||||
    "IS_QUOTE": IS_QUOTE,
 | 
			
		||||
    "IS_LEFT_PUNCT": IS_LEFT_PUNCT,
 | 
			
		||||
    "IS_RIGHT_PUNCT": IS_RIGHT_PUNCT,
 | 
			
		||||
 | 
			
		||||
    "FLAG18": FLAG18,
 | 
			
		||||
    "FLAG19": FLAG19,
 | 
			
		||||
    "FLAG20": FLAG20,
 | 
			
		||||
| 
						 | 
				
			
			@ -457,7 +458,10 @@ IDS = {
 | 
			
		|||
    "quantmod": quantmod,
 | 
			
		||||
    "rcmod": rcmod,
 | 
			
		||||
    "root": root,
 | 
			
		||||
    "xcomp": xcomp
 | 
			
		||||
    "xcomp": xcomp,
 | 
			
		||||
 | 
			
		||||
    "acl": acl,
 | 
			
		||||
    "LAW": LAW
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
def sort_nums(x):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -37,6 +37,10 @@
 | 
			
		|||
        +cell #[code WORK_OF_ART]
 | 
			
		||||
        +cell Titles of books, songs, etc.
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code LAW]
 | 
			
		||||
        +cell Named documents made into laws.
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code LANGUAGE]
 | 
			
		||||
        +cell Any named language.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -586,6 +586,16 @@ p The L2 norm of the token's vector representation.
 | 
			
		|||
        +cell bool
 | 
			
		||||
        +cell Is the token punctuation?
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code is_left_punct]
 | 
			
		||||
        +cell bool
 | 
			
		||||
        +cell Is the token a left punctuation mark, e.g. #[code (]?
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code is_right_punct]
 | 
			
		||||
        +cell bool
 | 
			
		||||
        +cell Is the token a right punctuation mark, e.g. #[code )]?
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code is_space]
 | 
			
		||||
        +cell bool
 | 
			
		||||
| 
						 | 
				
			
			@ -593,6 +603,16 @@ p The L2 norm of the token's vector representation.
 | 
			
		|||
            |  Does the token consist of whitespace characters? Equivalent to
 | 
			
		||||
            |  #[code token.text.isspace()].
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code is_bracket]
 | 
			
		||||
        +cell bool
 | 
			
		||||
        +cell Is the token a bracket?
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code is_quote]
 | 
			
		||||
        +cell bool
 | 
			
		||||
        +cell Is the token a quotation mark?
 | 
			
		||||
 | 
			
		||||
    +row
 | 
			
		||||
        +cell #[code like_url]
 | 
			
		||||
        +cell bool
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user