mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	* Consolidate and freeze symbols. Instead of having symbol values defined in three potentially conflicting places (`spacy.attrs`, `spacy.parts_of_speech`, `spacy.symbols`), define all symbols in `spacy.symbols` and reference those values in `spacy.attrs` and `spacy.parts_of_speech`. Remove deprecated and placeholder symbols from `spacy.attrs.IDS`. Make `spacy.attrs.NAMES` and `spacy.symbols.NAMES` reverse dicts rather than lists in order to support future use of hash values in `attr_id_t`. Minor changes: use `uint64_t` for attrs in `Doc.to_array` to support future use of hash values; remove unneeded attrs filter for error message in `Doc.to_array`; remove unused attr `SENT_END`; handle dynamic size of `attr_id_t` in `Doc.to_array`; undo added warnings; refactor to make `Doc.to_array` more similar to `Doc.from_array`; improve refactoring.
		
			
				
	
	
		
			468 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			468 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pytest
 | |
| from spacy.symbols import IDS, NAMES
 | |
| 
 | |
# Frozen snapshot of the expected symbol table: maps each symbol name to its
# integer ID. test_frozen_symbols asserts that spacy.symbols.IDS is equal to
# this dict, so any added, removed or renumbered symbol fails the test.
# The IDs are fixed values, not positions: entries are not all listed in
# ascending ID order (e.g. "ENT_ID": 454 and "LAW": 390).
# NOTE(review): the "V3" prefix presumably refers to spaCy v3 -- confirm.
| V3_SYMBOLS = {
 | |
|     "": 0,
 | |
|     "IS_ALPHA": 1,
 | |
|     "IS_ASCII": 2,
 | |
|     "IS_DIGIT": 3,
 | |
|     "IS_LOWER": 4,
 | |
|     "IS_PUNCT": 5,
 | |
|     "IS_SPACE": 6,
 | |
|     "IS_TITLE": 7,
 | |
|     "IS_UPPER": 8,
 | |
|     "LIKE_URL": 9,
 | |
|     "LIKE_NUM": 10,
 | |
|     "LIKE_EMAIL": 11,
 | |
|     "IS_STOP": 12,
 | |
|     "IS_OOV_DEPRECATED": 13,
 | |
|     "IS_BRACKET": 14,
 | |
|     "IS_QUOTE": 15,
 | |
|     "IS_LEFT_PUNCT": 16,
 | |
|     "IS_RIGHT_PUNCT": 17,
 | |
|     "IS_CURRENCY": 18,
 | |
|     "FLAG19": 19,
 | |
|     "FLAG20": 20,
 | |
|     "FLAG21": 21,
 | |
|     "FLAG22": 22,
 | |
|     "FLAG23": 23,
 | |
|     "FLAG24": 24,
 | |
|     "FLAG25": 25,
 | |
|     "FLAG26": 26,
 | |
|     "FLAG27": 27,
 | |
|     "FLAG28": 28,
 | |
|     "FLAG29": 29,
 | |
|     "FLAG30": 30,
 | |
|     "FLAG31": 31,
 | |
|     "FLAG32": 32,
 | |
|     "FLAG33": 33,
 | |
|     "FLAG34": 34,
 | |
|     "FLAG35": 35,
 | |
|     "FLAG36": 36,
 | |
|     "FLAG37": 37,
 | |
|     "FLAG38": 38,
 | |
|     "FLAG39": 39,
 | |
|     "FLAG40": 40,
 | |
|     "FLAG41": 41,
 | |
|     "FLAG42": 42,
 | |
|     "FLAG43": 43,
 | |
|     "FLAG44": 44,
 | |
|     "FLAG45": 45,
 | |
|     "FLAG46": 46,
 | |
|     "FLAG47": 47,
 | |
|     "FLAG48": 48,
 | |
|     "FLAG49": 49,
 | |
|     "FLAG50": 50,
 | |
|     "FLAG51": 51,
 | |
|     "FLAG52": 52,
 | |
|     "FLAG53": 53,
 | |
|     "FLAG54": 54,
 | |
|     "FLAG55": 55,
 | |
|     "FLAG56": 56,
 | |
|     "FLAG57": 57,
 | |
|     "FLAG58": 58,
 | |
|     "FLAG59": 59,
 | |
|     "FLAG60": 60,
 | |
|     "FLAG61": 61,
 | |
|     "FLAG62": 62,
 | |
|     "FLAG63": 63,
 | |
|     "ID": 64,
 | |
|     "ORTH": 65,
 | |
|     "LOWER": 66,
 | |
|     "NORM": 67,
 | |
|     "SHAPE": 68,
 | |
|     "PREFIX": 69,
 | |
|     "SUFFIX": 70,
 | |
|     "LENGTH": 71,
 | |
|     "CLUSTER": 72,
 | |
|     "LEMMA": 73,
 | |
|     "POS": 74,
 | |
|     "TAG": 75,
 | |
|     "DEP": 76,
 | |
|     "ENT_IOB": 77,
 | |
|     "ENT_TYPE": 78,
 | |
|     "ENT_ID": 454,
 | |
|     "ENT_KB_ID": 452,
 | |
|     "HEAD": 79,
 | |
|     "SENT_START": 80,
 | |
|     "SPACY": 81,
 | |
|     "PROB": 82,
 | |
|     "LANG": 83,
 | |
|     "IDX": 455,
 | |
|     "ADJ": 84,
 | |
|     "ADP": 85,
 | |
|     "ADV": 86,
 | |
|     "AUX": 87,
 | |
|     "CONJ": 88,
 | |
|     "CCONJ": 89,
 | |
|     "DET": 90,
 | |
|     "INTJ": 91,
 | |
|     "NOUN": 92,
 | |
|     "NUM": 93,
 | |
|     "PART": 94,
 | |
|     "PRON": 95,
 | |
|     "PROPN": 96,
 | |
|     "PUNCT": 97,
 | |
|     "SCONJ": 98,
 | |
|     "SYM": 99,
 | |
|     "VERB": 100,
 | |
|     "X": 101,
 | |
|     "EOL": 102,
 | |
|     "SPACE": 103,
 | |
|     "DEPRECATED001": 104,
 | |
|     "DEPRECATED002": 105,
 | |
|     "DEPRECATED003": 106,
 | |
|     "DEPRECATED004": 107,
 | |
|     "DEPRECATED005": 108,
 | |
|     "DEPRECATED006": 109,
 | |
|     "DEPRECATED007": 110,
 | |
|     "DEPRECATED008": 111,
 | |
|     "DEPRECATED009": 112,
 | |
|     "DEPRECATED010": 113,
 | |
|     "DEPRECATED011": 114,
 | |
|     "DEPRECATED012": 115,
 | |
|     "DEPRECATED013": 116,
 | |
|     "DEPRECATED014": 117,
 | |
|     "DEPRECATED015": 118,
 | |
|     "DEPRECATED016": 119,
 | |
|     "DEPRECATED017": 120,
 | |
|     "DEPRECATED018": 121,
 | |
|     "DEPRECATED019": 122,
 | |
|     "DEPRECATED020": 123,
 | |
|     "DEPRECATED021": 124,
 | |
|     "DEPRECATED022": 125,
 | |
|     "DEPRECATED023": 126,
 | |
|     "DEPRECATED024": 127,
 | |
|     "DEPRECATED025": 128,
 | |
|     "DEPRECATED026": 129,
 | |
|     "DEPRECATED027": 130,
 | |
|     "DEPRECATED028": 131,
 | |
|     "DEPRECATED029": 132,
 | |
|     "DEPRECATED030": 133,
 | |
|     "DEPRECATED031": 134,
 | |
|     "DEPRECATED032": 135,
 | |
|     "DEPRECATED033": 136,
 | |
|     "DEPRECATED034": 137,
 | |
|     "DEPRECATED035": 138,
 | |
|     "DEPRECATED036": 139,
 | |
|     "DEPRECATED037": 140,
 | |
|     "DEPRECATED038": 141,
 | |
|     "DEPRECATED039": 142,
 | |
|     "DEPRECATED040": 143,
 | |
|     "DEPRECATED041": 144,
 | |
|     "DEPRECATED042": 145,
 | |
|     "DEPRECATED043": 146,
 | |
|     "DEPRECATED044": 147,
 | |
|     "DEPRECATED045": 148,
 | |
|     "DEPRECATED046": 149,
 | |
|     "DEPRECATED047": 150,
 | |
|     "DEPRECATED048": 151,
 | |
|     "DEPRECATED049": 152,
 | |
|     "DEPRECATED050": 153,
 | |
|     "DEPRECATED051": 154,
 | |
|     "DEPRECATED052": 155,
 | |
|     "DEPRECATED053": 156,
 | |
|     "DEPRECATED054": 157,
 | |
|     "DEPRECATED055": 158,
 | |
|     "DEPRECATED056": 159,
 | |
|     "DEPRECATED057": 160,
 | |
|     "DEPRECATED058": 161,
 | |
|     "DEPRECATED059": 162,
 | |
|     "DEPRECATED060": 163,
 | |
|     "DEPRECATED061": 164,
 | |
|     "DEPRECATED062": 165,
 | |
|     "DEPRECATED063": 166,
 | |
|     "DEPRECATED064": 167,
 | |
|     "DEPRECATED065": 168,
 | |
|     "DEPRECATED066": 169,
 | |
|     "DEPRECATED067": 170,
 | |
|     "DEPRECATED068": 171,
 | |
|     "DEPRECATED069": 172,
 | |
|     "DEPRECATED070": 173,
 | |
|     "DEPRECATED071": 174,
 | |
|     "DEPRECATED072": 175,
 | |
|     "DEPRECATED073": 176,
 | |
|     "DEPRECATED074": 177,
 | |
|     "DEPRECATED075": 178,
 | |
|     "DEPRECATED076": 179,
 | |
|     "DEPRECATED077": 180,
 | |
|     "DEPRECATED078": 181,
 | |
|     "DEPRECATED079": 182,
 | |
|     "DEPRECATED080": 183,
 | |
|     "DEPRECATED081": 184,
 | |
|     "DEPRECATED082": 185,
 | |
|     "DEPRECATED083": 186,
 | |
|     "DEPRECATED084": 187,
 | |
|     "DEPRECATED085": 188,
 | |
|     "DEPRECATED086": 189,
 | |
|     "DEPRECATED087": 190,
 | |
|     "DEPRECATED088": 191,
 | |
|     "DEPRECATED089": 192,
 | |
|     "DEPRECATED090": 193,
 | |
|     "DEPRECATED091": 194,
 | |
|     "DEPRECATED092": 195,
 | |
|     "DEPRECATED093": 196,
 | |
|     "DEPRECATED094": 197,
 | |
|     "DEPRECATED095": 198,
 | |
|     "DEPRECATED096": 199,
 | |
|     "DEPRECATED097": 200,
 | |
|     "DEPRECATED098": 201,
 | |
|     "DEPRECATED099": 202,
 | |
|     "DEPRECATED100": 203,
 | |
|     "DEPRECATED101": 204,
 | |
|     "DEPRECATED102": 205,
 | |
|     "DEPRECATED103": 206,
 | |
|     "DEPRECATED104": 207,
 | |
|     "DEPRECATED105": 208,
 | |
|     "DEPRECATED106": 209,
 | |
|     "DEPRECATED107": 210,
 | |
|     "DEPRECATED108": 211,
 | |
|     "DEPRECATED109": 212,
 | |
|     "DEPRECATED110": 213,
 | |
|     "DEPRECATED111": 214,
 | |
|     "DEPRECATED112": 215,
 | |
|     "DEPRECATED113": 216,
 | |
|     "DEPRECATED114": 217,
 | |
|     "DEPRECATED115": 218,
 | |
|     "DEPRECATED116": 219,
 | |
|     "DEPRECATED117": 220,
 | |
|     "DEPRECATED118": 221,
 | |
|     "DEPRECATED119": 222,
 | |
|     "DEPRECATED120": 223,
 | |
|     "DEPRECATED121": 224,
 | |
|     "DEPRECATED122": 225,
 | |
|     "DEPRECATED123": 226,
 | |
|     "DEPRECATED124": 227,
 | |
|     "DEPRECATED125": 228,
 | |
|     "DEPRECATED126": 229,
 | |
|     "DEPRECATED127": 230,
 | |
|     "DEPRECATED128": 231,
 | |
|     "DEPRECATED129": 232,
 | |
|     "DEPRECATED130": 233,
 | |
|     "DEPRECATED131": 234,
 | |
|     "DEPRECATED132": 235,
 | |
|     "DEPRECATED133": 236,
 | |
|     "DEPRECATED134": 237,
 | |
|     "DEPRECATED135": 238,
 | |
|     "DEPRECATED136": 239,
 | |
|     "DEPRECATED137": 240,
 | |
|     "DEPRECATED138": 241,
 | |
|     "DEPRECATED139": 242,
 | |
|     "DEPRECATED140": 243,
 | |
|     "DEPRECATED141": 244,
 | |
|     "DEPRECATED142": 245,
 | |
|     "DEPRECATED143": 246,
 | |
|     "DEPRECATED144": 247,
 | |
|     "DEPRECATED145": 248,
 | |
|     "DEPRECATED146": 249,
 | |
|     "DEPRECATED147": 250,
 | |
|     "DEPRECATED148": 251,
 | |
|     "DEPRECATED149": 252,
 | |
|     "DEPRECATED150": 253,
 | |
|     "DEPRECATED151": 254,
 | |
|     "DEPRECATED152": 255,
 | |
|     "DEPRECATED153": 256,
 | |
|     "DEPRECATED154": 257,
 | |
|     "DEPRECATED155": 258,
 | |
|     "DEPRECATED156": 259,
 | |
|     "DEPRECATED157": 260,
 | |
|     "DEPRECATED158": 261,
 | |
|     "DEPRECATED159": 262,
 | |
|     "DEPRECATED160": 263,
 | |
|     "DEPRECATED161": 264,
 | |
|     "DEPRECATED162": 265,
 | |
|     "DEPRECATED163": 266,
 | |
|     "DEPRECATED164": 267,
 | |
|     "DEPRECATED165": 268,
 | |
|     "DEPRECATED166": 269,
 | |
|     "DEPRECATED167": 270,
 | |
|     "DEPRECATED168": 271,
 | |
|     "DEPRECATED169": 272,
 | |
|     "DEPRECATED170": 273,
 | |
|     "DEPRECATED171": 274,
 | |
|     "DEPRECATED172": 275,
 | |
|     "DEPRECATED173": 276,
 | |
|     "DEPRECATED174": 277,
 | |
|     "DEPRECATED175": 278,
 | |
|     "DEPRECATED176": 279,
 | |
|     "DEPRECATED177": 280,
 | |
|     "DEPRECATED178": 281,
 | |
|     "DEPRECATED179": 282,
 | |
|     "DEPRECATED180": 283,
 | |
|     "DEPRECATED181": 284,
 | |
|     "DEPRECATED182": 285,
 | |
|     "DEPRECATED183": 286,
 | |
|     "DEPRECATED184": 287,
 | |
|     "DEPRECATED185": 288,
 | |
|     "DEPRECATED186": 289,
 | |
|     "DEPRECATED187": 290,
 | |
|     "DEPRECATED188": 291,
 | |
|     "DEPRECATED189": 292,
 | |
|     "DEPRECATED190": 293,
 | |
|     "DEPRECATED191": 294,
 | |
|     "DEPRECATED192": 295,
 | |
|     "DEPRECATED193": 296,
 | |
|     "DEPRECATED194": 297,
 | |
|     "DEPRECATED195": 298,
 | |
|     "DEPRECATED196": 299,
 | |
|     "DEPRECATED197": 300,
 | |
|     "DEPRECATED198": 301,
 | |
|     "DEPRECATED199": 302,
 | |
|     "DEPRECATED200": 303,
 | |
|     "DEPRECATED201": 304,
 | |
|     "DEPRECATED202": 305,
 | |
|     "DEPRECATED203": 306,
 | |
|     "DEPRECATED204": 307,
 | |
|     "DEPRECATED205": 308,
 | |
|     "DEPRECATED206": 309,
 | |
|     "DEPRECATED207": 310,
 | |
|     "DEPRECATED208": 311,
 | |
|     "DEPRECATED209": 312,
 | |
|     "DEPRECATED210": 313,
 | |
|     "DEPRECATED211": 314,
 | |
|     "DEPRECATED212": 315,
 | |
|     "DEPRECATED213": 316,
 | |
|     "DEPRECATED214": 317,
 | |
|     "DEPRECATED215": 318,
 | |
|     "DEPRECATED216": 319,
 | |
|     "DEPRECATED217": 320,
 | |
|     "DEPRECATED218": 321,
 | |
|     "DEPRECATED219": 322,
 | |
|     "DEPRECATED220": 323,
 | |
|     "DEPRECATED221": 324,
 | |
|     "DEPRECATED222": 325,
 | |
|     "DEPRECATED223": 326,
 | |
|     "DEPRECATED224": 327,
 | |
|     "DEPRECATED225": 328,
 | |
|     "DEPRECATED226": 329,
 | |
|     "DEPRECATED227": 330,
 | |
|     "DEPRECATED228": 331,
 | |
|     "DEPRECATED229": 332,
 | |
|     "DEPRECATED230": 333,
 | |
|     "DEPRECATED231": 334,
 | |
|     "DEPRECATED232": 335,
 | |
|     "DEPRECATED233": 336,
 | |
|     "DEPRECATED234": 337,
 | |
|     "DEPRECATED235": 338,
 | |
|     "DEPRECATED236": 339,
 | |
|     "DEPRECATED237": 340,
 | |
|     "DEPRECATED238": 341,
 | |
|     "DEPRECATED239": 342,
 | |
|     "DEPRECATED240": 343,
 | |
|     "DEPRECATED241": 344,
 | |
|     "DEPRECATED242": 345,
 | |
|     "DEPRECATED243": 346,
 | |
|     "DEPRECATED244": 347,
 | |
|     "DEPRECATED245": 348,
 | |
|     "DEPRECATED246": 349,
 | |
|     "DEPRECATED247": 350,
 | |
|     "DEPRECATED248": 351,
 | |
|     "DEPRECATED249": 352,
 | |
|     "DEPRECATED250": 353,
 | |
|     "DEPRECATED251": 354,
 | |
|     "DEPRECATED252": 355,
 | |
|     "DEPRECATED253": 356,
 | |
|     "DEPRECATED254": 357,
 | |
|     "DEPRECATED255": 358,
 | |
|     "DEPRECATED256": 359,
 | |
|     "DEPRECATED257": 360,
 | |
|     "DEPRECATED258": 361,
 | |
|     "DEPRECATED259": 362,
 | |
|     "DEPRECATED260": 363,
 | |
|     "DEPRECATED261": 364,
 | |
|     "DEPRECATED262": 365,
 | |
|     "DEPRECATED263": 366,
 | |
|     "DEPRECATED264": 367,
 | |
|     "DEPRECATED265": 368,
 | |
|     "DEPRECATED266": 369,
 | |
|     "DEPRECATED267": 370,
 | |
|     "DEPRECATED268": 371,
 | |
|     "DEPRECATED269": 372,
 | |
|     "DEPRECATED270": 373,
 | |
|     "DEPRECATED271": 374,
 | |
|     "DEPRECATED272": 375,
 | |
|     "DEPRECATED273": 376,
 | |
|     "DEPRECATED274": 377,
 | |
|     "DEPRECATED275": 378,
 | |
|     "DEPRECATED276": 379,
 | |
|     "PERSON": 380,
 | |
|     "NORP": 381,
 | |
|     "FACILITY": 382,
 | |
|     "ORG": 383,
 | |
|     "GPE": 384,
 | |
|     "LOC": 385,
 | |
|     "PRODUCT": 386,
 | |
|     "EVENT": 387,
 | |
|     "WORK_OF_ART": 388,
 | |
|     "LANGUAGE": 389,
 | |
|     "DATE": 391,
 | |
|     "TIME": 392,
 | |
|     "PERCENT": 393,
 | |
|     "MONEY": 394,
 | |
|     "QUANTITY": 395,
 | |
|     "ORDINAL": 396,
 | |
|     "CARDINAL": 397,
 | |
|     "acomp": 398,
 | |
|     "advcl": 399,
 | |
|     "advmod": 400,
 | |
|     "agent": 401,
 | |
|     "amod": 402,
 | |
|     "appos": 403,
 | |
|     "attr": 404,
 | |
|     "aux": 405,
 | |
|     "auxpass": 406,
 | |
|     "cc": 407,
 | |
|     "ccomp": 408,
 | |
|     "complm": 409,
 | |
|     "conj": 410,
 | |
|     "cop": 411,
 | |
|     "csubj": 412,
 | |
|     "csubjpass": 413,
 | |
|     "dep": 414,
 | |
|     "det": 415,
 | |
|     "dobj": 416,
 | |
|     "expl": 417,
 | |
|     "hmod": 418,
 | |
|     "hyph": 419,
 | |
|     "infmod": 420,
 | |
|     "intj": 421,
 | |
|     "iobj": 422,
 | |
|     "mark": 423,
 | |
|     "meta": 424,
 | |
|     "neg": 425,
 | |
|     "nmod": 426,
 | |
|     "nn": 427,
 | |
|     "npadvmod": 428,
 | |
|     "nsubj": 429,
 | |
|     "nsubjpass": 430,
 | |
|     "num": 431,
 | |
|     "number": 432,
 | |
|     "oprd": 433,
 | |
|     "obj": 434,
 | |
|     "obl": 435,
 | |
|     "parataxis": 436,
 | |
|     "partmod": 437,
 | |
|     "pcomp": 438,
 | |
|     "pobj": 439,
 | |
|     "poss": 440,
 | |
|     "possessive": 441,
 | |
|     "preconj": 442,
 | |
|     "prep": 443,
 | |
|     "prt": 444,
 | |
|     "punct": 445,
 | |
|     "quantmod": 446,
 | |
|     "rcmod": 448,
 | |
|     "relcl": 447,
 | |
|     "root": 449,
 | |
|     "xcomp": 450,
 | |
|     "acl": 451,
 | |
|     "LAW": 390,
 | |
|     "MORPH": 453,
 | |
|     "_": 456,
 | |
| }
 | |
| 
 | |
| 
 | |
def test_frozen_symbols():
    """Check that the symbol table still matches the frozen snapshot.

    ``IDS`` must be equal to ``V3_SYMBOLS``, and ``NAMES`` must be the
    ID-to-name inverse of ``IDS``.
    """
    assert IDS == V3_SYMBOLS

    # Invert IDS (name -> ID) into the ID -> name mapping NAMES should hold.
    inverse_of_ids = dict(zip(IDS.values(), IDS.keys()))
    assert NAMES == inverse_of_ids
 |