mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-09 16:10:33 +03:00
* Fix empty values in attributes and parts of speech, so symbols align correctly with the StringStore
This commit is contained in:
parent
e18fbcb604
commit
20e909d2bb
|
@ -2,7 +2,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
IDS = {
|
IDS = {
|
||||||
"NO_TAG": NO_TAG,
|
"": NO_TAG,
|
||||||
"ADJ": ADJ,
|
"ADJ": ADJ,
|
||||||
"ADP": ADP,
|
"ADP": ADP,
|
||||||
"ADV": ADV,
|
"ADV": ADV,
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
SYMBOL_IDS = {
|
SYMBOL_IDS = {
|
||||||
"EMPTY_VALUE": EMPTY_VALUE,
|
|
||||||
"Attr_is_alpha": Attr_is_alpha,
|
"Attr_is_alpha": Attr_is_alpha,
|
||||||
"Attr_is_ascii": Attr_is_ascii,
|
"Attr_is_ascii": Attr_is_ascii,
|
||||||
"Attr_is_digit": Attr_is_digit,
|
"Attr_is_digit": Attr_is_digit,
|
||||||
|
|
|
@ -76,9 +76,11 @@ cdef class Vocab:
|
||||||
# strings are loaded first, because the vocab is open-class, and these
|
# strings are loaded first, because the vocab is open-class, and these
|
||||||
# symbols are closed class.
|
# symbols are closed class.
|
||||||
for name in attrs.NAMES:
|
for name in attrs.NAMES:
|
||||||
_ = self.strings[name]
|
if name:
|
||||||
|
_ = self.strings[name]
|
||||||
for name in parts_of_speech.NAMES:
|
for name in parts_of_speech.NAMES:
|
||||||
_ = self.strings[name]
|
if name:
|
||||||
|
_ = self.strings[name]
|
||||||
#for morph_name in UNIV_MORPH_NAMES:
|
#for morph_name in UNIV_MORPH_NAMES:
|
||||||
# _ = self.strings[morph_name]
|
# _ = self.strings[morph_name]
|
||||||
#for entity_type_name in entity_types.NAMES:
|
#for entity_type_name in entity_types.NAMES:
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from spacy.attrs import LEMMA, ORTH, PROB, IS_ALPHA
|
||||||
|
from spacy.parts_of_speech import NOUN, VERB
|
||||||
|
|
||||||
|
|
||||||
def test_neq(en_vocab):
|
def test_neq(en_vocab):
|
||||||
addr = en_vocab['Hello']
|
addr = en_vocab['Hello']
|
||||||
|
@ -25,3 +28,13 @@ def test_punct_neq(en_vocab):
|
||||||
def test_shape_attr(en_vocab):
|
def test_shape_attr(en_vocab):
|
||||||
example = en_vocab['example']
|
example = en_vocab['example']
|
||||||
assert example.orth != example.shape
|
assert example.orth != example.shape
|
||||||
|
|
||||||
|
|
||||||
|
def test_symbols(en_vocab):
|
||||||
|
assert en_vocab.strings['IS_ALPHA'] == IS_ALPHA
|
||||||
|
assert en_vocab.strings['NOUN'] == NOUN
|
||||||
|
assert en_vocab.strings['VERB'] == VERB
|
||||||
|
assert en_vocab.strings['LEMMA'] == LEMMA
|
||||||
|
assert en_vocab.strings['ORTH'] == ORTH
|
||||||
|
assert en_vocab.strings['PROB'] == PROB
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user