mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-06 21:03:07 +03:00
Fix data reading in morphology
This commit is contained in:
parent
00cfadbf63
commit
987ee6e884
|
@ -6,6 +6,7 @@ from libc.string cimport memset
|
||||||
import srsly
|
import srsly
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
|
||||||
|
from .compat import basestring_
|
||||||
from .strings import get_string_id
|
from .strings import get_string_id
|
||||||
from . import symbols
|
from . import symbols
|
||||||
from .attrs cimport POS, IS_SPACE
|
from .attrs cimport POS, IS_SPACE
|
||||||
|
@ -68,7 +69,8 @@ def _normalize_props(props):
|
||||||
props = dict(props)
|
props = dict(props)
|
||||||
for key in FIELDS:
|
for key in FIELDS:
|
||||||
if key in props:
|
if key in props:
|
||||||
attr = '%s_%s' % (key, props[key])
|
value = str(props[key]).lower()
|
||||||
|
attr = '%s_%s' % (key, value)
|
||||||
if attr in FEATURES:
|
if attr in FEATURES:
|
||||||
props.pop(key)
|
props.pop(key)
|
||||||
props[attr] = True
|
props[attr] = True
|
||||||
|
@ -81,9 +83,11 @@ def _normalize_props(props):
|
||||||
out[key] = value
|
out[key] = value
|
||||||
elif isinstance(key, int):
|
elif isinstance(key, int):
|
||||||
out[key] = value
|
out[key] = value
|
||||||
|
elif value is True:
|
||||||
|
out[key] = value
|
||||||
elif key.lower() == 'pos':
|
elif key.lower() == 'pos':
|
||||||
out[POS] = POS_IDS[value.upper()]
|
out[POS] = POS_IDS[value.upper()]
|
||||||
else:
|
elif key.lower() != 'morph':
|
||||||
out[key] = value
|
out[key] = value
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
@ -132,6 +136,7 @@ cdef class Morphology:
|
||||||
self.reverse_index = {}
|
self.reverse_index = {}
|
||||||
for i, (tag_str, attrs) in enumerate(sorted(tag_map.items())):
|
for i, (tag_str, attrs) in enumerate(sorted(tag_map.items())):
|
||||||
attrs = _normalize_props(attrs)
|
attrs = _normalize_props(attrs)
|
||||||
|
self.add({FEATURE_NAMES[feat] for feat in attrs if feat in FEATURE_NAMES})
|
||||||
self.tag_map[tag_str] = dict(attrs)
|
self.tag_map[tag_str] = dict(attrs)
|
||||||
self.reverse_index[self.strings.add(tag_str)] = i
|
self.reverse_index[self.strings.add(tag_str)] = i
|
||||||
|
|
||||||
|
@ -152,7 +157,8 @@ cdef class Morphology:
|
||||||
present. Returns the hash of the new analysis.
|
present. Returns the hash of the new analysis.
|
||||||
"""
|
"""
|
||||||
for f in features:
|
for f in features:
|
||||||
self.strings.add(f)
|
if isinstance(f, basestring_):
|
||||||
|
self.strings.add(f)
|
||||||
features = intify_features(features)
|
features = intify_features(features)
|
||||||
cdef attr_t feature
|
cdef attr_t feature
|
||||||
for feature in features:
|
for feature in features:
|
||||||
|
@ -213,6 +219,7 @@ cdef class Morphology:
|
||||||
"""
|
"""
|
||||||
attrs = dict(attrs)
|
attrs = dict(attrs)
|
||||||
attrs = _normalize_props(attrs)
|
attrs = _normalize_props(attrs)
|
||||||
|
self.add({FEATURE_NAMES[feat] for feat in attrs if feat in FEATURE_NAMES})
|
||||||
attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
|
attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
|
||||||
self.exc[(tag_str, self.strings.add(orth_str))] = attrs
|
self.exc[(tag_str, self.strings.add(orth_str))] = attrs
|
||||||
|
|
||||||
|
@ -659,7 +666,7 @@ FEATURES = [
|
||||||
"Abbr_yes",
|
"Abbr_yes",
|
||||||
"AdpType_circ",
|
"AdpType_circ",
|
||||||
"AdpType_comprep",
|
"AdpType_comprep",
|
||||||
"AdpType_prep ",
|
"AdpType_prep",
|
||||||
"AdpType_post",
|
"AdpType_post",
|
||||||
"AdpType_voc",
|
"AdpType_voc",
|
||||||
"AdvType_adadj,"
|
"AdvType_adadj,"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user