mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
* Switch to new data model, tests passing
This commit is contained in:
parent
1b0e01d3d8
commit
59b41a9fd3
|
@ -37,13 +37,7 @@ provides a fully Penn Treebank 3-compliant tokenizer.
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from libc.stdint cimport uint64_t
|
|
||||||
|
|
||||||
cimport lang
|
cimport lang
|
||||||
from spacy.lexeme cimport lexeme_check_flag
|
|
||||||
from spacy.lexeme cimport lexeme_string_view
|
|
||||||
|
|
||||||
from spacy import orth
|
|
||||||
|
|
||||||
|
|
||||||
cdef class English(Language):
|
cdef class English(Language):
|
||||||
|
|
133
spacy/lang.pyx
133
spacy/lang.pyx
|
@ -15,7 +15,7 @@ import re
|
||||||
|
|
||||||
from .util import read_lang_data
|
from .util import read_lang_data
|
||||||
from spacy.tokens import Tokens
|
from spacy.tokens import Tokens
|
||||||
from spacy.lexeme cimport LexemeC, lexeme_init, lexeme_pack, lexeme_unpack
|
from spacy.lexeme cimport LexemeC, get_lexeme_dict, lexeme_pack, lexeme_unpack
|
||||||
from murmurhash.mrmr cimport hash64
|
from murmurhash.mrmr cimport hash64
|
||||||
|
|
||||||
from cpython.ref cimport Py_INCREF
|
from cpython.ref cimport Py_INCREF
|
||||||
|
@ -30,99 +30,11 @@ from spacy import orth
|
||||||
from spacy import util
|
from spacy import util
|
||||||
|
|
||||||
|
|
||||||
cdef enum Flags:
|
|
||||||
Flag_IsAlpha
|
|
||||||
Flag_IsAscii
|
|
||||||
Flag_IsDigit
|
|
||||||
Flag_IsLower
|
|
||||||
Flag_IsPunct
|
|
||||||
Flag_IsSpace
|
|
||||||
Flag_IsTitle
|
|
||||||
Flag_IsUpper
|
|
||||||
|
|
||||||
Flag_CanAdj
|
|
||||||
Flag_CanAdp
|
|
||||||
Flag_CanAdv
|
|
||||||
Flag_CanConj
|
|
||||||
Flag_CanDet
|
|
||||||
Flag_CanNoun
|
|
||||||
Flag_CanNum
|
|
||||||
Flag_CanPdt
|
|
||||||
Flag_CanPos
|
|
||||||
Flag_CanPron
|
|
||||||
Flag_CanPrt
|
|
||||||
Flag_CanPunct
|
|
||||||
Flag_CanVerb
|
|
||||||
|
|
||||||
Flag_OftLower
|
|
||||||
Flag_OftTitle
|
|
||||||
Flag_OftUpper
|
|
||||||
Flag_N
|
|
||||||
|
|
||||||
|
|
||||||
cdef enum Views:
|
|
||||||
View_CanonForm
|
|
||||||
View_WordShape
|
|
||||||
View_NonSparse
|
|
||||||
View_Asciied
|
|
||||||
View_N
|
|
||||||
|
|
||||||
|
|
||||||
# Assign the flag and view functions by enum value.
|
|
||||||
# This is verbose, but it ensures we don't get nasty order sensitivities.
|
|
||||||
STRING_VIEW_FUNCS = [None] * View_N
|
|
||||||
STRING_VIEW_FUNCS[View_CanonForm] = orth.canon_case
|
|
||||||
STRING_VIEW_FUNCS[View_WordShape] = orth.word_shape
|
|
||||||
STRING_VIEW_FUNCS[View_NonSparse] = orth.non_sparse
|
|
||||||
STRING_VIEW_FUNCS[View_Asciied] = orth.asciied
|
|
||||||
|
|
||||||
FLAG_FUNCS = [None] * Flag_N
|
|
||||||
FLAG_FUNCS[Flag_IsAlpha] = orth.is_alpha
|
|
||||||
FLAG_FUNCS[Flag_IsAscii] = orth.is_ascii
|
|
||||||
FLAG_FUNCS[Flag_IsDigit] = orth.is_digit
|
|
||||||
FLAG_FUNCS[Flag_IsLower] = orth.is_lower
|
|
||||||
FLAG_FUNCS[Flag_IsPunct] = orth.is_punct
|
|
||||||
FLAG_FUNCS[Flag_IsSpace] = orth.is_space
|
|
||||||
FLAG_FUNCS[Flag_IsTitle] = orth.is_title
|
|
||||||
FLAG_FUNCS[Flag_IsUpper] = orth.is_upper
|
|
||||||
|
|
||||||
FLAG_FUNCS[Flag_CanAdj] = orth.can_tag('ADJ')
|
|
||||||
FLAG_FUNCS[Flag_CanAdp] = orth.can_tag('ADP')
|
|
||||||
FLAG_FUNCS[Flag_CanAdv] = orth.can_tag('ADV')
|
|
||||||
FLAG_FUNCS[Flag_CanConj] = orth.can_tag('CONJ')
|
|
||||||
FLAG_FUNCS[Flag_CanDet] = orth.can_tag('DET')
|
|
||||||
FLAG_FUNCS[Flag_CanNoun] = orth.can_tag('NOUN')
|
|
||||||
FLAG_FUNCS[Flag_CanNum] = orth.can_tag('NUM')
|
|
||||||
FLAG_FUNCS[Flag_CanPdt] = orth.can_tag('PDT')
|
|
||||||
FLAG_FUNCS[Flag_CanPos] = orth.can_tag('POS')
|
|
||||||
FLAG_FUNCS[Flag_CanPron] = orth.can_tag('PRON')
|
|
||||||
FLAG_FUNCS[Flag_CanPrt] = orth.can_tag('PRT')
|
|
||||||
FLAG_FUNCS[Flag_CanPunct] = orth.can_tag('PUNCT')
|
|
||||||
FLAG_FUNCS[Flag_CanVerb] = orth.can_tag('VERB')
|
|
||||||
|
|
||||||
FLAG_FUNCS[Flag_OftLower] = orth.oft_case('lower', 0.7)
|
|
||||||
FLAG_FUNCS[Flag_OftTitle] = orth.oft_case('title', 0.7)
|
|
||||||
FLAG_FUNCS[Flag_OftUpper] = orth.oft_case('upper', 0.7)
|
|
||||||
|
|
||||||
|
|
||||||
cdef class Language:
|
cdef class Language:
|
||||||
"""Base class for language-specific tokenizers.
|
"""Base class for language-specific tokenizers.
|
||||||
|
|
||||||
Most subclasses will override the _split or _split_one methods, which take
|
|
||||||
a string of non-whitespace characters and output a list of strings. This
|
|
||||||
function is called by _tokenize, which sits behind a cache and turns the
|
|
||||||
list of strings into Lexeme objects via the Lexicon. Most languages will not
|
|
||||||
need to override _tokenize or tokenize.
|
|
||||||
|
|
||||||
The language is supplied a list of boolean functions, used to compute flag
|
|
||||||
features. These are passed to the language's Lexicon object.
|
|
||||||
|
|
||||||
The language's name is used to look up default data-files, found in data/<name>.
|
The language's name is used to look up default data-files, found in data/<name>.
|
||||||
"""
|
"""
|
||||||
fl_is_alpha = Flag_IsAlpha
|
|
||||||
fl_is_digit = Flag_IsDigit
|
|
||||||
v_shape = View_WordShape
|
|
||||||
|
|
||||||
def __init__(self, name, user_string_features, user_flag_features):
|
def __init__(self, name, user_string_features, user_flag_features):
|
||||||
self.name = name
|
self.name = name
|
||||||
self._mem = Pool()
|
self._mem = Pool()
|
||||||
|
@ -131,9 +43,7 @@ cdef class Language:
|
||||||
rules, prefix, suffix, lexemes = util.read_lang_data(name)
|
rules, prefix, suffix, lexemes = util.read_lang_data(name)
|
||||||
self.prefix_re = re.compile(prefix)
|
self.prefix_re = re.compile(prefix)
|
||||||
self.suffix_re = re.compile(suffix)
|
self.suffix_re = re.compile(suffix)
|
||||||
self.lexicon = Lexicon(lexemes,
|
self.lexicon = Lexicon(lexemes)
|
||||||
STRING_VIEW_FUNCS + user_string_features,
|
|
||||||
FLAG_FUNCS + user_flag_features)
|
|
||||||
self._load_special_tokenization(rules)
|
self._load_special_tokenization(rules)
|
||||||
|
|
||||||
property nr_types:
|
property nr_types:
|
||||||
|
@ -155,17 +65,17 @@ cdef class Language:
|
||||||
cpdef Tokens tokenize(self, unicode string):
|
cpdef Tokens tokenize(self, unicode string):
|
||||||
"""Tokenize a string.
|
"""Tokenize a string.
|
||||||
|
|
||||||
The tokenization rules are defined in two places:
|
The tokenization rules are defined in three places:
|
||||||
|
|
||||||
* The data/<lang>/tokenization table, which handles special cases like contractions;
|
* The data/<lang>/tokenization table, which handles special cases like contractions;
|
||||||
* The appropriate :py:meth:`find_split` function, which is used to split
|
* The data/<lang>/prefix file, used to build a regex to split off prefixes;
|
||||||
off punctuation etc.
|
* The data/<lang>/suffix file, used to build a regex to split off suffixes.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
string (unicode): The string to be tokenized.
|
string (unicode): The string to be tokenized.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tokens (Tokens): A Tokens object, giving access to a sequence of LexIDs.
|
tokens (Tokens): A Tokens object, giving access to a sequence of Lexemes.
|
||||||
"""
|
"""
|
||||||
cdef size_t length = len(string)
|
cdef size_t length = len(string)
|
||||||
cdef Tokens tokens = Tokens(length)
|
cdef Tokens tokens = Tokens(length)
|
||||||
|
@ -339,10 +249,8 @@ cdef class Language:
|
||||||
|
|
||||||
|
|
||||||
cdef class Lexicon:
|
cdef class Lexicon:
|
||||||
def __cinit__(self, lexemes, string_features, flag_features):
|
def __cinit__(self, lexemes):
|
||||||
self._mem = Pool()
|
self._mem = Pool()
|
||||||
self._flag_features = flag_features
|
|
||||||
self._string_features = string_features
|
|
||||||
self._dict = PreshMap(2 ** 20)
|
self._dict = PreshMap(2 ** 20)
|
||||||
self.size = 0
|
self.size = 0
|
||||||
cdef String string
|
cdef String string
|
||||||
|
@ -351,29 +259,22 @@ cdef class Lexicon:
|
||||||
for lexeme_dict in lexemes:
|
for lexeme_dict in lexemes:
|
||||||
string_from_unicode(&string, lexeme_dict['string'])
|
string_from_unicode(&string, lexeme_dict['string'])
|
||||||
lexeme = <LexemeC*>self._mem.alloc(1, sizeof(LexemeC))
|
lexeme = <LexemeC*>self._mem.alloc(1, sizeof(LexemeC))
|
||||||
lexeme.views = <char**>self._mem.alloc(len(string_features), sizeof(char*))
|
|
||||||
lexeme_unpack(lexeme, lexeme_dict)
|
lexeme_unpack(lexeme, lexeme_dict)
|
||||||
self._dict.set(string.key, lexeme)
|
self._dict.set(string.key, lexeme)
|
||||||
self.size += 1
|
self.size += 1
|
||||||
|
|
||||||
cdef LexemeC* get(self, String* string) except NULL:
|
cdef LexemeC* get(self, String* string) except NULL:
|
||||||
cdef LexemeC* lexeme
|
cdef LexemeC* lex
|
||||||
lexeme = <LexemeC*>self._dict.get(string.key)
|
lex = <LexemeC*>self._dict.get(string.key)
|
||||||
if lexeme != NULL:
|
if lex != NULL:
|
||||||
return lexeme
|
return lex
|
||||||
|
|
||||||
cdef unicode uni_string = string.chars[:string.n]
|
lex = <LexemeC*>self._mem.alloc(1, sizeof(LexemeC))
|
||||||
views = [string_view(uni_string, 0.0, 0, {}, {})
|
cdef unicode unicode_string = string.chars[:string.n]
|
||||||
for string_view in self._string_features]
|
lexeme_unpack(lex, get_lexeme_dict(self.size, unicode_string))
|
||||||
flags = set()
|
self._dict.set(string.key, lex)
|
||||||
for i, flag_feature in enumerate(self._flag_features):
|
|
||||||
if flag_feature(uni_string, 0.0, {}, {}):
|
|
||||||
flags.add(i)
|
|
||||||
|
|
||||||
lexeme = lexeme_init(self._mem, self.size, uni_string, 0, 0, views, flags)
|
|
||||||
self._dict.set(string.key, lexeme)
|
|
||||||
self.size += 1
|
self.size += 1
|
||||||
return lexeme
|
return lex
|
||||||
|
|
||||||
cpdef Lexeme lookup(self, unicode uni_string):
|
cpdef Lexeme lookup(self, unicode uni_string):
|
||||||
"""Retrieve (or create, if not found) a Lexeme for a string, and return it.
|
"""Retrieve (or create, if not found) a Lexeme for a string, and return it.
|
||||||
|
|
|
@ -70,6 +70,7 @@ cdef struct LexemeC:
|
||||||
flag_t orth_flags
|
flag_t orth_flags
|
||||||
flag_t dist_flags
|
flag_t dist_flags
|
||||||
|
|
||||||
|
cpdef dict get_lexeme_dict(size_t i, unicode string)
|
||||||
|
|
||||||
cdef char* intern_and_encode(unicode string, size_t* length) except NULL
|
cdef char* intern_and_encode(unicode string, size_t* length) except NULL
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ import orth
|
||||||
OOV_DIST_FLAGS = 0
|
OOV_DIST_FLAGS = 0
|
||||||
|
|
||||||
|
|
||||||
def get_lexeme_dict(size_t i, unicode string):
|
cpdef dict get_lexeme_dict(size_t i, unicode string):
|
||||||
ints = [None for _ in range(LexInt_N)]
|
ints = [None for _ in range(LexInt_N)]
|
||||||
ints[<int>LexInt_i] = i
|
ints[<int>LexInt_i] = i
|
||||||
ints[<int>LexInt_length] = len(string)
|
ints[<int>LexInt_length] = len(string)
|
||||||
|
@ -18,13 +18,12 @@ def get_lexeme_dict(size_t i, unicode string):
|
||||||
floats[<int>LexFloat_prob] = 0
|
floats[<int>LexFloat_prob] = 0
|
||||||
floats[<int>LexFloat_sentiment] = 0
|
floats[<int>LexFloat_sentiment] = 0
|
||||||
|
|
||||||
cdef size_t length
|
|
||||||
strings = [None for _ in range(LexStr_N)]
|
strings = [None for _ in range(LexStr_N)]
|
||||||
strings[<int>LexStr_key] = intern_and_encode(string, &length)
|
strings[<int>LexStr_key] = string
|
||||||
strings[<int>LexStr_casefix] = strings[<int>LexStr_key]
|
strings[<int>LexStr_casefix] = strings[<int>LexStr_key]
|
||||||
strings[<int>LexStr_shape] = intern_and_encode(orth.word_shape(string), &length)
|
strings[<int>LexStr_shape] = orth.word_shape(string)
|
||||||
strings[<int>LexStr_unsparse] = strings[<int>LexStr_shape]
|
strings[<int>LexStr_unsparse] = strings[<int>LexStr_shape]
|
||||||
strings[<int>LexStr_asciied] = intern_and_encode(orth.asciied(string), &length)
|
strings[<int>LexStr_asciied] = orth.asciied(string)
|
||||||
|
|
||||||
orth_flags = get_orth_flags(string)
|
orth_flags = get_orth_flags(string)
|
||||||
dist_flags = OOV_DIST_FLAGS
|
dist_flags = OOV_DIST_FLAGS
|
||||||
|
@ -33,8 +32,18 @@ def get_lexeme_dict(size_t i, unicode string):
|
||||||
'orth_flags': orth_flags, 'dist_flags': dist_flags}
|
'orth_flags': orth_flags, 'dist_flags': dist_flags}
|
||||||
|
|
||||||
def get_orth_flags(unicode string):
|
def get_orth_flags(unicode string):
|
||||||
return 0
|
cdef flag_t flags = 0
|
||||||
|
|
||||||
|
flags |= orth.is_ascii(string) << LexOrth_ascii
|
||||||
|
flags |= orth.is_alpha(string) << LexOrth_alpha
|
||||||
|
flags |= orth.is_digit(string) << LexOrth_digit
|
||||||
|
flags |= orth.is_lower(string) << LexOrth_lower
|
||||||
|
flags |= orth.is_punct(string) << LexOrth_punct
|
||||||
|
flags |= orth.is_space(string) << LexOrth_space
|
||||||
|
flags |= orth.is_title(string) << LexOrth_title
|
||||||
|
flags |= orth.is_upper(string) << LexOrth_upper
|
||||||
|
|
||||||
|
return flags
|
||||||
|
|
||||||
def get_dist_flags(unicode string):
|
def get_dist_flags(unicode string):
|
||||||
return 0
|
return 0
|
||||||
|
@ -87,9 +96,9 @@ cdef int lexeme_unpack(LexemeC* lex, dict p) except -1:
|
||||||
for i, lex_int in enumerate(p['ints']):
|
for i, lex_int in enumerate(p['ints']):
|
||||||
lex.ints[i] = lex_int
|
lex.ints[i] = lex_int
|
||||||
for i, lex_float in enumerate(p['floats']):
|
for i, lex_float in enumerate(p['floats']):
|
||||||
lex.ints[i] = lex_int
|
lex.floats[i] = lex_float
|
||||||
cdef size_t _
|
cdef size_t _
|
||||||
for i, lex_string in enumerate(p['strings']):
|
for i, lex_string in enumerate(p['strings']):
|
||||||
lex.strings[i] = intern_and_encode(lex_string, &_)
|
lex.strings[i] = intern_and_encode(lex_string, &_)
|
||||||
lex.orth_flags = p['orth_flags']
|
lex.orth_flags = p['orth_flags']
|
||||||
lex.orth_flags = p['orth_flags']
|
lex.dist_flags = p['dist_flags']
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# -*- coding: utf8 -*-
|
# -*- coding: utf8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
from unidecode import unidecode
|
||||||
|
|
||||||
import math
|
import math
|
||||||
|
|
||||||
|
@ -9,15 +10,15 @@ TAGS = 'adj adp adv conj det noun num pdt pos pron prt punct verb'.upper().split
|
||||||
|
|
||||||
|
|
||||||
# Binary string features
|
# Binary string features
|
||||||
def is_alpha(string, prob, case_stats, tag_stats):
|
def is_alpha(string):
|
||||||
return string.isalpha()
|
return string.isalpha()
|
||||||
|
|
||||||
|
|
||||||
def is_digit(string, prob, case_stats, tag_stats):
|
def is_digit(string):
|
||||||
return string.isdigit()
|
return string.isdigit()
|
||||||
|
|
||||||
|
|
||||||
def is_punct(string, prob, case_stats, tag_stats):
|
def is_punct(string):
|
||||||
for c in string:
|
for c in string:
|
||||||
if not unicodedata.category(c).startswith('P'):
|
if not unicodedata.category(c).startswith('P'):
|
||||||
return False
|
return False
|
||||||
|
@ -25,11 +26,11 @@ def is_punct(string, prob, case_stats, tag_stats):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def is_space(string, prob, case_stats, tag_stats):
|
def is_space(string):
|
||||||
return string.isspace()
|
return string.isspace()
|
||||||
|
|
||||||
|
|
||||||
def is_ascii(string, prob, case_stats, tag_stats):
|
def is_ascii(string):
|
||||||
for c in string:
|
for c in string:
|
||||||
if ord(c) >= 128:
|
if ord(c) >= 128:
|
||||||
return False
|
return False
|
||||||
|
@ -37,15 +38,15 @@ def is_ascii(string, prob, case_stats, tag_stats):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def is_title(string, prob, case_stats, tag_stats):
|
def is_title(string):
|
||||||
return string.istitle()
|
return string.istitle()
|
||||||
|
|
||||||
|
|
||||||
def is_lower(string, prob, case_stats, tag_stats):
|
def is_lower(string):
|
||||||
return string.islower()
|
return string.islower()
|
||||||
|
|
||||||
|
|
||||||
def is_upper(string, prob, case_stats, tag_stats):
|
def is_upper(string):
|
||||||
return string.isupper()
|
return string.isupper()
|
||||||
|
|
||||||
|
|
||||||
|
@ -103,7 +104,7 @@ def word_shape(string, *args):
|
||||||
|
|
||||||
|
|
||||||
def non_sparse(string, prob, cluster, case_stats, tag_stats):
|
def non_sparse(string, prob, cluster, case_stats, tag_stats):
|
||||||
if is_alpha(string, prob, case_stats, tag_stats):
|
if is_alpha(string):
|
||||||
return canon_case(string, prob, cluster, case_stats, tag_stats)
|
return canon_case(string, prob, cluster, case_stats, tag_stats)
|
||||||
elif prob >= math.log(0.0001):
|
elif prob >= math.log(0.0001):
|
||||||
return string
|
return string
|
||||||
|
@ -112,22 +113,5 @@ def non_sparse(string, prob, cluster, case_stats, tag_stats):
|
||||||
|
|
||||||
|
|
||||||
def asciied(string, prob=0, cluster=0, case_stats=None, tag_stats=None):
|
def asciied(string, prob=0, cluster=0, case_stats=None, tag_stats=None):
|
||||||
'''"ASCIIfy" a Unicode string by stripping all umlauts, tildes, etc.'''
|
ascii_string = unidecode(string)
|
||||||
# Snippet from
|
return ascii_string.decode('ascii')
|
||||||
# http://www.physic.ut.ee/~kkannike/english/prog/python/util/asciify/index.html
|
|
||||||
# TODO: Rewrite and improve this
|
|
||||||
lookup_table = {
|
|
||||||
u'“': '"',
|
|
||||||
u'”': '"'
|
|
||||||
}
|
|
||||||
temp = u''
|
|
||||||
for char in string:
|
|
||||||
if char in lookup_table:
|
|
||||||
temp += lookup_table[char]
|
|
||||||
else:
|
|
||||||
decomp = unicodedata.decomposition(char)
|
|
||||||
if decomp: # Not an empty string
|
|
||||||
temp += unichr(int(decomp.split()[0], 16))
|
|
||||||
else:
|
|
||||||
temp += char
|
|
||||||
return temp
|
|
||||||
|
|
|
@ -5,21 +5,17 @@ from libcpp.vector cimport vector
|
||||||
cdef class Tokens:
|
cdef class Tokens:
|
||||||
cdef vector[LexemeC*] *v
|
cdef vector[LexemeC*] *v
|
||||||
|
|
||||||
cpdef size_t id(self, size_t i) except 0
|
|
||||||
cpdef unicode string(self, size_t i)
|
cpdef unicode string(self, size_t i)
|
||||||
cpdef double prob(self, size_t i) except 1
|
cpdef float prob(self, size_t i) except 1
|
||||||
cpdef size_t cluster(self, size_t i) except *
|
cpdef int cluster(self, size_t i) except *
|
||||||
cpdef bint check_flag(self, size_t i, size_t flag_id) except *
|
cpdef bint check_orth_flag(self, size_t i, size_t flag_id) except *
|
||||||
|
cpdef bint check_dist_flag(self, size_t i, size_t flag_id) except *
|
||||||
cpdef unicode string_view(self, size_t i, size_t view_id)
|
cpdef unicode string_view(self, size_t i, size_t view_id)
|
||||||
|
|
||||||
cpdef size_t canon(self, size_t i) except 0
|
cpdef unicode casefix(self, size_t i)
|
||||||
cpdef size_t shape(self, size_t i) except 0
|
cpdef unicode shape(self, size_t i)
|
||||||
cpdef size_t non_sparse(self, size_t i) except 0
|
cpdef unicode unsparse(self, size_t i)
|
||||||
cpdef size_t asciied(self, size_t i) except 0
|
cpdef unicode asciied(self, size_t i)
|
||||||
cpdef unicode canon_string(self, size_t i)
|
|
||||||
cpdef unicode shape_string(self, size_t i)
|
|
||||||
cpdef unicode non_sparse_string(self, size_t i)
|
|
||||||
cpdef unicode asciied_string(self, size_t i)
|
|
||||||
cpdef bint is_alpha(self, size_t i) except *
|
cpdef bint is_alpha(self, size_t i) except *
|
||||||
cpdef bint is_ascii(self, size_t i) except *
|
cpdef bint is_ascii(self, size_t i) except *
|
||||||
cpdef bint is_digit(self, size_t i) except *
|
cpdef bint is_digit(self, size_t i) except *
|
||||||
|
|
138
spacy/tokens.pyx
138
spacy/tokens.pyx
|
@ -1,45 +1,7 @@
|
||||||
# cython: profile=True
|
# cython: profile=True
|
||||||
from spacy.word cimport Lexeme
|
from .word cimport Lexeme
|
||||||
from spacy.lexeme cimport lexeme_check_flag
|
|
||||||
from spacy.lexeme cimport lexeme_string_view
|
|
||||||
|
|
||||||
|
from .lexeme cimport *
|
||||||
cdef enum Flags:
|
|
||||||
Flag_IsAlpha
|
|
||||||
Flag_IsAscii
|
|
||||||
Flag_IsDigit
|
|
||||||
Flag_IsLower
|
|
||||||
Flag_IsPunct
|
|
||||||
Flag_IsSpace
|
|
||||||
Flag_IsTitle
|
|
||||||
Flag_IsUpper
|
|
||||||
|
|
||||||
Flag_CanAdj
|
|
||||||
Flag_CanAdp
|
|
||||||
Flag_CanAdv
|
|
||||||
Flag_CanConj
|
|
||||||
Flag_CanDet
|
|
||||||
Flag_CanNoun
|
|
||||||
Flag_CanNum
|
|
||||||
Flag_CanPdt
|
|
||||||
Flag_CanPos
|
|
||||||
Flag_CanPron
|
|
||||||
Flag_CanPrt
|
|
||||||
Flag_CanPunct
|
|
||||||
Flag_CanVerb
|
|
||||||
|
|
||||||
Flag_OftLower
|
|
||||||
Flag_OftTitle
|
|
||||||
Flag_OftUpper
|
|
||||||
Flag_N
|
|
||||||
|
|
||||||
|
|
||||||
cdef enum Views:
|
|
||||||
View_CanonForm
|
|
||||||
View_WordShape
|
|
||||||
View_NonSparse
|
|
||||||
View_Asciied
|
|
||||||
View_N
|
|
||||||
|
|
||||||
|
|
||||||
cdef class Tokens:
|
cdef class Tokens:
|
||||||
|
@ -79,120 +41,108 @@ cdef class Tokens:
|
||||||
self.v.push_back(lexeme._c)
|
self.v.push_back(lexeme._c)
|
||||||
|
|
||||||
cpdef unicode string(self, size_t i):
|
cpdef unicode string(self, size_t i):
|
||||||
cdef bytes utf8_string = self.v.at(i).string[:self.v.at(i).length]
|
cdef bytes utf8_string = self.v.at(i).strings[<int>LexStr_key]
|
||||||
cdef unicode string = utf8_string.decode('utf8')
|
cdef unicode string = utf8_string.decode('utf8')
|
||||||
return string
|
return string
|
||||||
|
|
||||||
cpdef size_t id(self, size_t i) except 0:
|
cpdef float prob(self, size_t i) except 1:
|
||||||
return <size_t>&self.v.at(i).string
|
return self.v.at(i).floats[<int>LexFloat_prob]
|
||||||
|
|
||||||
cpdef double prob(self, size_t i) except 1:
|
cpdef int cluster(self, size_t i) except *:
|
||||||
return self.v.at(i).prob
|
return self.v.at(i).ints[<int>LexInt_cluster]
|
||||||
|
|
||||||
cpdef size_t cluster(self, size_t i) except *:
|
cpdef bint check_orth_flag(self, size_t i, size_t flag_id) except *:
|
||||||
return self.v.at(i).cluster
|
return lexeme_check_orth_flag(self.v.at(i), flag_id)
|
||||||
|
|
||||||
cpdef bint check_flag(self, size_t i, size_t flag_id) except *:
|
cpdef bint check_dist_flag(self, size_t i, size_t flag_id) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), flag_id)
|
return lexeme_check_dist_flag(self.v.at(i), flag_id)
|
||||||
|
|
||||||
cpdef unicode string_view(self, size_t i, size_t view_id):
|
cpdef unicode string_view(self, size_t i, size_t view_id):
|
||||||
return lexeme_string_view(self.v.at(i), view_id)
|
return lexeme_get_string(self.v.at(i), view_id)
|
||||||
|
|
||||||
# Provide accessor methods for the features supported by the language.
|
# Provide accessor methods for the features supported by the language.
|
||||||
# Without these, clients have to use the underlying string_view and check_flag
|
# Without these, clients have to use the underlying string_view and check_flag
|
||||||
# methods, which requires them to know the IDs.
|
# methods, which requires them to know the IDs.
|
||||||
cpdef unicode canon_string(self, size_t i):
|
cpdef unicode casefix(self, size_t i):
|
||||||
return lexeme_string_view(self.v.at(i), View_CanonForm)
|
return lexeme_get_string(self.v.at(i), LexStr_casefix)
|
||||||
|
|
||||||
cpdef unicode shape_string(self, size_t i):
|
cpdef unicode shape(self, size_t i):
|
||||||
return lexeme_string_view(self.v.at(i), View_WordShape)
|
return lexeme_get_string(self.v.at(i), LexStr_shape)
|
||||||
|
|
||||||
cpdef unicode non_sparse_string(self, size_t i):
|
cpdef unicode unsparse(self, size_t i):
|
||||||
return lexeme_string_view(self.v.at(i), View_NonSparse)
|
return lexeme_get_string(self.v.at(i), LexStr_unsparse)
|
||||||
|
|
||||||
cpdef unicode asciied_string(self, size_t i):
|
cpdef unicode asciied(self, size_t i):
|
||||||
return lexeme_string_view(self.v.at(i), View_Asciied)
|
return lexeme_get_string(self.v.at(i), LexStr_asciied)
|
||||||
|
|
||||||
cpdef size_t canon(self, size_t i) except *:
|
|
||||||
return id(self.v.at(i).views[<size_t>View_CanonForm])
|
|
||||||
|
|
||||||
cpdef size_t shape(self, size_t i) except *:
|
|
||||||
return id(self.v.at(i).views[<size_t>View_WordShape])
|
|
||||||
|
|
||||||
cpdef size_t non_sparse(self, size_t i) except *:
|
|
||||||
return id(self.v.at(i).views[<size_t>View_NonSparse])
|
|
||||||
|
|
||||||
cpdef size_t asciied(self, size_t i) except *:
|
|
||||||
return id(self.v.at(i).views[<size_t>View_Asciied])
|
|
||||||
|
|
||||||
cpdef bint is_alpha(self, size_t i) except *:
|
cpdef bint is_alpha(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_IsAlpha)
|
return lexeme_check_orth_flag(self.v.at(i), LexOrth_alpha)
|
||||||
|
|
||||||
cpdef bint is_ascii(self, size_t i) except *:
|
cpdef bint is_ascii(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_IsAscii)
|
return lexeme_check_orth_flag(self.v.at(i), LexOrth_ascii)
|
||||||
|
|
||||||
cpdef bint is_digit(self, size_t i) except *:
|
cpdef bint is_digit(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_IsDigit)
|
return lexeme_check_orth_flag(self.v.at(i), LexOrth_digit)
|
||||||
|
|
||||||
cpdef bint is_lower(self, size_t i) except *:
|
cpdef bint is_lower(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_IsLower)
|
return lexeme_check_orth_flag(self.v.at(i), LexOrth_lower)
|
||||||
|
|
||||||
cpdef bint is_punct(self, size_t i) except *:
|
cpdef bint is_punct(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_IsPunct)
|
return lexeme_check_orth_flag(self.v.at(i), LexOrth_punct)
|
||||||
|
|
||||||
cpdef bint is_space(self, size_t i) except *:
|
cpdef bint is_space(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_IsSpace)
|
return lexeme_check_orth_flag(self.v.at(i), LexOrth_space)
|
||||||
|
|
||||||
cpdef bint is_title(self, size_t i) except *:
|
cpdef bint is_title(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_IsTitle)
|
return lexeme_check_orth_flag(self.v.at(i), LexOrth_title)
|
||||||
|
|
||||||
cpdef bint is_upper(self, size_t i) except *:
|
cpdef bint is_upper(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_IsUpper)
|
return lexeme_check_orth_flag(self.v.at(i), LexOrth_upper)
|
||||||
|
|
||||||
cpdef bint can_adj(self, size_t i) except *:
|
cpdef bint can_adj(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanAdj)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_adj)
|
||||||
|
|
||||||
cpdef bint can_adp(self, size_t i) except *:
|
cpdef bint can_adp(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanAdp)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_adp)
|
||||||
|
|
||||||
cpdef bint can_adv(self, size_t i) except *:
|
cpdef bint can_adv(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanAdv)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_adv)
|
||||||
|
|
||||||
cpdef bint can_conj(self, size_t i) except *:
|
cpdef bint can_conj(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanConj)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_conj)
|
||||||
|
|
||||||
cpdef bint can_det(self, size_t i) except *:
|
cpdef bint can_det(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanDet)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_det)
|
||||||
|
|
||||||
cpdef bint can_noun(self, size_t i) except *:
|
cpdef bint can_noun(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanNoun)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_noun)
|
||||||
|
|
||||||
cpdef bint can_num(self, size_t i) except *:
|
cpdef bint can_num(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanNum)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_num)
|
||||||
|
|
||||||
cpdef bint can_pdt(self, size_t i) except *:
|
cpdef bint can_pdt(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanPdt)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_pdt)
|
||||||
|
|
||||||
cpdef bint can_pos(self, size_t i) except *:
|
cpdef bint can_pos(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanPos)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_pos)
|
||||||
|
|
||||||
cpdef bint can_pron(self, size_t i) except *:
|
cpdef bint can_pron(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanPron)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_pron)
|
||||||
|
|
||||||
cpdef bint can_prt(self, size_t i) except *:
|
cpdef bint can_prt(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanPrt)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_prt)
|
||||||
|
|
||||||
cpdef bint can_punct(self, size_t i) except *:
|
cpdef bint can_punct(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanPunct)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_punct)
|
||||||
|
|
||||||
cpdef bint can_verb(self, size_t i) except *:
|
cpdef bint can_verb(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_CanVerb)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_verb)
|
||||||
|
|
||||||
cpdef bint oft_lower(self, size_t i) except *:
|
cpdef bint oft_lower(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_OftLower)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_lower)
|
||||||
|
|
||||||
cpdef bint oft_title(self, size_t i) except *:
|
cpdef bint oft_title(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_OftTitle)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_title)
|
||||||
|
|
||||||
cpdef bint oft_upper(self, size_t i) except *:
|
cpdef bint oft_upper(self, size_t i) except *:
|
||||||
return lexeme_check_flag(self.v.at(i), Flag_OftUpper)
|
return lexeme_check_dist_flag(self.v.at(i), LexDist_upper)
|
||||||
|
|
|
@ -7,5 +7,6 @@ DEF MAX_FLAG = 64
|
||||||
cdef class Lexeme:
|
cdef class Lexeme:
|
||||||
cdef LexemeC* _c
|
cdef LexemeC* _c
|
||||||
|
|
||||||
cpdef bint check_flag(self, size_t flag_id) except *
|
cpdef bint check_orth_flag(self, size_t flag_id) except *
|
||||||
|
cpdef bint check_dist_flag(self, size_t flag_id) except *
|
||||||
cpdef unicode string_view(self, size_t view_id)
|
cpdef unicode string_view(self, size_t view_id)
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
# cython: profile=True
|
# cython: profile=True
|
||||||
# cython: embedsignature=True
|
# cython: embedsignature=True
|
||||||
|
|
||||||
from spacy.lexeme cimport lexeme_check_flag, lexeme_string_view
|
from .lexeme cimport lexeme_get_string
|
||||||
|
from .lexeme cimport lexeme_check_orth_flag, lexeme_check_dist_flag
|
||||||
|
|
||||||
|
from .lexeme cimport *
|
||||||
|
|
||||||
|
|
||||||
cdef class Lexeme:
|
cdef class Lexeme:
|
||||||
|
@ -51,49 +54,27 @@ cdef class Lexeme:
|
||||||
|
|
||||||
property string:
|
property string:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
cdef bytes utf8_string = self._c.string
|
cdef bytes utf8_string = self._c.strings[<int>LexStr_key]
|
||||||
cdef unicode string = utf8_string.decode('utf8')
|
cdef unicode string = utf8_string.decode('utf8')
|
||||||
return string
|
return string
|
||||||
|
|
||||||
property prob:
|
property prob:
|
||||||
def __get__(self): return self._c.prob
|
def __get__(self):
|
||||||
|
return self._c.floats[<int>LexFloat_prob]
|
||||||
|
|
||||||
property cluster:
|
property cluster:
|
||||||
def __get__(self): return self._c.cluster
|
def __get__(self):
|
||||||
|
return self._c.ints[<int>LexInt_cluster]
|
||||||
|
|
||||||
property length:
|
property length:
|
||||||
def __get__(self): return self._c.length
|
def __get__(self):
|
||||||
|
return self._c.ints[<int>LexInt_length]
|
||||||
|
|
||||||
cpdef bint check_flag(self, size_t flag_id) except *:
|
cpdef bint check_orth_flag(self, size_t flag_id) except *:
|
||||||
"""Lexemes may store language-specific boolean features in a bit-field,
|
return lexeme_check_orth_flag(self._c, flag_id)
|
||||||
with values accessed by providing an ID constant to this function.
|
|
||||||
|
|
||||||
The ID constants are exposed as global variables in the language module,
|
cpdef bint check_dist_flag(self, size_t flag_id) except *:
|
||||||
e.g.
|
return lexeme_check_dist_flag(self._c, flag_id)
|
||||||
|
|
||||||
>>> from spacy.en import EN
|
|
||||||
>>> lexeme = EN.lookup(u'Nasa')
|
|
||||||
>>> lexeme.check_flag(EN.IS_UPPER)
|
|
||||||
False
|
|
||||||
>>> lexeme.check_flag(EN.OFT_UPPER)
|
|
||||||
True
|
|
||||||
"""
|
|
||||||
return lexeme_check_flag(self._c, flag_id)
|
|
||||||
|
|
||||||
cpdef unicode string_view(self, size_t view_id):
|
cpdef unicode string_view(self, size_t view_id):
|
||||||
"""Lexemes may store language-specific string-view features, obtained
|
return lexeme_get_string(self._c, view_id)
|
||||||
by transforming the string, possibly in light of distributional information.
|
|
||||||
The string-view features are accessed by providing an ID constant to this
|
|
||||||
function.
|
|
||||||
|
|
||||||
The ID constants are exposed as global variables in the language module,
|
|
||||||
e.g.
|
|
||||||
|
|
||||||
>>> from spacy.en import EN
|
|
||||||
>>> lexeme = EN.lookup(u'Nasa')
|
|
||||||
>>> lexeme.string_view(EN.CANON_CASED)
|
|
||||||
u'NASA'
|
|
||||||
>>> lexeme.string_view(EN.SHAPE)
|
|
||||||
u'Xxxx'
|
|
||||||
>>> lexeme.string_view(EN.NON_SPARSE)
|
|
||||||
u'Xxxx'
|
|
||||||
"""
|
|
||||||
return lexeme_string_view(self._c, view_id)
|
|
||||||
|
|
|
@ -16,26 +16,26 @@ def words():
|
||||||
"!d", "\nd"]
|
"!d", "\nd"]
|
||||||
|
|
||||||
def test_is_alpha(words):
|
def test_is_alpha(words):
|
||||||
assert is_alpha(words[0], 0, {}, {}) == False
|
assert not is_alpha(words[0])
|
||||||
assert is_alpha(words[1], 0, {}, {}) == False
|
assert not is_alpha(words[1])
|
||||||
assert is_alpha(words[2], 0, {}, {}) == False
|
assert not is_alpha(words[2])
|
||||||
assert is_alpha(words[3], 0, {}, {}) == True
|
assert is_alpha(words[3])
|
||||||
assert is_alpha(words[4], 0, {}, {}) == True
|
assert is_alpha(words[4])
|
||||||
assert is_alpha(words[5], 0, {}, {}) == False
|
assert not is_alpha(words[5])
|
||||||
assert is_alpha(words[6], 0, {}, {}) == False
|
assert not is_alpha(words[6])
|
||||||
assert is_alpha(words[7], 0, {}, {}) == False
|
assert not is_alpha(words[7])
|
||||||
assert is_alpha(words[8], 0, {}, {}) == False
|
assert not is_alpha(words[8])
|
||||||
assert is_alpha(words[9], 0, {}, {}) == False
|
assert not is_alpha(words[9])
|
||||||
|
|
||||||
|
|
||||||
def test_is_digit(words):
|
def test_is_digit(words):
|
||||||
assert is_digit(words[0], 0, {}, {}) == True
|
assert is_digit(words[0])
|
||||||
assert is_digit(words[1], 0, {}, {}) == False
|
assert not is_digit(words[1])
|
||||||
assert is_digit(words[2], 0, {}, {}) == False
|
assert not is_digit(words[2])
|
||||||
assert is_digit(words[3], 0, {}, {}) == False
|
assert not is_digit(words[3])
|
||||||
assert is_digit(words[4], 0, {}, {}) == False
|
assert not is_digit(words[4])
|
||||||
assert is_digit(words[5], 0, {}, {}) == False
|
assert not is_digit(words[5])
|
||||||
assert is_digit(words[6], 0, {}, {}) == False
|
assert not is_digit(words[6])
|
||||||
assert is_digit(words[7], 0, {}, {}) == False
|
assert not is_digit(words[7])
|
||||||
assert is_digit(words[8], 0, {}, {}) == False
|
assert not is_digit(words[8])
|
||||||
assert is_digit(words[9], 0, {}, {}) == False
|
assert not is_digit(words[9])
|
||||||
|
|
|
@ -5,12 +5,12 @@ from spacy.orth import is_punct
|
||||||
|
|
||||||
|
|
||||||
def test_comma():
|
def test_comma():
|
||||||
assert is_punct(',', 0, {}, {}) == True
|
assert is_punct(',')
|
||||||
|
|
||||||
|
|
||||||
def test_space():
|
def test_space():
|
||||||
assert is_punct(' ', 0, {}, {}) == False
|
assert not is_punct(' ')
|
||||||
|
|
||||||
|
|
||||||
def test_letter():
|
def test_letter():
|
||||||
assert is_punct('a', 0, {}, {}) == False
|
assert not is_punct('a')
|
||||||
|
|
|
@ -3,23 +3,24 @@ from __future__ import unicode_literals
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from spacy.en import *
|
from spacy.en import *
|
||||||
|
from spacy.lexeme import *
|
||||||
|
|
||||||
|
|
||||||
def test_is_alpha():
|
def test_is_alpha():
|
||||||
the = EN.lookup('the')
|
the = EN.lookup('the')
|
||||||
assert the.check_flag(EN.fl_is_alpha)
|
assert the.check_orth_flag(LexOrth_alpha)
|
||||||
year = EN.lookup('1999')
|
year = EN.lookup('1999')
|
||||||
assert not year.check_flag(EN.fl_is_alpha)
|
assert not year.check_orth_flag(LexOrth_alpha)
|
||||||
mixed = EN.lookup('hello1')
|
mixed = EN.lookup('hello1')
|
||||||
assert not mixed.check_flag(EN.fl_is_alpha)
|
assert not mixed.check_orth_flag(LexOrth_alpha)
|
||||||
|
|
||||||
|
|
||||||
def test_is_digit():
|
def test_is_digit():
|
||||||
the = EN.lookup('the')
|
the = EN.lookup('the')
|
||||||
assert not the.check_flag(EN.fl_is_digit)
|
assert not the.check_orth_flag(LexOrth_digit)
|
||||||
year = EN.lookup('1999')
|
year = EN.lookup('1999')
|
||||||
assert year.check_flag(EN.fl_is_digit)
|
assert year.check_orth_flag(LexOrth_digit)
|
||||||
mixed = EN.lookup('hello1')
|
mixed = EN.lookup('hello1')
|
||||||
assert not mixed.check_flag(EN.fl_is_digit)
|
assert not mixed.check_orth_flag(LexOrth_digit)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ import pytest
|
||||||
|
|
||||||
import spacy.word
|
import spacy.word
|
||||||
from spacy.en import EN
|
from spacy.en import EN
|
||||||
|
from spacy.lexeme import *
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
@ -12,7 +13,7 @@ def C3P0():
|
||||||
|
|
||||||
|
|
||||||
def test_shape(C3P0):
|
def test_shape(C3P0):
|
||||||
assert C3P0.string_view(EN.v_shape) == "XdXd"
|
assert C3P0.string_view(LexStr_shape) == "XdXd"
|
||||||
|
|
||||||
|
|
||||||
def test_length():
|
def test_length():
|
||||||
|
|
Loading…
Reference in New Issue
Block a user