mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-03 19:08:06 +03:00
* Work on train
This commit is contained in:
parent
4d4d2c0db4
commit
4c4aa2c5c9
44
spacy/en/__init__.py
Normal file
44
spacy/en/__init__.py
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
from os import path
|
||||||
|
|
||||||
|
from .. import orth
|
||||||
|
from ..vocab import Vocab
|
||||||
|
from ..tokenizer import Tokenizer
|
||||||
|
from ..syntax.parser import GreedyParser
|
||||||
|
from ..tokens import Tokens
|
||||||
|
from ..morphology import Morphologizer
|
||||||
|
from .lemmatizer import Lemmatizer
|
||||||
|
from .pos import EnPosTagger
|
||||||
|
from .attrs import get_flags
|
||||||
|
|
||||||
|
|
||||||
|
def get_lex_props(string):
    """Build the lexical-property dict for a single word string.

    The result has two entries: 'flags', computed by ``get_flags(string)``,
    and 'dense', which is always the constant 1.
    """
    props = {}
    props['flags'] = get_flags(string)
    props['dense'] = 1
    return props
|
||||||
|
|
||||||
|
|
||||||
|
class English(object):
    """English pipeline bundling a vocabulary, a tokenizer, and an optional
    POS tagger and parser.

    Calling an instance on a text tokenizes it, then applies whichever of the
    optional components were loaded and are requested for that call.
    """

    def __init__(self, data_dir=None, pos_tag=True, parse=False):
        """Load the pipeline components from ``data_dir``.

        Args:
            data_dir: Directory the components are loaded from. When None,
                the 'data' directory next to this module is used.
            pos_tag: If true, build an ``EnPosTagger``; otherwise
                ``self.pos_tagger`` is set to None.
            parse: If true, build a ``GreedyParser``; otherwise
                ``self.parser`` is set to None.
        """
        if data_dir is None:
            data_dir = path.join(path.dirname(__file__), 'data')

        self.vocab = Vocab.from_dir(data_dir, get_lex_props=get_lex_props)
        self.tokenizer = Tokenizer.from_dir(self.vocab, data_dir)

        if not pos_tag:
            self.pos_tagger = None
        else:
            # The tagger wraps a morphologizer, which in turn needs the
            # vocabulary's string table and a lemmatizer read from the
            # 'wordnet' sub-directory of data_dir.
            strings = self.vocab.strings
            lemmatizer = Lemmatizer(path.join(data_dir, 'wordnet'))
            morphologizer = Morphologizer.from_dir(strings, lemmatizer,
                                                   data_dir)
            self.pos_tagger = EnPosTagger(data_dir, morphologizer)

        self.parser = GreedyParser(data_dir) if parse else None

    def __call__(self, text, pos_tag=True, parse=True):
        """Tokenize ``text`` and optionally tag and parse the result.

        A component runs only if it was loaded in ``__init__`` *and* its
        flag is true for this call. The tagger and parser are invoked on the
        token object and their return values are ignored.

        Args:
            text: The string handed to ``self.tokenizer.tokenize``.
            pos_tag: Set to false to skip the POS tagger for this call.
            parse: Set to false to skip the parser for this call.

        Returns:
            The object returned by ``self.tokenizer.tokenize(text)``.
        """
        doc = self.tokenizer.tokenize(text)

        tagger = self.pos_tagger
        if tagger and pos_tag:
            tagger(doc)

        parser = self.parser
        if parser and parse:
            parser.parse(doc)

        return doc
|
|
@ -1,13 +1,13 @@
|
||||||
from ..lexeme cimport FLAG0, FLAG1, FLAG2, FLAG3, FLAG4, FLAG5, FLAG6, FLAG7
|
from ..typedefs cimport FLAG0, FLAG1, FLAG2, FLAG3, FLAG4, FLAG5, FLAG6, FLAG7
|
||||||
from ..lexeme cimport FLAG8, FLAG9
|
from ..typedefs cimport FLAG8, FLAG9
|
||||||
from ..lexeme cimport ID as _ID
|
from ..typedefs cimport ID as _ID
|
||||||
from ..lexeme cimport SIC as _SIC
|
from ..typedefs cimport SIC as _SIC
|
||||||
from ..lexeme cimport SHAPE as _SHAPE
|
from ..typedefs cimport SHAPE as _SHAPE
|
||||||
from ..lexeme cimport DENSE as _DENSE
|
from ..typedefs cimport DENSE as _DENSE
|
||||||
from ..lexeme cimport SHAPE as _SHAPE
|
from ..typedefs cimport SHAPE as _SHAPE
|
||||||
from ..lexeme cimport PREFIX as _PREFIX
|
from ..typedefs cimport PREFIX as _PREFIX
|
||||||
from ..lexeme cimport SUFFIX as _SUFFIX
|
from ..typedefs cimport SUFFIX as _SUFFIX
|
||||||
from ..lexeme cimport LEMMA as _LEMMA
|
from ..typedefs cimport LEMMA as _LEMMA
|
||||||
|
|
||||||
|
|
||||||
# Work around the lack of global cpdef variables
|
# Work around the lack of global cpdef variables
|
||||||
|
|
|
@ -3,4 +3,4 @@ from ..morphology cimport Morphologizer
|
||||||
|
|
||||||
|
|
||||||
cdef class EnPosTagger(Tagger):
|
cdef class EnPosTagger(Tagger):
|
||||||
cdef Morphologizer morphologizer
|
cdef readonly Morphologizer morphologizer
|
||||||
|
|
|
@ -1,88 +1,9 @@
|
||||||
from .typedefs cimport hash_t, flags_t, id_t, len_t, tag_t, attr_t
|
from .typedefs cimport hash_t, flags_t, id_t, len_t, tag_t, attr_t, attr_id_t
|
||||||
|
from .typedefs cimport ID, SIC, DENSE, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER, POS_TYPE
|
||||||
from .structs cimport Lexeme
|
from .structs cimport Lexeme
|
||||||
from .strings cimport StringStore
|
from .strings cimport StringStore
|
||||||
|
|
||||||
|
|
||||||
# Reserve 64 values for flag features
|
|
||||||
cpdef enum attr_id_t:
|
|
||||||
FLAG0
|
|
||||||
FLAG1
|
|
||||||
FLAG2
|
|
||||||
FLAG3
|
|
||||||
FLAG4
|
|
||||||
FLAG5
|
|
||||||
FLAG6
|
|
||||||
FLAG7
|
|
||||||
FLAG8
|
|
||||||
FLAG9
|
|
||||||
FLAG10
|
|
||||||
FLAG11
|
|
||||||
FLAG12
|
|
||||||
FLAG13
|
|
||||||
FLAG14
|
|
||||||
FLAG15
|
|
||||||
FLAG16
|
|
||||||
FLAG17
|
|
||||||
FLAG18
|
|
||||||
FLAG19
|
|
||||||
FLAG20
|
|
||||||
FLAG21
|
|
||||||
FLAG22
|
|
||||||
FLAG23
|
|
||||||
FLAG24
|
|
||||||
FLAG25
|
|
||||||
FLAG26
|
|
||||||
FLAG27
|
|
||||||
FLAG28
|
|
||||||
FLAG29
|
|
||||||
FLAG30
|
|
||||||
FLAG31
|
|
||||||
FLAG32
|
|
||||||
FLAG33
|
|
||||||
FLAG34
|
|
||||||
FLAG35
|
|
||||||
FLAG36
|
|
||||||
FLAG37
|
|
||||||
FLAG38
|
|
||||||
FLAG39
|
|
||||||
FLAG40
|
|
||||||
FLAG41
|
|
||||||
FLAG42
|
|
||||||
FLAG43
|
|
||||||
FLAG44
|
|
||||||
FLAG45
|
|
||||||
FLAG46
|
|
||||||
FLAG47
|
|
||||||
FLAG48
|
|
||||||
FLAG49
|
|
||||||
FLAG50
|
|
||||||
FLAG51
|
|
||||||
FLAG52
|
|
||||||
FLAG53
|
|
||||||
FLAG54
|
|
||||||
FLAG55
|
|
||||||
FLAG56
|
|
||||||
FLAG57
|
|
||||||
FLAG58
|
|
||||||
FLAG59
|
|
||||||
FLAG60
|
|
||||||
FLAG61
|
|
||||||
FLAG62
|
|
||||||
FLAG63
|
|
||||||
|
|
||||||
ID
|
|
||||||
SIC
|
|
||||||
DENSE
|
|
||||||
SHAPE
|
|
||||||
PREFIX
|
|
||||||
SUFFIX
|
|
||||||
|
|
||||||
LENGTH
|
|
||||||
CLUSTER
|
|
||||||
POS_TYPE
|
|
||||||
LEMMA
|
|
||||||
|
|
||||||
|
|
||||||
cdef Lexeme EMPTY_LEXEME
|
cdef Lexeme EMPTY_LEXEME
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,6 @@ cpdef Lexeme init(id_t i, unicode string, hash_t hashed,
|
||||||
lex.prefix = string_store[string[:1]]
|
lex.prefix = string_store[string[:1]]
|
||||||
lex.suffix = string_store[string[-3:]]
|
lex.suffix = string_store[string[-3:]]
|
||||||
lex.shape = string_store[orth.word_shape(string)]
|
lex.shape = string_store[orth.word_shape(string)]
|
||||||
lex.dense = string_store[props['dense']]
|
|
||||||
|
|
||||||
lex.flags = props.get('flags', 0)
|
lex.flags = props.get('flags', 0)
|
||||||
return lex
|
return lex
|
||||||
|
|
|
@ -59,9 +59,10 @@ cdef class Morphologizer:
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_dir(cls, StringStore strings, object lemmatizer, data_dir):
|
def from_dir(cls, StringStore strings, object lemmatizer, data_dir):
|
||||||
tag_map = None
|
tagger_cfg = json.loads(open(path.join(data_dir, 'pos', 'config.json')).read())
|
||||||
irregulars = None
|
tag_map = tagger_cfg['tag_map']
|
||||||
tag_names = None
|
tag_names = tagger_cfg['tag_names']
|
||||||
|
irregulars = json.loads(open(path.join(data_dir, 'morphs.json')).read())
|
||||||
return cls(strings, lemmatizer, tag_map=tag_map, irregulars=irregulars,
|
return cls(strings, lemmatizer, tag_map=tag_map, irregulars=irregulars,
|
||||||
tag_names=tag_names)
|
tag_names=tag_names)
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,11 @@ cdef inline void slice_unicode(UniStr* s, Py_UNICODE* chars, int start, int end)
|
||||||
s.key = hash64(s.chars, <int>(s.n * sizeof(Py_UNICODE)), 0)
|
s.key = hash64(s.chars, <int>(s.n * sizeof(Py_UNICODE)), 0)
|
||||||
|
|
||||||
|
|
||||||
|
cdef class _SymbolMap:
    # C-level attribute declarations for the symbol table: a dict and a list.
    cdef:
        dict _string_to_id
        list _id_to_string
|
||||||
|
|
||||||
|
|
||||||
cdef class StringStore:
|
cdef class StringStore:
|
||||||
cdef Pool mem
|
cdef Pool mem
|
||||||
cdef Utf8Str* strings
|
cdef Utf8Str* strings
|
||||||
|
|
|
@ -9,13 +9,42 @@ from .typedefs cimport hash_t
|
||||||
SEPARATOR = '\n|-SEP-|\n'
|
SEPARATOR = '\n|-SEP-|\n'
|
||||||
|
|
||||||
|
|
||||||
|
cdef class _SymbolMap:
    """Two-way table between strings and small integer ids.

    Id 0 is reserved for the empty string. Looking up a string that has not
    been seen before assigns it the next free id.
    """
    def __init__(self):
        self._string_to_id = {'': 0}
        self._id_to_string = ['']

    def __iter__(self):
        """Yield ``(string, id)`` for every entry except the reserved id 0."""
        # Count from 1: slot 0 is dropped by the slice, so the first string
        # yielded is the one stored under id 1. This keeps the yielded ids
        # equal to the ids __getitem__ hands out.
        for id_, string in enumerate(self._id_to_string[1:], 1):
            yield string, id_

    def __getitem__(self, object string_or_id):
        """Map an id to its string, or a string to its id.

        Args:
            string_or_id: An integer id, or a byte/unicode string. Unicode
                input is encoded to UTF-8 before lookup, so the stored (and
                returned) strings are the encoded form.

        Returns:
            The stored string for an integer argument; otherwise the integer
            id of the string, which is newly assigned if the string was not
            already present.

        Raises:
            IndexError: For an integer id below 1 or beyond the last
                assigned id.
        """
        if isinstance(string_or_id, (int, long)):
            # Valid ids run from 1 to the number of stored strings minus one;
            # id 0 (the empty-string placeholder) is not retrievable.
            if string_or_id < 1 or string_or_id >= len(self._id_to_string):
                raise IndexError(string_or_id)
            return self._id_to_string[string_or_id]

        string = string_or_id
        if isinstance(string, unicode):
            string = string.encode('utf8')
        if string in self._string_to_id:
            return self._string_to_id[string]

        # Unseen string: the next id is the current table size, and both
        # directions of the mapping are updated together.
        id_ = len(self._string_to_id)
        self._string_to_id[string] = id_
        self._id_to_string.append(string)
        return id_
|
||||||
|
|
||||||
|
|
||||||
cdef class StringStore:
|
cdef class StringStore:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self._map = PreshMap()
|
self._map = PreshMap()
|
||||||
self._resize_at = 10000
|
self._resize_at = 10000
|
||||||
self.strings = <Utf8Str*>self.mem.alloc(self._resize_at, sizeof(Utf8Str))
|
self.strings = <Utf8Str*>self.mem.alloc(self._resize_at, sizeof(Utf8Str))
|
||||||
self.size = 1
|
self.pos_tags = _SymbolMap()
|
||||||
|
self.dep_tags = _SymbolMap()
|
||||||
|
|
||||||
property size:
|
property size:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
|
|
|
@ -1,11 +1,10 @@
|
||||||
from libc.stdint cimport uint32_t, uint64_t
|
|
||||||
from thinc.features cimport Extractor
|
from thinc.features cimport Extractor
|
||||||
from thinc.learner cimport LinearModel
|
from thinc.learner cimport LinearModel
|
||||||
|
|
||||||
from .arc_eager cimport TransitionSystem
|
from .arc_eager cimport TransitionSystem
|
||||||
|
|
||||||
from ..tokens cimport Tokens, TokenC
|
from ..structs cimport TokenC
|
||||||
from ._state cimport State
|
from ..tokens cimport Tokens
|
||||||
|
|
||||||
|
|
||||||
cdef class GreedyParser:
|
cdef class GreedyParser:
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from os import path
|
from os import path
|
||||||
|
import re
|
||||||
|
|
||||||
from cython.operator cimport dereference as deref
|
from cython.operator cimport dereference as deref
|
||||||
from cython.operator cimport preincrement as preinc
|
from cython.operator cimport preincrement as preinc
|
||||||
|
@ -27,7 +28,7 @@ cdef class Tokenizer:
|
||||||
self._prefix_re = prefix_re
|
self._prefix_re = prefix_re
|
||||||
self._suffix_re = suffix_re
|
self._suffix_re = suffix_re
|
||||||
self._infix_re = infix_re
|
self._infix_re = infix_re
|
||||||
self.vocab = Vocab(self.get_props)
|
self.vocab = vocab
|
||||||
self._load_special_tokenization(rules)
|
self._load_special_tokenization(rules)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -39,11 +40,12 @@ cdef class Tokenizer:
|
||||||
|
|
||||||
assert path.exists(data_dir) and path.isdir(data_dir)
|
assert path.exists(data_dir) and path.isdir(data_dir)
|
||||||
rules, prefix_re, suffix_re, infix_re = util.read_lang_data(data_dir)
|
rules, prefix_re, suffix_re, infix_re = util.read_lang_data(data_dir)
|
||||||
return cls(vocab, rules, prefix_re, suffix_re, infix_re)
|
return cls(vocab, rules, re.compile(prefix_re), re.compile(suffix_re),
|
||||||
|
re.compile(infix_re))
|
||||||
|
|
||||||
cpdef Tokens tokens_from_list(self, list strings):
|
cpdef Tokens tokens_from_list(self, list strings):
|
||||||
cdef int length = sum([len(s) for s in strings])
|
cdef int length = sum([len(s) for s in strings])
|
||||||
cdef Tokens tokens = Tokens(self.vocab.strings, length)
|
cdef Tokens tokens = Tokens(self.vocab, length)
|
||||||
if length == 0:
|
if length == 0:
|
||||||
return tokens
|
return tokens
|
||||||
cdef UniStr string_struct
|
cdef UniStr string_struct
|
||||||
|
@ -76,7 +78,7 @@ cdef class Tokenizer:
|
||||||
tokens (Tokens): A Tokens object, giving access to a sequence of Lexemes.
|
tokens (Tokens): A Tokens object, giving access to a sequence of Lexemes.
|
||||||
"""
|
"""
|
||||||
cdef int length = len(string)
|
cdef int length = len(string)
|
||||||
cdef Tokens tokens = Tokens(self.vocab.strings, length)
|
cdef Tokens tokens = Tokens(self.vocab, length)
|
||||||
if length == 0:
|
if length == 0:
|
||||||
return tokens
|
return tokens
|
||||||
cdef int i = 0
|
cdef int i = 0
|
||||||
|
|
|
@ -4,11 +4,11 @@ import numpy as np
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
|
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
|
from thinc.typedefs cimport atom_t
|
||||||
|
|
||||||
from .structs cimport Lexeme, TokenC, Morphology
|
from .typedefs cimport flags_t
|
||||||
|
from .structs cimport Morphology, TokenC, Lexeme
|
||||||
from .typedefs cimport flags_t, attr_t, flags_t
|
from .vocab cimport Vocab
|
||||||
|
|
||||||
from .strings cimport StringStore
|
from .strings cimport StringStore
|
||||||
|
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ ctypedef fused LexemeOrToken:
|
||||||
|
|
||||||
cdef class Tokens:
|
cdef class Tokens:
|
||||||
cdef Pool mem
|
cdef Pool mem
|
||||||
cdef StringStore strings
|
cdef Vocab vocab
|
||||||
cdef list tag_names
|
cdef list tag_names
|
||||||
|
|
||||||
cdef TokenC* data
|
cdef TokenC* data
|
||||||
|
@ -36,7 +36,7 @@ cdef class Tokens:
|
||||||
|
|
||||||
|
|
||||||
cdef class Token:
|
cdef class Token:
|
||||||
cdef public StringStore strings
|
cdef readonly StringStore string_store
|
||||||
cdef public int i
|
cdef public int i
|
||||||
cdef public int idx
|
cdef public int idx
|
||||||
cdef int pos
|
cdef int pos
|
||||||
|
@ -44,18 +44,18 @@ cdef class Token:
|
||||||
cdef public int head
|
cdef public int head
|
||||||
cdef public int dep_tag
|
cdef public int dep_tag
|
||||||
|
|
||||||
cdef public attr_t id
|
cdef public atom_t id
|
||||||
cdef public attr_t cluster
|
cdef public atom_t cluster
|
||||||
cdef public attr_t length
|
cdef public atom_t length
|
||||||
cdef public attr_t postype
|
cdef public atom_t postype
|
||||||
cdef public attr_t sensetype
|
cdef public atom_t sensetype
|
||||||
|
|
||||||
cdef public attr_t sic
|
cdef public atom_t sic
|
||||||
cdef public attr_t norm
|
cdef public atom_t norm
|
||||||
cdef public attr_t shape
|
cdef public atom_t shape
|
||||||
cdef public attr_t asciied
|
cdef public atom_t asciied
|
||||||
cdef public attr_t prefix
|
cdef public atom_t prefix
|
||||||
cdef public attr_t suffix
|
cdef public atom_t suffix
|
||||||
|
|
||||||
cdef public float prob
|
cdef public float prob
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,9 @@
|
||||||
from preshed.maps cimport PreshMap
|
from preshed.maps cimport PreshMap
|
||||||
from preshed.counter cimport PreshCounter
|
from preshed.counter cimport PreshCounter
|
||||||
|
|
||||||
from .lexeme cimport get_attr, EMPTY_LEXEME, LEMMA, attr_id_t
|
from .lexeme cimport get_attr, EMPTY_LEXEME
|
||||||
|
from .typedefs cimport attr_id_t, attr_t
|
||||||
|
from .typedefs cimport LEMMA
|
||||||
cimport cython
|
cimport cython
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -30,8 +32,8 @@ cdef class Tokens:
|
||||||
>>> from spacy.en import EN
|
>>> from spacy.en import EN
|
||||||
>>> tokens = EN.tokenize('An example sentence.')
|
>>> tokens = EN.tokenize('An example sentence.')
|
||||||
"""
|
"""
|
||||||
def __init__(self, StringStore string_store, string_length=0):
|
def __init__(self, Vocab vocab, string_length=0):
|
||||||
self.string_store = string_store
|
self.vocab = vocab
|
||||||
if string_length >= 3:
|
if string_length >= 3:
|
||||||
size = int(string_length / 3.0)
|
size = int(string_length / 3.0)
|
||||||
else:
|
else:
|
||||||
|
@ -50,7 +52,7 @@ cdef class Tokens:
|
||||||
|
|
||||||
def __getitem__(self, i):
|
def __getitem__(self, i):
|
||||||
bounds_check(i, self.length, PADDING)
|
bounds_check(i, self.length, PADDING)
|
||||||
return Token(self.string_store, i, self.data[i].idx, self.data[i].pos,
|
return Token(self.vocab.strings, i, self.data[i].idx, self.data[i].pos,
|
||||||
self.data[i].lemma, self.data[i].head, self.data[i].dep_tag,
|
self.data[i].lemma, self.data[i].head, self.data[i].dep_tag,
|
||||||
self.data[i].lex[0])
|
self.data[i].lex[0])
|
||||||
|
|
||||||
|
@ -119,10 +121,10 @@ cdef class Token:
|
||||||
int pos, int lemma, int head, int dep_tag, dict lex):
|
int pos, int lemma, int head, int dep_tag, dict lex):
|
||||||
self.string_store = string_store
|
self.string_store = string_store
|
||||||
self.idx = idx
|
self.idx = idx
|
||||||
self.pos = pos
|
self.pos_id = pos
|
||||||
self.i = i
|
self.i = i
|
||||||
self.head = head
|
self.head = head
|
||||||
self.dep_tag = dep_tag
|
self.dep_id = dep_tag
|
||||||
self.id = lex['id']
|
self.id = lex['id']
|
||||||
|
|
||||||
self.lemma = lemma
|
self.lemma = lemma
|
||||||
|
@ -154,6 +156,9 @@ cdef class Token:
|
||||||
cdef bytes utf8string = self.string_store[self.lemma]
|
cdef bytes utf8string = self.string_store[self.lemma]
|
||||||
return utf8string.decode('utf8')
|
return utf8string.decode('utf8')
|
||||||
|
|
||||||
|
property dep:
    def __get__(self):
        # Look the label up through the integer stored as ``dep_id`` by the
        # constructor. Reading ``self.dep`` here would re-enter this getter
        # and recurse without end.
        # NOTE(review): the visible .pxd hunk still declares ``dep_tag``
        # rather than ``dep_id`` -- confirm the attribute is declared.
        return self.string_store.dep_tags[self.dep_id]
|
||||||
property pos:
|
property pos:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.lang.pos_tagger.tag_names[self.pos]
|
return self.string_store.pos_tags[self.pos]
|
||||||
|
|
|
@ -21,6 +21,87 @@ cpdef enum univ_tag_t:
|
||||||
N_UNIV_TAGS
|
N_UNIV_TAGS
|
||||||
|
|
||||||
|
|
||||||
|
# Reserve 64 values for flag features
|
||||||
|
cpdef enum attr_id_t:
    # The first 64 enumerators (values 0-63) are the flag slots.
    FLAG0, FLAG1, FLAG2, FLAG3, FLAG4, FLAG5, FLAG6, FLAG7
    FLAG8, FLAG9, FLAG10, FLAG11, FLAG12, FLAG13, FLAG14, FLAG15
    FLAG16, FLAG17, FLAG18, FLAG19, FLAG20, FLAG21, FLAG22, FLAG23
    FLAG24, FLAG25, FLAG26, FLAG27, FLAG28, FLAG29, FLAG30, FLAG31
    FLAG32, FLAG33, FLAG34, FLAG35, FLAG36, FLAG37, FLAG38, FLAG39
    FLAG40, FLAG41, FLAG42, FLAG43, FLAG44, FLAG45, FLAG46, FLAG47
    FLAG48, FLAG49, FLAG50, FLAG51, FLAG52, FLAG53, FLAG54, FLAG55
    FLAG56, FLAG57, FLAG58, FLAG59, FLAG60, FLAG61, FLAG62, FLAG63

    # Non-flag attribute ids continue directly after the flag slots.
    ID, SIC, DENSE, SHAPE, PREFIX, SUFFIX

    LENGTH, CLUSTER, POS_TYPE, LEMMA
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ctypedef uint64_t hash_t
|
ctypedef uint64_t hash_t
|
||||||
ctypedef char* utf8_t
|
ctypedef char* utf8_t
|
||||||
ctypedef uint32_t attr_t
|
ctypedef uint32_t attr_t
|
||||||
|
|
Loading…
Reference in New Issue
Block a user