mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-23 15:54:13 +03:00
Tidy up and fix formatting and imports
This commit is contained in:
parent
fefe6684cd
commit
0739ae7b76
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||
|
||||
import six
|
||||
import sys
|
||||
import json
|
||||
import ujson
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
|
@ -28,14 +28,14 @@ if is_python2:
|
|||
unicode_ = unicode
|
||||
basestring_ = basestring
|
||||
input_ = raw_input
|
||||
json_dumps = lambda data: json.dumps(data, indent=2).decode('utf8')
|
||||
json_dumps = lambda data: ujson.dumps(data, indent=2).decode('utf8')
|
||||
|
||||
elif is_python3:
|
||||
bytes_ = bytes
|
||||
unicode_ = str
|
||||
basestring_ = str
|
||||
input_ = input
|
||||
json_dumps = lambda data: json.dumps(data, indent=2)
|
||||
json_dumps = lambda data: ujson.dumps(data, indent=2)
|
||||
|
||||
|
||||
def symlink_to(orig, dest):
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from . import about
|
||||
|
|
|
@ -7,17 +7,17 @@ out of "context") is in features/extractor.pyx
|
|||
The atomic feature names are listed in a big enum, so that the feature tuples
|
||||
can refer to them.
|
||||
"""
|
||||
from libc.string cimport memset
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from libc.string cimport memset
|
||||
from itertools import combinations
|
||||
from cymem.cymem cimport Pool
|
||||
|
||||
from ..structs cimport TokenC
|
||||
|
||||
from .stateclass cimport StateClass
|
||||
from ._state cimport StateC
|
||||
|
||||
from cymem.cymem cimport Pool
|
||||
|
||||
|
||||
cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:
|
||||
if token is NULL:
|
||||
|
|
|
@ -1,29 +1,26 @@
|
|||
# cython: profile=True
|
||||
# cython: cdivision=True
|
||||
# cython: infer_types=True
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
|
||||
|
||||
import ctypes
|
||||
import os
|
||||
|
||||
from ..structs cimport TokenC
|
||||
from libc.stdint cimport uint32_t
|
||||
from libc.string cimport memcpy
|
||||
from cymem.cymem cimport Pool
|
||||
|
||||
from .stateclass cimport StateClass
|
||||
from ._state cimport StateC, is_space_token
|
||||
from .nonproj import PseudoProjectivity
|
||||
from .nonproj import is_nonproj_tree
|
||||
from .transition_system cimport do_func_t, get_cost_func_t
|
||||
from .transition_system cimport move_cost_func_t, label_cost_func_t
|
||||
from ..gold cimport GoldParse
|
||||
from ..gold cimport GoldParseC
|
||||
from ..attrs cimport TAG, HEAD, DEP, ENT_IOB, ENT_TYPE, IS_SPACE
|
||||
from ..lexeme cimport Lexeme
|
||||
|
||||
from libc.stdint cimport uint32_t
|
||||
from libc.string cimport memcpy
|
||||
|
||||
from cymem.cymem cimport Pool
|
||||
from .stateclass cimport StateClass
|
||||
from ._state cimport StateC, is_space_token
|
||||
from .nonproj import PseudoProjectivity
|
||||
from .nonproj import is_nonproj_tree
|
||||
from ..structs cimport TokenC
|
||||
|
||||
|
||||
DEF NON_MONOTONIC = True
|
||||
|
|
|
@ -1,50 +1,34 @@
|
|||
"""
|
||||
MALT-style dependency parser
|
||||
"""
|
||||
# cython: profile=True
|
||||
# cython: experimental_cpp_class_def=True
|
||||
# cython: cdivision=True
|
||||
# cython: infer_types=True
|
||||
"""
|
||||
MALT-style dependency parser
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals, print_function
|
||||
cimport cython
|
||||
|
||||
from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
|
||||
|
||||
from libc.stdint cimport uint32_t, uint64_t
|
||||
from libc.string cimport memset, memcpy
|
||||
from libc.stdlib cimport rand
|
||||
from libc.math cimport log, exp, isnan, isinf
|
||||
import random
|
||||
import os.path
|
||||
from os import path
|
||||
import shutil
|
||||
import json
|
||||
import math
|
||||
|
||||
from cymem.cymem cimport Pool, Address
|
||||
from murmurhash.mrmr cimport real_hash64 as hash64
|
||||
from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t
|
||||
|
||||
|
||||
from util import Config
|
||||
|
||||
from thinc.linear.features cimport ConjunctionExtracter
|
||||
from thinc.structs cimport FeatureC, ExampleC
|
||||
|
||||
from thinc.extra.search cimport Beam
|
||||
from thinc.extra.search cimport MaxViolation
|
||||
from thinc.extra.search cimport Beam, MaxViolation
|
||||
from thinc.extra.eg cimport Example
|
||||
from thinc.extra.mb cimport Minibatch
|
||||
|
||||
from ..structs cimport TokenC
|
||||
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..strings cimport StringStore
|
||||
|
||||
from .transition_system cimport TransitionSystem, Transition
|
||||
|
||||
from ..gold cimport GoldParse
|
||||
|
||||
from . import _parse_features
|
||||
from ._parse_features cimport CONTEXT_SIZE
|
||||
from ._parse_features cimport fill_context
|
||||
|
@ -266,4 +250,3 @@ def is_gold(StateClass state, GoldParse gold, StringStore strings):
|
|||
id_, word, tag, head, dep, ner = gold.orig_annot[gold.cand_to_gold[i]]
|
||||
truth.add((id_, head, dep))
|
||||
return truth == predicted
|
||||
|
||||
|
|
|
@ -1,9 +1,14 @@
|
|||
from spacy.parts_of_speech cimport NOUN, PROPN, PRON
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ..parts_of_speech cimport NOUN, PROPN, PRON
|
||||
|
||||
|
||||
def english_noun_chunks(obj):
|
||||
'''Detect base noun phrases from a dependency parse.
|
||||
Works on both Doc and Span.'''
|
||||
"""
|
||||
Detect base noun phrases from a dependency parse.
|
||||
Works on both Doc and Span.
|
||||
"""
|
||||
labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj',
|
||||
'attr', 'ROOT', 'root']
|
||||
doc = obj.doc # Ensure works on both Doc and Span.
|
||||
|
|
|
@ -1,17 +1,16 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .transition_system cimport Transition
|
||||
from .transition_system cimport do_func_t
|
||||
|
||||
from ..structs cimport TokenC, Entity
|
||||
|
||||
from thinc.typedefs cimport weight_t
|
||||
from ..gold cimport GoldParseC
|
||||
from ..gold cimport GoldParse
|
||||
from ..attrs cimport ENT_TYPE, ENT_IOB
|
||||
|
||||
from .stateclass cimport StateClass
|
||||
from ._state cimport StateC
|
||||
from .transition_system cimport Transition
|
||||
from .transition_system cimport do_func_t
|
||||
from ..structs cimport TokenC, Entity
|
||||
from ..gold cimport GoldParseC
|
||||
from ..gold cimport GoldParse
|
||||
from ..attrs cimport ENT_TYPE, ENT_IOB
|
||||
|
||||
|
||||
cdef enum:
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
from copy import copy
|
||||
|
||||
from ..tokens.doc cimport Doc
|
||||
from spacy.attrs import DEP, HEAD
|
||||
from ..attrs import DEP, HEAD
|
||||
|
||||
|
||||
def ancestors(tokenid, heads):
|
||||
|
@ -201,5 +202,3 @@ class PseudoProjectivity:
|
|||
filtered_sents.append(((ids,words,tags,heads,filtered_labels,iob), ctnts))
|
||||
filtered.append((raw_text, filtered_sents))
|
||||
return filtered
|
||||
|
||||
|
||||
|
|
|
@ -1,56 +1,44 @@
|
|||
# cython: infer_types=True
|
||||
"""
|
||||
MALT-style dependency parser
|
||||
"""
|
||||
# coding: utf-8
|
||||
# cython: infer_types=True
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import Counter
|
||||
import ujson
|
||||
|
||||
cimport cython
|
||||
cimport cython.parallel
|
||||
|
||||
from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
from libc.stdint cimport uint32_t, uint64_t
|
||||
from libc.string cimport memset, memcpy
|
||||
from libc.stdlib cimport malloc, calloc, free
|
||||
|
||||
import os.path
|
||||
from collections import Counter
|
||||
from os import path
|
||||
import shutil
|
||||
import json
|
||||
import sys
|
||||
from .nonproj import PseudoProjectivity
|
||||
|
||||
from cymem.cymem cimport Pool, Address
|
||||
from murmurhash.mrmr cimport hash64
|
||||
from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t
|
||||
from thinc.linear.avgtron cimport AveragedPerceptron
|
||||
from thinc.linalg cimport VecVec
|
||||
from thinc.structs cimport SparseArrayC
|
||||
from thinc.structs cimport SparseArrayC, FeatureC, ExampleC
|
||||
from thinc.extra.eg cimport Example
|
||||
from cymem.cymem cimport Pool, Address
|
||||
from murmurhash.mrmr cimport hash64
|
||||
from preshed.maps cimport MapStruct
|
||||
from preshed.maps cimport map_get
|
||||
|
||||
from thinc.structs cimport FeatureC
|
||||
from thinc.structs cimport ExampleC
|
||||
from thinc.extra.eg cimport Example
|
||||
|
||||
from util import Config
|
||||
|
||||
from ..structs cimport TokenC
|
||||
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..strings cimport StringStore
|
||||
|
||||
from .transition_system import OracleError
|
||||
from .transition_system cimport TransitionSystem, Transition
|
||||
|
||||
from ..gold cimport GoldParse
|
||||
|
||||
from . import _parse_features
|
||||
from ._parse_features cimport CONTEXT_SIZE
|
||||
from ._parse_features cimport fill_context
|
||||
from .stateclass cimport StateClass
|
||||
from ._state cimport StateC
|
||||
from .nonproj import PseudoProjectivity
|
||||
from .transition_system import OracleError
|
||||
from .transition_system cimport TransitionSystem, Transition
|
||||
from ..structs cimport TokenC
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..strings cimport StringStore
|
||||
from ..gold cimport GoldParse
|
||||
|
||||
|
||||
USE_FTRL = False
|
||||
DEBUG = False
|
||||
|
@ -80,7 +68,9 @@ cdef class ParserModel(AveragedPerceptron):
|
|||
return nr_feat
|
||||
|
||||
def update(self, Example eg, itn=0):
|
||||
'''Does regression on negative cost. Sort of cute?'''
|
||||
"""
|
||||
Does regression on negative cost. Sort of cute?
|
||||
"""
|
||||
self.time += 1
|
||||
cdef int best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
|
||||
cdef int guess = eg.guess
|
||||
|
@ -132,10 +122,13 @@ cdef class ParserModel(AveragedPerceptron):
|
|||
|
||||
|
||||
cdef class Parser:
|
||||
"""Base class of the DependencyParser and EntityRecognizer."""
|
||||
"""
|
||||
Base class of the DependencyParser and EntityRecognizer.
|
||||
"""
|
||||
@classmethod
|
||||
def load(cls, path, Vocab vocab, TransitionSystem=None, require=False, **cfg):
|
||||
"""Load the statistical model from the supplied path.
|
||||
"""
|
||||
Load the statistical model from the supplied path.
|
||||
|
||||
Arguments:
|
||||
path (Path):
|
||||
|
@ -148,7 +141,7 @@ cdef class Parser:
|
|||
The newly constructed object.
|
||||
"""
|
||||
with (path / 'config.json').open() as file_:
|
||||
cfg = json.load(file_)
|
||||
cfg = ujson.load(file_)
|
||||
# TODO: remove this shim when we don't have to support older data
|
||||
if 'labels' in cfg and 'actions' not in cfg:
|
||||
cfg['actions'] = cfg.pop('labels')
|
||||
|
@ -168,7 +161,8 @@ cdef class Parser:
|
|||
return self
|
||||
|
||||
def __init__(self, Vocab vocab, TransitionSystem=None, ParserModel model=None, **cfg):
|
||||
"""Create a Parser.
|
||||
"""
|
||||
Create a Parser.
|
||||
|
||||
Arguments:
|
||||
vocab (Vocab):
|
||||
|
@ -198,7 +192,8 @@ cdef class Parser:
|
|||
return (Parser, (self.vocab, self.moves, self.model), None, None)
|
||||
|
||||
def __call__(self, Doc tokens):
|
||||
"""Apply the entity recognizer, setting the annotations onto the Doc object.
|
||||
"""
|
||||
Apply the entity recognizer, setting the annotations onto the Doc object.
|
||||
|
||||
Arguments:
|
||||
doc (Doc): The document to be processed.
|
||||
|
@ -215,7 +210,8 @@ cdef class Parser:
|
|||
self.moves.finalize_doc(tokens)
|
||||
|
||||
def pipe(self, stream, int batch_size=1000, int n_threads=2):
|
||||
"""Process a stream of documents.
|
||||
"""
|
||||
Process a stream of documents.
|
||||
|
||||
Arguments:
|
||||
stream: The sequence of documents to process.
|
||||
|
@ -303,7 +299,8 @@ cdef class Parser:
|
|||
return 0
|
||||
|
||||
def update(self, Doc tokens, GoldParse gold, itn=0):
|
||||
"""Update the statistical model.
|
||||
"""
|
||||
Update the statistical model.
|
||||
|
||||
Arguments:
|
||||
doc (Doc):
|
||||
|
@ -342,7 +339,8 @@ cdef class Parser:
|
|||
return loss
|
||||
|
||||
def step_through(self, Doc doc, GoldParse gold=None):
|
||||
"""Set up a stepwise state, to introspect and control the transition sequence.
|
||||
"""
|
||||
Set up a stepwise state, to introspect and control the transition sequence.
|
||||
|
||||
Arguments:
|
||||
doc (Doc): The document to step through.
|
||||
|
@ -426,7 +424,9 @@ cdef class StepwiseState:
|
|||
|
||||
@property
|
||||
def costs(self):
|
||||
'''Find the action-costs for the current state'''
|
||||
"""
|
||||
Find the action-costs for the current state.
|
||||
"""
|
||||
self.parser.moves.set_costs(self.eg.c.is_valid, self.eg.c.costs,
|
||||
self.stcls, self.gold)
|
||||
costs = {}
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from libc.string cimport memcpy, memset
|
||||
from libc.stdint cimport uint32_t
|
||||
|
||||
from ..vocab cimport EMPTY_LEXEME
|
||||
from ..structs cimport Entity
|
||||
from ..lexeme cimport Lexeme
|
||||
|
@ -28,6 +32,6 @@ cdef class StateClass:
|
|||
top = words[self.S(0)] + '_%d' % self.S_(0).head
|
||||
second = words[self.S(1)] + '_%d' % self.S_(1).head
|
||||
third = words[self.S(2)] + '_%d' % self.S_(2).head
|
||||
n0 = words[self.B(0)]
|
||||
n1 = words[self.B(1)]
|
||||
n0 = words[self.B(0)]
|
||||
n1 = words[self.B(1)]
|
||||
return ' '.join((third, second, top, '|', n0, n1))
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
# cython: infer_types=True
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
|
||||
from cymem.cymem cimport Pool
|
||||
from thinc.typedefs cimport weight_t
|
||||
from collections import defaultdict
|
||||
|
@ -6,7 +10,6 @@ from collections import defaultdict
|
|||
from ..structs cimport TokenC
|
||||
from .stateclass cimport StateClass
|
||||
from ..attrs cimport TAG, HEAD, DEP, ENT_TYPE, ENT_IOB
|
||||
from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
|
||||
|
||||
|
||||
cdef weight_t MIN_SCORE = -90000
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
from os import path
|
||||
import json
|
||||
|
||||
class Config(object):
|
||||
def __init__(self, **kwargs):
|
||||
for key, value in kwargs.items():
|
||||
setattr(self, key, value)
|
||||
|
||||
def get(self, attr, default=None):
|
||||
return self.__dict__.get(attr, default)
|
||||
|
||||
@classmethod
|
||||
def write(cls, model_dir, name, **kwargs):
|
||||
open(path.join(model_dir, '%s.json' % name), 'w').write(json.dumps(kwargs))
|
||||
|
||||
@classmethod
|
||||
def read(cls, model_dir, name):
|
||||
return cls(**json.load(open(path.join(model_dir, '%s.json' % name))))
|
|
@ -1,15 +1,18 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
cimport cython
|
||||
cimport numpy as np
|
||||
import numpy
|
||||
import numpy.linalg
|
||||
import struct
|
||||
|
||||
from libc.string cimport memcpy, memset
|
||||
from libc.stdint cimport uint32_t
|
||||
from libc.math cimport sqrt
|
||||
|
||||
import numpy
|
||||
import numpy.linalg
|
||||
import struct
|
||||
cimport numpy as np
|
||||
import six
|
||||
import warnings
|
||||
|
||||
from .span cimport Span
|
||||
from .token cimport Token
|
||||
from ..lexeme cimport Lexeme
|
||||
from ..lexeme cimport EMPTY_LEXEME
|
||||
from ..typedefs cimport attr_t, flags_t
|
||||
|
@ -19,11 +22,10 @@ from ..attrs cimport POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE
|
|||
from ..parts_of_speech cimport CCONJ, PUNCT, NOUN
|
||||
from ..parts_of_speech cimport univ_pos_t
|
||||
from ..lexeme cimport Lexeme
|
||||
from .span cimport Span
|
||||
from .token cimport Token
|
||||
from ..serialize.bits cimport BitArray
|
||||
from ..util import normalize_slice
|
||||
from ..syntax.iterators import CHUNKERS
|
||||
from ..compat import is_config
|
||||
|
||||
|
||||
DEF PADDING = 5
|
||||
|
@ -76,7 +78,7 @@ cdef class Doc:
|
|||
|
||||
"""
|
||||
def __init__(self, Vocab vocab, words=None, spaces=None, orths_and_spaces=None):
|
||||
'''
|
||||
"""
|
||||
Create a Doc object.
|
||||
|
||||
Aside: Implementation
|
||||
|
@ -97,7 +99,7 @@ cdef class Doc:
|
|||
A list of boolean values, of the same length as words. True
|
||||
means that the word is followed by a space, False means it is not.
|
||||
If None, defaults to [True]*len(words)
|
||||
'''
|
||||
"""
|
||||
self.vocab = vocab
|
||||
size = 20
|
||||
self.mem = Pool()
|
||||
|
@ -158,7 +160,7 @@ cdef class Doc:
|
|||
self.is_parsed = True
|
||||
|
||||
def __getitem__(self, object i):
|
||||
'''
|
||||
"""
|
||||
doc[i]
|
||||
Get the Token object at position i, where i is an integer.
|
||||
Negative indexing is supported, and follows the usual Python
|
||||
|
@ -172,7 +174,7 @@ cdef class Doc:
|
|||
are not supported, as `Span` objects must be contiguous (cannot have gaps).
|
||||
You can use negative indices and open-ended ranges, which have their
|
||||
normal Python semantics.
|
||||
'''
|
||||
"""
|
||||
if isinstance(i, slice):
|
||||
start, stop = normalize_slice(len(self), i.start, i.stop, i.step)
|
||||
return Span(self, start, stop, label=0)
|
||||
|
@ -186,7 +188,7 @@ cdef class Doc:
|
|||
return Token.cinit(self.vocab, &self.c[i], i, self)
|
||||
|
||||
def __iter__(self):
|
||||
'''
|
||||
"""
|
||||
for token in doc
|
||||
Iterate over `Token` objects, from which the annotations can
|
||||
be easily accessed. This is the main way of accessing Token
|
||||
|
@ -194,7 +196,7 @@ cdef class Doc:
|
|||
Python. If faster-than-Python speeds are required, you can
|
||||
instead access the annotations as a numpy array, or access the
|
||||
underlying C data directly from Cython.
|
||||
'''
|
||||
"""
|
||||
cdef int i
|
||||
for i in range(self.length):
|
||||
if self._py_tokens[i] is not None:
|
||||
|
@ -203,10 +205,10 @@ cdef class Doc:
|
|||
yield Token.cinit(self.vocab, &self.c[i], i, self)
|
||||
|
||||
def __len__(self):
|
||||
'''
|
||||
"""
|
||||
len(doc)
|
||||
The number of tokens in the document.
|
||||
'''
|
||||
"""
|
||||
return self.length
|
||||
|
||||
def __unicode__(self):
|
||||
|
@ -216,7 +218,7 @@ cdef class Doc:
|
|||
return u''.join([t.text_with_ws for t in self]).encode('utf-8')
|
||||
|
||||
def __str__(self):
|
||||
if six.PY3:
|
||||
if is_config(python3=True):
|
||||
return self.__unicode__()
|
||||
return self.__bytes__()
|
||||
|
||||
|
@ -228,7 +230,8 @@ cdef class Doc:
|
|||
return self
|
||||
|
||||
def similarity(self, other):
|
||||
'''Make a semantic similarity estimate. The default estimate is cosine
|
||||
"""
|
||||
Make a semantic similarity estimate. The default estimate is cosine
|
||||
similarity using an average of word vectors.
|
||||
|
||||
Arguments:
|
||||
|
@ -237,7 +240,7 @@ cdef class Doc:
|
|||
|
||||
Return:
|
||||
score (float): A scalar similarity score. Higher is more similar.
|
||||
'''
|
||||
"""
|
||||
if 'similarity' in self.user_hooks:
|
||||
return self.user_hooks['similarity'](self, other)
|
||||
if self.vector_norm == 0 or other.vector_norm == 0:
|
||||
|
@ -245,9 +248,9 @@ cdef class Doc:
|
|||
return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||
|
||||
property has_vector:
|
||||
'''
|
||||
"""
|
||||
A boolean value indicating whether a word vector is associated with the object.
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
if 'has_vector' in self.user_hooks:
|
||||
return self.user_hooks['has_vector'](self)
|
||||
|
@ -255,11 +258,11 @@ cdef class Doc:
|
|||
return any(token.has_vector for token in self)
|
||||
|
||||
property vector:
|
||||
'''
|
||||
"""
|
||||
A real-valued meaning representation. Defaults to an average of the token vectors.
|
||||
|
||||
Type: numpy.ndarray[ndim=1, dtype='float32']
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
if 'vector' in self.user_hooks:
|
||||
return self.user_hooks['vector'](self)
|
||||
|
@ -294,17 +297,21 @@ cdef class Doc:
|
|||
return self.text
|
||||
|
||||
property text:
|
||||
'''A unicode representation of the document text.'''
|
||||
"""
|
||||
A unicode representation of the document text.
|
||||
"""
|
||||
def __get__(self):
|
||||
return u''.join(t.text_with_ws for t in self)
|
||||
|
||||
property text_with_ws:
|
||||
'''An alias of Doc.text, provided for duck-type compatibility with Span and Token.'''
|
||||
"""
|
||||
An alias of Doc.text, provided for duck-type compatibility with Span and Token.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.text
|
||||
|
||||
property ents:
|
||||
'''
|
||||
"""
|
||||
Yields named-entity `Span` objects, if the entity recognizer
|
||||
has been applied to the document. Iterate over the span to get
|
||||
individual Token objects, or access the label:
|
||||
|
@ -318,7 +325,7 @@ cdef class Doc:
|
|||
assert ents[0].label_ == 'PERSON'
|
||||
assert ents[0].orth_ == 'Best'
|
||||
assert ents[0].text == 'Mr. Best'
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
cdef int i
|
||||
cdef const TokenC* token
|
||||
|
@ -382,13 +389,13 @@ cdef class Doc:
|
|||
self.c[start].ent_iob = 3
|
||||
|
||||
property noun_chunks:
|
||||
'''
|
||||
"""
|
||||
Yields base noun-phrase #[code Span] objects, if the document
|
||||
has been syntactically parsed. A base noun phrase, or
|
||||
'NP chunk', is a noun phrase that does not permit other NPs to
|
||||
be nested within it – so no NP-level coordination, no prepositional
|
||||
phrases, and no relative clauses. For example:
|
||||
'''
|
||||
phrases, and no relative clauses.
|
||||
"""
|
||||
def __get__(self):
|
||||
if not self.is_parsed:
|
||||
raise ValueError(
|
||||
|
@ -496,7 +503,8 @@ cdef class Doc:
|
|||
return output
|
||||
|
||||
def count_by(self, attr_id_t attr_id, exclude=None, PreshCounter counts=None):
|
||||
"""Produce a dict of {attribute (int): count (ints)} frequencies, keyed
|
||||
"""
|
||||
Produce a dict of {attribute (int): count (ints)} frequencies, keyed
|
||||
by the values of the given attribute ID.
|
||||
|
||||
Example:
|
||||
|
@ -563,8 +571,9 @@ cdef class Doc:
|
|||
self.c[i] = parsed[i]
|
||||
|
||||
def from_array(self, attrs, array):
|
||||
'''Write to a `Doc` object, from an `(M, N)` array of attributes.
|
||||
'''
|
||||
"""
|
||||
Write to a `Doc` object, from an `(M, N)` array of attributes.
|
||||
"""
|
||||
cdef int i, col
|
||||
cdef attr_id_t attr_id
|
||||
cdef TokenC* tokens = self.c
|
||||
|
@ -603,19 +612,23 @@ cdef class Doc:
|
|||
return self
|
||||
|
||||
def to_bytes(self):
|
||||
'''Serialize, producing a byte string.'''
|
||||
"""
|
||||
Serialize, producing a byte string.
|
||||
"""
|
||||
byte_string = self.vocab.serializer.pack(self)
|
||||
cdef uint32_t length = len(byte_string)
|
||||
return struct.pack('I', length) + byte_string
|
||||
|
||||
def from_bytes(self, data):
|
||||
'''Deserialize, loading from bytes.'''
|
||||
"""
|
||||
Deserialize, loading from bytes.
|
||||
"""
|
||||
self.vocab.serializer.unpack_into(data[4:], self)
|
||||
return self
|
||||
|
||||
@staticmethod
|
||||
def read_bytes(file_):
|
||||
'''
|
||||
"""
|
||||
A static method, used to read serialized #[code Doc] objects from
|
||||
a file. For example:
|
||||
|
||||
|
@ -630,7 +643,7 @@ cdef class Doc:
|
|||
for byte_string in Doc.read_bytes(file_):
|
||||
docs.append(Doc(nlp.vocab).from_bytes(byte_string))
|
||||
assert len(docs) == 2
|
||||
'''
|
||||
"""
|
||||
keep_reading = True
|
||||
while keep_reading:
|
||||
try:
|
||||
|
@ -644,7 +657,8 @@ cdef class Doc:
|
|||
yield n_bytes_str + data
|
||||
|
||||
def merge(self, int start_idx, int end_idx, *args, **attributes):
|
||||
"""Retokenize the document, such that the span at doc.text[start_idx : end_idx]
|
||||
"""
|
||||
Retokenize the document, such that the span at doc.text[start_idx : end_idx]
|
||||
is merged into a single token. If start_idx and end_idx do not mark start
|
||||
and end token boundaries, the document remains unchanged.
|
||||
|
||||
|
@ -658,7 +672,6 @@ cdef class Doc:
|
|||
token (Token):
|
||||
The newly merged token, or None if the start and end indices did
|
||||
not fall at token boundaries.
|
||||
|
||||
"""
|
||||
cdef unicode tag, lemma, ent_type
|
||||
if len(args) == 3:
|
||||
|
|
|
@ -1,26 +1,31 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
from collections import defaultdict
|
||||
|
||||
cimport numpy as np
|
||||
import numpy
|
||||
import numpy.linalg
|
||||
cimport numpy as np
|
||||
from libc.math cimport sqrt
|
||||
import six
|
||||
|
||||
from .doc cimport token_by_start, token_by_end
|
||||
from ..structs cimport TokenC, LexemeC
|
||||
from ..typedefs cimport flags_t, attr_t, hash_t
|
||||
from ..attrs cimport attr_id_t
|
||||
from ..parts_of_speech cimport univ_pos_t
|
||||
from ..util import normalize_slice
|
||||
from .doc cimport token_by_start, token_by_end
|
||||
from ..attrs cimport IS_PUNCT, IS_SPACE
|
||||
from ..lexeme cimport Lexeme
|
||||
from ..compat import is_config
|
||||
|
||||
|
||||
cdef class Span:
|
||||
"""A slice from a Doc object."""
|
||||
"""
|
||||
A slice from a Doc object.
|
||||
"""
|
||||
def __cinit__(self, Doc doc, int start, int end, int label=0, vector=None,
|
||||
vector_norm=None):
|
||||
'''Create a Span object from the slice doc[start : end]
|
||||
"""
|
||||
Create a Span object from the slice doc[start : end]
|
||||
|
||||
Arguments:
|
||||
doc (Doc): The parent document.
|
||||
|
@ -30,7 +35,7 @@ cdef class Span:
|
|||
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
|
||||
Returns:
|
||||
Span The newly constructed object.
|
||||
'''
|
||||
"""
|
||||
if not (0 <= start <= end <= len(doc)):
|
||||
raise IndexError
|
||||
|
||||
|
@ -68,7 +73,7 @@ cdef class Span:
|
|||
return self.end - self.start
|
||||
|
||||
def __repr__(self):
|
||||
if six.PY3:
|
||||
if is_config(python3=True):
|
||||
return self.text
|
||||
return self.text.encode('utf-8')
|
||||
|
||||
|
@ -89,7 +94,8 @@ cdef class Span:
|
|||
yield self.doc[i]
|
||||
|
||||
def merge(self, *args, **attributes):
|
||||
"""Retokenize the document, such that the span is merged into a single token.
|
||||
"""
|
||||
Retokenize the document, such that the span is merged into a single token.
|
||||
|
||||
Arguments:
|
||||
**attributes:
|
||||
|
@ -102,7 +108,8 @@ cdef class Span:
|
|||
return self.doc.merge(self.start_char, self.end_char, *args, **attributes)
|
||||
|
||||
def similarity(self, other):
|
||||
'''Make a semantic similarity estimate. The default estimate is cosine
|
||||
"""
|
||||
Make a semantic similarity estimate. The default estimate is cosine
|
||||
similarity using an average of word vectors.
|
||||
|
||||
Arguments:
|
||||
|
@ -111,7 +118,7 @@ cdef class Span:
|
|||
|
||||
Return:
|
||||
score (float): A scalar similarity score. Higher is more similar.
|
||||
'''
|
||||
"""
|
||||
if 'similarity' in self.doc.user_span_hooks:
|
||||
self.doc.user_span_hooks['similarity'](self, other)
|
||||
if self.vector_norm == 0.0 or other.vector_norm == 0.0:
|
||||
|
@ -133,11 +140,12 @@ cdef class Span:
|
|||
self.end = end + 1
|
||||
|
||||
property sent:
|
||||
'''The sentence span that this span is a part of.
|
||||
"""
|
||||
The sentence span that this span is a part of.
|
||||
|
||||
Returns:
|
||||
Span The sentence this is part of.
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
if 'sent' in self.doc.user_span_hooks:
|
||||
return self.doc.user_span_hooks['sent'](self)
|
||||
|
@ -198,13 +206,13 @@ cdef class Span:
|
|||
return u''.join([t.text_with_ws for t in self])
|
||||
|
||||
property noun_chunks:
|
||||
'''
|
||||
"""
|
||||
Yields base noun-phrase #[code Span] objects, if the document
|
||||
has been syntactically parsed. A base noun phrase, or
|
||||
'NP chunk', is a noun phrase that does not permit other NPs to
|
||||
be nested within it – so no NP-level coordination, no prepositional
|
||||
phrases, and no relative clauses. For example:
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
if not self.doc.is_parsed:
|
||||
raise ValueError(
|
||||
|
@ -223,17 +231,16 @@ cdef class Span:
|
|||
yield span
|
||||
|
||||
property root:
|
||||
"""The token within the span that's highest in the parse tree. If there's a tie, the earlist is prefered.
|
||||
"""
|
||||
The token within the span that's highest in the parse tree. If there's a
|
||||
tie, the earlist is prefered.
|
||||
|
||||
Returns:
|
||||
Token: The root token.
|
||||
|
||||
i.e. has the
|
||||
shortest path to the root of the sentence (or is the root itself).
|
||||
|
||||
If multiple words are equally high in the tree, the first word is taken.
|
||||
|
||||
For example:
|
||||
i.e. has the shortest path to the root of the sentence (or is the root
|
||||
itself). If multiple words are equally high in the tree, the first word
|
||||
is taken. For example:
|
||||
|
||||
>>> toks = nlp(u'I like New York in Autumn.')
|
||||
|
||||
|
@ -303,7 +310,8 @@ cdef class Span:
|
|||
return self.doc[root]
|
||||
|
||||
property lefts:
|
||||
"""Tokens that are to the left of the span, whose head is within the Span.
|
||||
"""
|
||||
Tokens that are to the left of the span, whose head is within the Span.
|
||||
|
||||
Yields: Token A left-child of a token of the span.
|
||||
"""
|
||||
|
@ -314,7 +322,8 @@ cdef class Span:
|
|||
yield left
|
||||
|
||||
property rights:
|
||||
"""Tokens that are to the right of the Span, whose head is within the Span.
|
||||
"""
|
||||
Tokens that are to the right of the Span, whose head is within the Span.
|
||||
|
||||
Yields: Token A right-child of a token of the span.
|
||||
"""
|
||||
|
@ -325,7 +334,8 @@ cdef class Span:
|
|||
yield right
|
||||
|
||||
property subtree:
|
||||
"""Tokens that descend from tokens in the span, but fall outside it.
|
||||
"""
|
||||
Tokens that descend from tokens in the span, but fall outside it.
|
||||
|
||||
Yields: Token A descendant of a token within the span.
|
||||
"""
|
||||
|
@ -337,7 +347,9 @@ cdef class Span:
|
|||
yield from word.subtree
|
||||
|
||||
property ent_id:
|
||||
'''An (integer) entity ID. Usually assigned by patterns in the Matcher.'''
|
||||
"""
|
||||
An (integer) entity ID. Usually assigned by patterns in the Matcher.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.root.ent_id
|
||||
|
||||
|
@ -345,9 +357,11 @@ cdef class Span:
|
|||
# TODO
|
||||
raise NotImplementedError(
|
||||
"Can't yet set ent_id from Span. Vote for this feature on the issue "
|
||||
"tracker: http://github.com/spacy-io/spaCy")
|
||||
"tracker: http://github.com/explosion/spaCy/issues")
|
||||
property ent_id_:
|
||||
'''A (string) entity ID. Usually assigned by patterns in the Matcher.'''
|
||||
"""
|
||||
A (string) entity ID. Usually assigned by patterns in the Matcher.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.root.ent_id_
|
||||
|
||||
|
@ -355,7 +369,7 @@ cdef class Span:
|
|||
# TODO
|
||||
raise NotImplementedError(
|
||||
"Can't yet set ent_id_ from Span. Vote for this feature on the issue "
|
||||
"tracker: http://github.com/spacy-io/spaCy")
|
||||
"tracker: http://github.com/explosion/spaCy/issues")
|
||||
|
||||
property orth_:
|
||||
def __get__(self):
|
||||
|
@ -397,5 +411,5 @@ cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
|
|||
raise RuntimeError(
|
||||
"Array bounds exceeded while searching for root word. This likely "
|
||||
"means the parse tree is in an invalid state. Please report this "
|
||||
"issue here: http://github.com/honnibal/spaCy/")
|
||||
"issue here: http://github.com/explosion/spaCy/issues")
|
||||
return n
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# coding: utf8
|
||||
# cython: infer_types=True
|
||||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from libc.string cimport memcpy
|
||||
|
@ -8,20 +8,15 @@ from cpython.mem cimport PyMem_Malloc, PyMem_Free
|
|||
from cython.view cimport array as cvarray
|
||||
cimport numpy as np
|
||||
np.import_array()
|
||||
|
||||
import numpy
|
||||
import six
|
||||
|
||||
|
||||
from ..typedefs cimport hash_t
|
||||
from ..lexeme cimport Lexeme
|
||||
from .. import parts_of_speech
|
||||
|
||||
from ..attrs cimport LEMMA
|
||||
from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
||||
from ..attrs cimport POS, LEMMA, TAG, DEP
|
||||
from ..parts_of_speech cimport CCONJ, PUNCT
|
||||
|
||||
from ..attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE
|
||||
from ..attrs cimport IS_BRACKET
|
||||
from ..attrs cimport IS_QUOTE
|
||||
|
@ -29,12 +24,13 @@ from ..attrs cimport IS_LEFT_PUNCT
|
|||
from ..attrs cimport IS_RIGHT_PUNCT
|
||||
from ..attrs cimport IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL, IS_STOP
|
||||
from ..attrs cimport IS_OOV
|
||||
|
||||
from ..lexeme cimport Lexeme
|
||||
from ..compat import is_config
|
||||
|
||||
|
||||
cdef class Token:
|
||||
"""An individual token --- i.e. a word, punctuation symbol, whitespace, etc.
|
||||
"""
|
||||
An individual token --- i.e. a word, punctuation symbol, whitespace, etc.
|
||||
"""
|
||||
def __cinit__(self, Vocab vocab, Doc doc, int offset):
|
||||
self.vocab = vocab
|
||||
|
@ -46,7 +42,9 @@ cdef class Token:
|
|||
return hash((self.doc, self.i))
|
||||
|
||||
def __len__(self):
|
||||
'''Number of unicode characters in token.text'''
|
||||
"""
|
||||
Number of unicode characters in token.text.
|
||||
"""
|
||||
return self.c.lex.length
|
||||
|
||||
def __unicode__(self):
|
||||
|
@ -56,7 +54,7 @@ cdef class Token:
|
|||
return self.text.encode('utf8')
|
||||
|
||||
def __str__(self):
|
||||
if six.PY3:
|
||||
if is_config(python3=True):
|
||||
return self.__unicode__()
|
||||
return self.__bytes__()
|
||||
|
||||
|
@ -83,27 +81,30 @@ cdef class Token:
|
|||
raise ValueError(op)
|
||||
|
||||
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
|
||||
'''Check the value of a boolean flag.
|
||||
"""
|
||||
Check the value of a boolean flag.
|
||||
|
||||
Arguments:
|
||||
flag_id (int): The ID of the flag attribute.
|
||||
Returns:
|
||||
is_set (bool): Whether the flag is set.
|
||||
'''
|
||||
"""
|
||||
return Lexeme.c_check_flag(self.c.lex, flag_id)
|
||||
|
||||
def nbor(self, int i=1):
|
||||
'''Get a neighboring token.
|
||||
"""
|
||||
Get a neighboring token.
|
||||
|
||||
Arguments:
|
||||
i (int): The relative position of the token to get. Defaults to 1.
|
||||
Returns:
|
||||
neighbor (Token): The token at position self.doc[self.i+i]
|
||||
'''
|
||||
"""
|
||||
return self.doc[self.i+i]
|
||||
|
||||
def similarity(self, other):
|
||||
'''Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
||||
"""
|
||||
Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
||||
|
||||
Arguments:
|
||||
other:
|
||||
|
@ -111,7 +112,7 @@ cdef class Token:
|
|||
Token and Lexeme objects.
|
||||
Returns:
|
||||
score (float): A scalar similarity score. Higher is more similar.
|
||||
'''
|
||||
"""
|
||||
if 'similarity' in self.doc.user_token_hooks:
|
||||
return self.doc.user_token_hooks['similarity'](self)
|
||||
if self.vector_norm == 0 or other.vector_norm == 0:
|
||||
|
@ -209,9 +210,9 @@ cdef class Token:
|
|||
self.c.dep = label
|
||||
|
||||
property has_vector:
|
||||
'''
|
||||
"""
|
||||
A boolean value indicating whether a word vector is associated with the object.
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
if 'has_vector' in self.doc.user_token_hooks:
|
||||
return self.doc.user_token_hooks['has_vector'](self)
|
||||
|
@ -223,11 +224,11 @@ cdef class Token:
|
|||
return False
|
||||
|
||||
property vector:
|
||||
'''
|
||||
"""
|
||||
A real-valued meaning representation.
|
||||
|
||||
Type: numpy.ndarray[ndim=1, dtype='float32']
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
if 'vector' in self.doc.user_token_hooks:
|
||||
return self.doc.user_token_hooks['vector'](self)
|
||||
|
@ -245,6 +246,7 @@ cdef class Token:
|
|||
property repvec:
|
||||
def __get__(self):
|
||||
raise AttributeError("repvec was renamed to vector in v0.100")
|
||||
|
||||
property has_repvec:
|
||||
def __get__(self):
|
||||
raise AttributeError("has_repvec was renamed to has_vector in v0.100")
|
||||
|
@ -265,7 +267,8 @@ cdef class Token:
|
|||
|
||||
property lefts:
|
||||
def __get__(self):
|
||||
"""The leftward immediate children of the word, in the syntactic
|
||||
"""
|
||||
The leftward immediate children of the word, in the syntactic
|
||||
dependency parse.
|
||||
"""
|
||||
cdef int nr_iter = 0
|
||||
|
@ -282,8 +285,10 @@ cdef class Token:
|
|||
|
||||
property rights:
|
||||
def __get__(self):
|
||||
"""The rightward immediate children of the word, in the syntactic
|
||||
dependency parse."""
|
||||
"""
|
||||
The rightward immediate children of the word, in the syntactic
|
||||
dependency parse.
|
||||
"""
|
||||
cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
|
||||
tokens = []
|
||||
cdef int nr_iter = 0
|
||||
|
@ -300,19 +305,21 @@ cdef class Token:
|
|||
yield t
|
||||
|
||||
property children:
|
||||
'''A sequence of the token's immediate syntactic children.
|
||||
"""
|
||||
A sequence of the token's immediate syntactic children.
|
||||
|
||||
Yields: Token A child token such that child.head==self
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
yield from self.lefts
|
||||
yield from self.rights
|
||||
|
||||
property subtree:
|
||||
'''A sequence of all the token's syntactic descendents.
|
||||
"""
|
||||
A sequence of all the token's syntactic descendents.
|
||||
|
||||
Yields: Token A descendent token such that self.is_ancestor(descendent)
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
for word in self.lefts:
|
||||
yield from word.subtree
|
||||
|
@ -321,26 +328,29 @@ cdef class Token:
|
|||
yield from word.subtree
|
||||
|
||||
property left_edge:
|
||||
'''The leftmost token of this token's syntactic descendents.
|
||||
"""
|
||||
The leftmost token of this token's syntactic descendents.
|
||||
|
||||
Returns: Token The first token such that self.is_ancestor(token)
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.doc[self.c.l_edge]
|
||||
|
||||
property right_edge:
|
||||
'''The rightmost token of this token's syntactic descendents.
|
||||
"""
|
||||
The rightmost token of this token's syntactic descendents.
|
||||
|
||||
Returns: Token The last token such that self.is_ancestor(token)
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.doc[self.c.r_edge]
|
||||
|
||||
property ancestors:
|
||||
'''A sequence of this token's syntactic ancestors.
|
||||
"""
|
||||
A sequence of this token's syntactic ancestors.
|
||||
|
||||
Yields: Token A sequence of ancestor tokens such that ancestor.is_ancestor(self)
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
cdef const TokenC* head_ptr = self.c
|
||||
# guard against infinite loop, no token can have
|
||||
|
@ -356,25 +366,29 @@ cdef class Token:
|
|||
return self.is_ancestor(descendant)
|
||||
|
||||
def is_ancestor(self, descendant):
|
||||
'''Check whether this token is a parent, grandparent, etc. of another
|
||||
"""
|
||||
Check whether this token is a parent, grandparent, etc. of another
|
||||
in the dependency tree.
|
||||
|
||||
Arguments:
|
||||
descendant (Token): Another token.
|
||||
Returns:
|
||||
is_ancestor (bool): Whether this token is the ancestor of the descendant.
|
||||
'''
|
||||
"""
|
||||
if self.doc is not descendant.doc:
|
||||
return False
|
||||
return any( ancestor.i == self.i for ancestor in descendant.ancestors )
|
||||
|
||||
property head:
|
||||
'''The syntactic parent, or "governor", of this token.
|
||||
"""
|
||||
The syntactic parent, or "governor", of this token.
|
||||
|
||||
Returns: Token
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
"""The token predicted by the parser to be the head of the current token."""
|
||||
"""
|
||||
The token predicted by the parser to be the head of the current token.
|
||||
"""
|
||||
return self.doc[self.i + self.c.head]
|
||||
def __set__(self, Token new_head):
|
||||
# this function sets the head of self to new_head
|
||||
|
@ -467,10 +481,11 @@ cdef class Token:
|
|||
self.c.head = rel_newhead_i
|
||||
|
||||
property conjuncts:
|
||||
'''A sequence of coordinated tokens, including the token itself.
|
||||
"""
|
||||
A sequence of coordinated tokens, including the token itself.
|
||||
|
||||
Yields: Token A coordinated token
|
||||
'''
|
||||
"""
|
||||
def __get__(self):
|
||||
"""Get a list of conjoined words."""
|
||||
cdef Token word
|
||||
|
@ -501,7 +516,9 @@ cdef class Token:
|
|||
return iob_strings[self.c.ent_iob]
|
||||
|
||||
property ent_id:
|
||||
'''An (integer) entity ID. Usually assigned by patterns in the Matcher.'''
|
||||
"""
|
||||
An (integer) entity ID. Usually assigned by patterns in the Matcher.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.c.ent_id
|
||||
|
||||
|
@ -509,7 +526,9 @@ cdef class Token:
|
|||
self.c.ent_id = key
|
||||
|
||||
property ent_id_:
|
||||
'''A (string) entity ID. Usually assigned by patterns in the Matcher.'''
|
||||
"""
|
||||
A (string) entity ID. Usually assigned by patterns in the Matcher.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.vocab.strings[self.c.ent_id]
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user