mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Merge branch 'develop' into docs/model-docstrings
This commit is contained in:
commit
fe29ceec9e
|
@ -6,7 +6,7 @@ requires = [
|
|||
"cymem>=2.0.2,<2.1.0",
|
||||
"preshed>=3.0.2,<3.1.0",
|
||||
"murmurhash>=0.28.0,<1.1.0",
|
||||
"thinc>=8.0.0a22,<8.0.0a30",
|
||||
"thinc>=8.0.0a23,<8.0.0a30",
|
||||
"blis>=0.4.0,<0.5.0",
|
||||
"pytokenizations",
|
||||
"smart_open>=2.0.0,<3.0.0"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# Our libraries
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a22,<8.0.0a30
|
||||
thinc>=8.0.0a23,<8.0.0a30
|
||||
blis>=0.4.0,<0.5.0
|
||||
ml_datasets>=0.1.1
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
|
|
|
@ -34,13 +34,13 @@ setup_requires =
|
|||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
thinc>=8.0.0a22,<8.0.0a30
|
||||
thinc>=8.0.0a23,<8.0.0a30
|
||||
install_requires =
|
||||
# Our libraries
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a22,<8.0.0a30
|
||||
thinc>=8.0.0a23,<8.0.0a30
|
||||
blis>=0.4.0,<0.5.0
|
||||
wasabi>=0.7.1,<1.1.0
|
||||
srsly>=2.1.0,<3.0.0
|
||||
|
|
|
@ -17,23 +17,28 @@ from .. import displacy
|
|||
def evaluate_cli(
|
||||
# fmt: off
|
||||
model: str = Arg(..., help="Model name or path"),
|
||||
data_path: Path = Arg(..., help="Location of JSON-formatted evaluation data", exists=True),
|
||||
data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
|
||||
output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False),
|
||||
gpu_id: int = Opt(-1, "--gpu-id", "-g", help="Use GPU"),
|
||||
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
|
||||
gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"),
|
||||
displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False),
|
||||
displacy_limit: int = Opt(25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"),
|
||||
# fmt: on
|
||||
):
|
||||
"""
|
||||
Evaluate a model. To render a sample of parses in a HTML file, set an
|
||||
output directory as the displacy_path argument.
|
||||
Evaluate a model. Expects a loadable spaCy model and evaluation data in the
|
||||
binary .spacy format. The --gold-preproc option sets up the evaluation
|
||||
examples with gold-standard sentences and tokens for the predictions. Gold
|
||||
preprocessing helps the annotations align to the tokenization, and may
|
||||
result in sequences of more consistent length. However, it may reduce
|
||||
runtime accuracy due to train/test skew. To render a sample of dependency
|
||||
parses in an HTML file, set an output directory as the displacy_path argument.
|
||||
"""
|
||||
evaluate(
|
||||
model,
|
||||
data_path,
|
||||
output=output,
|
||||
gpu_id=gpu_id,
|
||||
use_gpu=use_gpu,
|
||||
gold_preproc=gold_preproc,
|
||||
displacy_path=displacy_path,
|
||||
displacy_limit=displacy_limit,
|
||||
|
@ -45,7 +50,7 @@ def evaluate(
|
|||
model: str,
|
||||
data_path: Path,
|
||||
output: Optional[Path] = None,
|
||||
gpu_id: int = -1,
|
||||
use_gpu: int = -1,
|
||||
gold_preproc: bool = False,
|
||||
displacy_path: Optional[Path] = None,
|
||||
displacy_limit: int = 25,
|
||||
|
@ -53,8 +58,8 @@ def evaluate(
|
|||
) -> Scorer:
|
||||
msg = Printer(no_print=silent, pretty=not silent)
|
||||
fix_random_seed()
|
||||
if gpu_id >= 0:
|
||||
require_gpu(gpu_id)
|
||||
if use_gpu >= 0:
|
||||
require_gpu(use_gpu)
|
||||
util.set_env_log(False)
|
||||
data_path = util.ensure_path(data_path)
|
||||
output_path = util.ensure_path(output)
|
||||
|
|
|
@ -19,9 +19,6 @@ after_pipeline_creation = null
|
|||
[nlp.tokenizer]
|
||||
@tokenizers = "spacy.Tokenizer.v1"
|
||||
|
||||
[nlp.lemmatizer]
|
||||
@lemmatizers = "spacy.Lemmatizer.v1"
|
||||
|
||||
[components]
|
||||
|
||||
# Training hyper-parameters and additional features.
|
||||
|
|
|
@ -510,7 +510,7 @@ class Errors:
|
|||
E952 = ("The section '{name}' is not a valid section in the provided config.")
|
||||
E953 = ("Mismatched IDs received by the Tok2Vec listener: {id1} vs. {id2}")
|
||||
E954 = ("The Tok2Vec listener did not receive a valid input.")
|
||||
E955 = ("Can't find table '{table}' for language '{lang}' in spacy-lookups-data.")
|
||||
E955 = ("Can't find table(s) '{table}' for language '{lang}' in spacy-lookups-data.")
|
||||
E956 = ("Can't find component '{name}' in [components] block in the config. "
|
||||
"Available components: {opts}")
|
||||
E957 = ("Writing directly to Language.factories isn't needed anymore in "
|
||||
|
@ -633,6 +633,11 @@ class Errors:
|
|||
E1001 = ("Target token outside of matched span for match with tokens "
|
||||
"'{span}' and offset '{index}' matched by patterns '{patterns}'.")
|
||||
E1002 = ("Span index out of range.")
|
||||
E1003 = ("Unsupported lemmatizer mode '{mode}'.")
|
||||
E1004 = ("Missing lemmatizer table(s) found for lemmatizer mode '{mode}'. "
|
||||
"Required tables '{tables}', found '{found}'. If you are not "
|
||||
"providing custom lookups, make sure you have the package "
|
||||
"spacy-lookups-data installed.")
|
||||
|
||||
|
||||
@add_codes
|
||||
|
|
|
@ -1,38 +1,17 @@
|
|||
from typing import Callable
|
||||
from thinc.api import Config
|
||||
from typing import Optional
|
||||
from thinc.api import Model
|
||||
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .lemmatizer import GreekLemmatizer
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
|
||||
from ...lookups import load_lookups
|
||||
from .lemmatizer import GreekLemmatizer
|
||||
from ...lookups import Lookups
|
||||
from ...language import Language
|
||||
from ...util import registry
|
||||
|
||||
|
||||
DEFAULT_CONFIG = """
|
||||
[nlp]
|
||||
|
||||
[nlp.lemmatizer]
|
||||
@lemmatizers = "spacy.el.GreekLemmatizer"
|
||||
"""
|
||||
|
||||
|
||||
@registry.lemmatizers("spacy.el.GreekLemmatizer")
|
||||
def create_lemmatizer() -> Callable[[Language], GreekLemmatizer]:
|
||||
tables = ["lemma_index", "lemma_exc", "lemma_rules"]
|
||||
|
||||
def lemmatizer_factory(nlp: Language) -> GreekLemmatizer:
|
||||
lookups = load_lookups(lang=nlp.lang, tables=tables)
|
||||
return GreekLemmatizer(lookups=lookups)
|
||||
|
||||
return lemmatizer_factory
|
||||
|
||||
|
||||
class GreekDefaults(Language.Defaults):
|
||||
config = Config().from_str(DEFAULT_CONFIG)
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
prefixes = TOKENIZER_PREFIXES
|
||||
suffixes = TOKENIZER_SUFFIXES
|
||||
|
@ -47,4 +26,22 @@ class Greek(Language):
|
|||
Defaults = GreekDefaults
|
||||
|
||||
|
||||
@Greek.factory(
|
||||
"lemmatizer",
|
||||
assigns=["token.lemma"],
|
||||
default_config={"model": None, "mode": "rule", "lookups": None},
|
||||
scores=["lemma_acc"],
|
||||
default_score_weights={"lemma_acc": 1.0},
|
||||
)
|
||||
def make_lemmatizer(
|
||||
nlp: Language,
|
||||
model: Optional[Model],
|
||||
name: str,
|
||||
mode: str,
|
||||
lookups: Optional[Lookups],
|
||||
):
|
||||
lookups = GreekLemmatizer.load_lookups(nlp.lang, mode, lookups)
|
||||
return GreekLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
|
||||
|
||||
|
||||
__all__ = ["Greek"]
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from typing import Dict, List
|
||||
from typing import List
|
||||
|
||||
from ...lemmatizer import Lemmatizer
|
||||
from ...pipeline import Lemmatizer
|
||||
from ...tokens import Token
|
||||
|
||||
|
||||
class GreekLemmatizer(Lemmatizer):
|
||||
|
@ -14,13 +15,27 @@ class GreekLemmatizer(Lemmatizer):
|
|||
not applicable for Greek language.
|
||||
"""
|
||||
|
||||
def lemmatize(
|
||||
self,
|
||||
string: str,
|
||||
index: Dict[str, List[str]],
|
||||
exceptions: Dict[str, Dict[str, List[str]]],
|
||||
rules: Dict[str, List[List[str]]],
|
||||
) -> List[str]:
|
||||
def rule_lemmatize(self, token: Token) -> List[str]:
|
||||
"""Lemmatize using a rule-based approach.
|
||||
|
||||
token (Token): The token to lemmatize.
|
||||
RETURNS (list): The available lemmas for the string.
|
||||
"""
|
||||
cache_key = (token.lower, token.pos)
|
||||
if cache_key in self.cache:
|
||||
return self.cache[cache_key]
|
||||
string = token.text
|
||||
univ_pos = token.pos_.lower()
|
||||
if univ_pos in ("", "eol", "space"):
|
||||
return [string.lower()]
|
||||
|
||||
index_table = self.lookups.get_table("lemma_index", {})
|
||||
exc_table = self.lookups.get_table("lemma_exc", {})
|
||||
rules_table = self.lookups.get_table("lemma_rules", {})
|
||||
index = index_table.get(univ_pos, {})
|
||||
exceptions = exc_table.get(univ_pos, {})
|
||||
rules = rules_table.get(univ_pos, {})
|
||||
|
||||
string = string.lower()
|
||||
forms = []
|
||||
if string in index:
|
||||
|
@ -42,4 +57,6 @@ class GreekLemmatizer(Lemmatizer):
|
|||
forms.extend(oov_forms)
|
||||
if not forms:
|
||||
forms.append(string)
|
||||
return list(set(forms))
|
||||
forms = list(set(forms))
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
|
|
|
@ -1,39 +1,18 @@
|
|||
from typing import Callable
|
||||
from thinc.api import Config
|
||||
from typing import Optional
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .lemmatizer import is_base_form
|
||||
from .punctuation import TOKENIZER_INFIXES
|
||||
from .lemmatizer import EnglishLemmatizer
|
||||
from ...language import Language
|
||||
from ...lemmatizer import Lemmatizer
|
||||
from ...lookups import load_lookups
|
||||
from ...util import registry
|
||||
|
||||
|
||||
DEFAULT_CONFIG = """
|
||||
[nlp]
|
||||
|
||||
[nlp.lemmatizer]
|
||||
@lemmatizers = "spacy.en.EnglishLemmatizer"
|
||||
"""
|
||||
|
||||
|
||||
@registry.lemmatizers("spacy.en.EnglishLemmatizer")
|
||||
def create_lemmatizer() -> Callable[[Language], Lemmatizer]:
|
||||
tables = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]
|
||||
|
||||
def lemmatizer_factory(nlp: Language) -> Lemmatizer:
|
||||
lookups = load_lookups(lang=nlp.lang, tables=tables)
|
||||
return Lemmatizer(lookups=lookups, is_base_form=is_base_form)
|
||||
|
||||
return lemmatizer_factory
|
||||
from ...lookups import Lookups
|
||||
|
||||
|
||||
class EnglishDefaults(Language.Defaults):
|
||||
config = Config().from_str(DEFAULT_CONFIG)
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
infixes = TOKENIZER_INFIXES
|
||||
lex_attr_getters = LEX_ATTRS
|
||||
|
@ -46,4 +25,22 @@ class English(Language):
|
|||
Defaults = EnglishDefaults
|
||||
|
||||
|
||||
@English.factory(
|
||||
"lemmatizer",
|
||||
assigns=["token.lemma"],
|
||||
default_config={"model": None, "mode": "rule", "lookups": None},
|
||||
scores=["lemma_acc"],
|
||||
default_score_weights={"lemma_acc": 1.0},
|
||||
)
|
||||
def make_lemmatizer(
|
||||
nlp: Language,
|
||||
model: Optional[Model],
|
||||
name: str,
|
||||
mode: str,
|
||||
lookups: Optional[Lookups],
|
||||
):
|
||||
lookups = EnglishLemmatizer.load_lookups(nlp.lang, mode, lookups)
|
||||
return EnglishLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
|
||||
|
||||
|
||||
__all__ = ["English"]
|
||||
|
|
|
@ -1,7 +1,14 @@
|
|||
from typing import Optional
|
||||
|
||||
from ...pipeline import Lemmatizer
|
||||
from ...tokens import Token
|
||||
|
||||
def is_base_form(univ_pos: str, morphology: Optional[dict] = None) -> bool:
|
||||
|
||||
class EnglishLemmatizer(Lemmatizer):
|
||||
"""English lemmatizer. Only overrides is_base_form.
|
||||
"""
|
||||
|
||||
def is_base_form(self, token: Token) -> bool:
|
||||
"""
|
||||
Check whether we're dealing with an uninflected paradigm, so we can
|
||||
avoid lemmatization entirely.
|
||||
|
@ -10,27 +17,27 @@ def is_base_form(univ_pos: str, morphology: Optional[dict] = None) -> bool:
|
|||
morphology (dict): The token's morphological features following the
|
||||
Universal Dependencies scheme.
|
||||
"""
|
||||
if morphology is None:
|
||||
morphology = {}
|
||||
if univ_pos == "noun" and morphology.get("Number") == "sing":
|
||||
univ_pos = token.pos_.lower()
|
||||
morphology = token.morph.to_dict()
|
||||
if univ_pos == "noun" and morphology.get("Number") == "Sing":
|
||||
return True
|
||||
elif univ_pos == "verb" and morphology.get("VerbForm") == "inf":
|
||||
elif univ_pos == "verb" and morphology.get("VerbForm") == "Inf":
|
||||
return True
|
||||
# This maps 'VBP' to base form -- probably just need 'IS_BASE'
|
||||
# morphology
|
||||
elif univ_pos == "verb" and (
|
||||
morphology.get("VerbForm") == "fin"
|
||||
and morphology.get("Tense") == "pres"
|
||||
morphology.get("VerbForm") == "Fin"
|
||||
and morphology.get("Tense") == "Pres"
|
||||
and morphology.get("Number") is None
|
||||
):
|
||||
return True
|
||||
elif univ_pos == "adj" and morphology.get("Degree") == "pos":
|
||||
elif univ_pos == "adj" and morphology.get("Degree") == "Pos":
|
||||
return True
|
||||
elif morphology.get("VerbForm") == "inf":
|
||||
elif morphology.get("VerbForm") == "Inf":
|
||||
return True
|
||||
elif morphology.get("VerbForm") == "none":
|
||||
elif morphology.get("VerbForm") == "None":
|
||||
return True
|
||||
elif morphology.get("Degree") == "pos":
|
||||
elif morphology.get("Degree") == "Pos":
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from typing import Callable
|
||||
from thinc.api import Config
|
||||
from typing import Optional
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
|
||||
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
|
||||
|
@ -7,33 +8,12 @@ from .punctuation import TOKENIZER_SUFFIXES
|
|||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .lemmatizer import FrenchLemmatizer, is_base_form
|
||||
from ...lookups import load_lookups
|
||||
from .lemmatizer import FrenchLemmatizer
|
||||
from ...lookups import Lookups
|
||||
from ...language import Language
|
||||
from ...util import registry
|
||||
|
||||
|
||||
DEFAULT_CONFIG = """
|
||||
[nlp]
|
||||
|
||||
[nlp.lemmatizer]
|
||||
@lemmatizers = "spacy.fr.FrenchLemmatizer"
|
||||
"""
|
||||
|
||||
|
||||
@registry.lemmatizers("spacy.fr.FrenchLemmatizer")
|
||||
def create_lemmatizer() -> Callable[[Language], FrenchLemmatizer]:
|
||||
tables = ["lemma_rules", "lemma_index", "lemma_exc", "lemma_lookup"]
|
||||
|
||||
def lemmatizer_factory(nlp: Language) -> FrenchLemmatizer:
|
||||
lookups = load_lookups(lang=nlp.lang, tables=tables)
|
||||
return FrenchLemmatizer(lookups=lookups, is_base_form=is_base_form)
|
||||
|
||||
return lemmatizer_factory
|
||||
|
||||
|
||||
class FrenchDefaults(Language.Defaults):
|
||||
config = Config().from_str(DEFAULT_CONFIG)
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
prefixes = TOKENIZER_PREFIXES
|
||||
infixes = TOKENIZER_INFIXES
|
||||
|
@ -49,4 +29,22 @@ class French(Language):
|
|||
Defaults = FrenchDefaults
|
||||
|
||||
|
||||
@French.factory(
|
||||
"lemmatizer",
|
||||
assigns=["token.lemma"],
|
||||
default_config={"model": None, "mode": "rule", "lookups": None},
|
||||
scores=["lemma_acc"],
|
||||
default_score_weights={"lemma_acc": 1.0},
|
||||
)
|
||||
def make_lemmatizer(
|
||||
nlp: Language,
|
||||
model: Optional[Model],
|
||||
name: str,
|
||||
mode: str,
|
||||
lookups: Optional[Lookups],
|
||||
):
|
||||
lookups = FrenchLemmatizer.load_lookups(nlp.lang, mode, lookups)
|
||||
return FrenchLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
|
||||
|
||||
|
||||
__all__ = ["French"]
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
from typing import Optional, List, Dict
|
||||
from typing import List, Dict
|
||||
|
||||
from ...lemmatizer import Lemmatizer
|
||||
from ...symbols import POS, NOUN, VERB, ADJ, ADV, PRON, DET, AUX, PUNCT, ADP
|
||||
from ...symbols import SCONJ, CCONJ
|
||||
from ...pipeline import Lemmatizer
|
||||
from ...tokens import Token
|
||||
|
||||
|
||||
class FrenchLemmatizer(Lemmatizer):
|
||||
|
@ -15,65 +14,55 @@ class FrenchLemmatizer(Lemmatizer):
|
|||
the lookup table.
|
||||
"""
|
||||
|
||||
def __call__(
|
||||
self, string: str, univ_pos: str, morphology: Optional[dict] = None
|
||||
) -> List[str]:
|
||||
lookup_table = self.lookups.get_table("lemma_lookup", {})
|
||||
if "lemma_rules" not in self.lookups:
|
||||
return [lookup_table.get(string, string)]
|
||||
if univ_pos in (NOUN, "NOUN", "noun"):
|
||||
univ_pos = "noun"
|
||||
elif univ_pos in (VERB, "VERB", "verb"):
|
||||
univ_pos = "verb"
|
||||
elif univ_pos in (ADJ, "ADJ", "adj"):
|
||||
univ_pos = "adj"
|
||||
elif univ_pos in (ADP, "ADP", "adp"):
|
||||
univ_pos = "adp"
|
||||
elif univ_pos in (ADV, "ADV", "adv"):
|
||||
univ_pos = "adv"
|
||||
elif univ_pos in (AUX, "AUX", "aux"):
|
||||
univ_pos = "aux"
|
||||
elif univ_pos in (CCONJ, "CCONJ", "cconj"):
|
||||
univ_pos = "cconj"
|
||||
elif univ_pos in (DET, "DET", "det"):
|
||||
univ_pos = "det"
|
||||
elif univ_pos in (PRON, "PRON", "pron"):
|
||||
univ_pos = "pron"
|
||||
elif univ_pos in (PUNCT, "PUNCT", "punct"):
|
||||
univ_pos = "punct"
|
||||
elif univ_pos in (SCONJ, "SCONJ", "sconj"):
|
||||
univ_pos = "sconj"
|
||||
@classmethod
|
||||
def get_lookups_config(cls, mode: str) -> Dict:
|
||||
if mode == "rule":
|
||||
return {
|
||||
"required_tables": [
|
||||
"lemma_lookup",
|
||||
"lemma_rules",
|
||||
"lemma_exc",
|
||||
"lemma_index",
|
||||
],
|
||||
"optional_tables": [],
|
||||
}
|
||||
else:
|
||||
return [self.lookup(string)]
|
||||
return super().get_lookups_config(mode)
|
||||
|
||||
def rule_lemmatize(self, token: Token) -> List[str]:
|
||||
cache_key = (token.orth, token.pos)
|
||||
if cache_key in self.cache:
|
||||
return self.cache[cache_key]
|
||||
string = token.text
|
||||
univ_pos = token.pos_.lower()
|
||||
if univ_pos in ("", "eol", "space"):
|
||||
return [string.lower()]
|
||||
elif "lemma_rules" not in self.lookups or univ_pos not in (
|
||||
"noun",
|
||||
"verb",
|
||||
"adj",
|
||||
"adp",
|
||||
"adv",
|
||||
"aux",
|
||||
"cconj",
|
||||
"det",
|
||||
"pron",
|
||||
"punct",
|
||||
"sconj",
|
||||
):
|
||||
return self.lookup_lemmatize(token)
|
||||
index_table = self.lookups.get_table("lemma_index", {})
|
||||
exc_table = self.lookups.get_table("lemma_exc", {})
|
||||
rules_table = self.lookups.get_table("lemma_rules", {})
|
||||
lemmas = self.lemmatize(
|
||||
string,
|
||||
index_table.get(univ_pos, {}),
|
||||
exc_table.get(univ_pos, {}),
|
||||
rules_table.get(univ_pos, []),
|
||||
)
|
||||
return lemmas
|
||||
|
||||
def lookup(self, string: str, orth: Optional[int] = None) -> str:
|
||||
lookup_table = self.lookups.get_table("lemma_lookup", {})
|
||||
if orth is not None and orth in lookup_table:
|
||||
return lookup_table[orth][0]
|
||||
return string
|
||||
|
||||
def lemmatize(
|
||||
self,
|
||||
string: str,
|
||||
index: Dict[str, List[str]],
|
||||
exceptions: Dict[str, Dict[str, List[str]]],
|
||||
rules: Dict[str, List[List[str]]],
|
||||
) -> List[str]:
|
||||
lookup_table = self.lookups.get_table("lemma_lookup", {})
|
||||
index = index_table.get(univ_pos, {})
|
||||
exceptions = exc_table.get(univ_pos, {})
|
||||
rules = rules_table.get(univ_pos, [])
|
||||
string = string.lower()
|
||||
forms = []
|
||||
if string in index:
|
||||
forms.append(string)
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
forms.extend(exceptions.get(string, []))
|
||||
oov_forms = []
|
||||
|
@ -90,45 +79,9 @@ class FrenchLemmatizer(Lemmatizer):
|
|||
if not forms:
|
||||
forms.extend(oov_forms)
|
||||
if not forms and string in lookup_table.keys():
|
||||
forms.append(lookup_table[string][0])
|
||||
forms.append(self.lookup_lemmatize(token)[0])
|
||||
if not forms:
|
||||
forms.append(string)
|
||||
return list(set(forms))
|
||||
|
||||
|
||||
def is_base_form(univ_pos: str, morphology: Optional[dict] = None) -> bool:
|
||||
"""
|
||||
Check whether we're dealing with an uninflected paradigm, so we can
|
||||
avoid lemmatization entirely.
|
||||
"""
|
||||
morphology = {} if morphology is None else morphology
|
||||
others = [
|
||||
key
|
||||
for key in morphology
|
||||
if key not in (POS, "Number", "POS", "VerbForm", "Tense")
|
||||
]
|
||||
if univ_pos == "noun" and morphology.get("Number") == "sing":
|
||||
return True
|
||||
elif univ_pos == "verb" and morphology.get("VerbForm") == "inf":
|
||||
return True
|
||||
# This maps 'VBP' to base form -- probably just need 'IS_BASE'
|
||||
# morphology
|
||||
elif univ_pos == "verb" and (
|
||||
morphology.get("VerbForm") == "fin"
|
||||
and morphology.get("Tense") == "pres"
|
||||
and morphology.get("Number") is None
|
||||
and not others
|
||||
):
|
||||
return True
|
||||
elif univ_pos == "adj" and morphology.get("Degree") == "pos":
|
||||
return True
|
||||
elif "VerbForm=inf" in morphology:
|
||||
return True
|
||||
elif "VerbForm=none" in morphology:
|
||||
return True
|
||||
elif "Number=sing" in morphology:
|
||||
return True
|
||||
elif "Degree=pos" in morphology:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
forms = list(set(forms))
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
|
|
|
@ -38,8 +38,6 @@ def create_tokenizer(split_mode: Optional[str] = None):
|
|||
class JapaneseTokenizer(DummyTokenizer):
|
||||
def __init__(self, nlp: Language, split_mode: Optional[str] = None) -> None:
|
||||
self.vocab = nlp.vocab
|
||||
# TODO: is this the right way to do it?
|
||||
self.vocab.morphology.load_tag_map(TAG_MAP)
|
||||
self.split_mode = split_mode
|
||||
self.tokenizer = try_sudachi_import(self.split_mode)
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ from .lex_attrs import LEX_ATTRS
|
|||
from ...language import Language
|
||||
from ...tokens import Doc
|
||||
from ...compat import copy_reg
|
||||
from ...symbols import POS
|
||||
from ...util import DummyTokenizer, registry
|
||||
|
||||
|
||||
|
@ -29,8 +30,6 @@ def create_tokenizer():
|
|||
class KoreanTokenizer(DummyTokenizer):
|
||||
def __init__(self, nlp: Optional[Language] = None):
|
||||
self.vocab = nlp.vocab
|
||||
# TODO: is this the right way to do it?
|
||||
self.vocab.morphology.load_tag_map(TAG_MAP)
|
||||
MeCab = try_mecab_import()
|
||||
self.mecab_tokenizer = MeCab("-F%f[0],%f[7]")
|
||||
|
||||
|
@ -44,6 +43,7 @@ class KoreanTokenizer(DummyTokenizer):
|
|||
for token, dtoken in zip(doc, dtokens):
|
||||
first_tag, sep, eomi_tags = dtoken["tag"].partition("+")
|
||||
token.tag_ = first_tag # stem(어간) or pre-final(선어말 어미)
|
||||
token.pos = TAG_MAP[token.tag_][POS]
|
||||
token.lemma_ = dtoken["lemma"]
|
||||
doc.user_data["full_tags"] = [dt["tag"] for dt in dtokens]
|
||||
return doc
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from typing import Callable
|
||||
from thinc.api import Config
|
||||
from typing import Optional
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
|
@ -7,32 +8,11 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
|||
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
|
||||
from .punctuation import TOKENIZER_SUFFIXES
|
||||
from .lemmatizer import DutchLemmatizer
|
||||
from ...lookups import load_lookups
|
||||
from ...lookups import Lookups
|
||||
from ...language import Language
|
||||
from ...util import registry
|
||||
|
||||
|
||||
DEFAULT_CONFIG = """
|
||||
[nlp]
|
||||
|
||||
[nlp.lemmatizer]
|
||||
@lemmatizers = "spacy.nl.DutchLemmatizer"
|
||||
"""
|
||||
|
||||
|
||||
@registry.lemmatizers("spacy.nl.DutchLemmatizer")
|
||||
def create_lemmatizer() -> Callable[[Language], DutchLemmatizer]:
|
||||
tables = ["lemma_rules", "lemma_index", "lemma_exc", "lemma_lookup"]
|
||||
|
||||
def lemmatizer_factory(nlp: Language) -> DutchLemmatizer:
|
||||
lookups = load_lookups(lang=nlp.lang, tables=tables)
|
||||
return DutchLemmatizer(lookups=lookups)
|
||||
|
||||
return lemmatizer_factory
|
||||
|
||||
|
||||
class DutchDefaults(Language.Defaults):
|
||||
config = Config().from_str(DEFAULT_CONFIG)
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
prefixes = TOKENIZER_PREFIXES
|
||||
infixes = TOKENIZER_INFIXES
|
||||
|
@ -46,4 +26,22 @@ class Dutch(Language):
|
|||
Defaults = DutchDefaults
|
||||
|
||||
|
||||
@Dutch.factory(
|
||||
"lemmatizer",
|
||||
assigns=["token.lemma"],
|
||||
default_config={"model": None, "mode": "rule", "lookups": None},
|
||||
scores=["lemma_acc"],
|
||||
default_score_weights={"lemma_acc": 1.0},
|
||||
)
|
||||
def make_lemmatizer(
|
||||
nlp: Language,
|
||||
model: Optional[Model],
|
||||
name: str,
|
||||
mode: str,
|
||||
lookups: Optional[Lookups],
|
||||
):
|
||||
lookups = DutchLemmatizer.load_lookups(nlp.lang, mode, lookups)
|
||||
return DutchLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
|
||||
|
||||
|
||||
__all__ = ["Dutch"]
|
||||
|
|
|
@ -1,44 +1,34 @@
|
|||
from typing import Optional, List, Dict, Tuple
|
||||
from typing import List, Dict
|
||||
|
||||
from ...lemmatizer import Lemmatizer
|
||||
from ...symbols import NOUN, VERB, ADJ, NUM, DET, PRON, ADP, AUX, ADV
|
||||
from ...pipeline import Lemmatizer
|
||||
from ...tokens import Token
|
||||
|
||||
|
||||
class DutchLemmatizer(Lemmatizer):
|
||||
# Note: CGN does not distinguish AUX verbs, so we treat AUX as VERB.
|
||||
univ_pos_name_variants = {
|
||||
NOUN: "noun",
|
||||
"NOUN": "noun",
|
||||
"noun": "noun",
|
||||
VERB: "verb",
|
||||
"VERB": "verb",
|
||||
"verb": "verb",
|
||||
AUX: "verb",
|
||||
"AUX": "verb",
|
||||
"aux": "verb",
|
||||
ADJ: "adj",
|
||||
"ADJ": "adj",
|
||||
"adj": "adj",
|
||||
ADV: "adv",
|
||||
"ADV": "adv",
|
||||
"adv": "adv",
|
||||
PRON: "pron",
|
||||
"PRON": "pron",
|
||||
"pron": "pron",
|
||||
DET: "det",
|
||||
"DET": "det",
|
||||
"det": "det",
|
||||
ADP: "adp",
|
||||
"ADP": "adp",
|
||||
"adp": "adp",
|
||||
NUM: "num",
|
||||
"NUM": "num",
|
||||
"num": "num",
|
||||
@classmethod
|
||||
def get_lookups_config(cls, mode: str) -> Dict:
|
||||
if mode == "rule":
|
||||
return {
|
||||
"required_tables": [
|
||||
"lemma_lookup",
|
||||
"lemma_rules",
|
||||
"lemma_exc",
|
||||
"lemma_index",
|
||||
],
|
||||
}
|
||||
else:
|
||||
return super().get_lookups_config(mode)
|
||||
|
||||
def __call__(
|
||||
self, string: str, univ_pos: str, morphology: Optional[dict] = None
|
||||
) -> List[str]:
|
||||
def lookup_lemmatize(self, token: Token) -> List[str]:
|
||||
"""Overrides parent method so that a lowercased version of the string
|
||||
is used to search the lookup table. This is necessary because our
|
||||
lookup table consists entirely of lowercase keys."""
|
||||
lookup_table = self.lookups.get_table("lemma_lookup", {})
|
||||
string = token.text.lower()
|
||||
return [lookup_table.get(string, string)]
|
||||
|
||||
# Note: CGN does not distinguish AUX verbs, so we treat AUX as VERB.
|
||||
def rule_lemmatize(self, token: Token) -> List[str]:
|
||||
# Difference 1: self.rules is assumed to be non-None, so no
|
||||
# 'is None' check required.
|
||||
# String lowercased from the get-go. All lemmatization results in
|
||||
|
@ -46,74 +36,61 @@ class DutchLemmatizer(Lemmatizer):
|
|||
# any problems, and it keeps the exceptions indexes small. If this
|
||||
# creates problems for proper nouns, we can introduce a check for
|
||||
# univ_pos == "PROPN".
|
||||
string = string.lower()
|
||||
try:
|
||||
univ_pos = self.univ_pos_name_variants[univ_pos]
|
||||
except KeyError:
|
||||
# Because PROPN not in self.univ_pos_name_variants, proper names
|
||||
# are not lemmatized. They are lowercased, however.
|
||||
return [string]
|
||||
# if string in self.lemma_index.get(univ_pos)
|
||||
cache_key = (token.lower, token.pos)
|
||||
if cache_key in self.cache:
|
||||
return self.cache[cache_key]
|
||||
string = token.text
|
||||
univ_pos = token.pos_.lower()
|
||||
if univ_pos in ("", "eol", "space"):
|
||||
forms = [string.lower()]
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
|
||||
index_table = self.lookups.get_table("lemma_index", {})
|
||||
exc_table = self.lookups.get_table("lemma_exc", {})
|
||||
rules_table = self.lookups.get_table("lemma_rules", {})
|
||||
index = index_table.get(univ_pos, {})
|
||||
exceptions = exc_table.get(univ_pos, {})
|
||||
rules = rules_table.get(univ_pos, {})
|
||||
|
||||
string = string.lower()
|
||||
if univ_pos not in (
|
||||
"noun",
|
||||
"verb",
|
||||
"aux",
|
||||
"adj",
|
||||
"adv",
|
||||
"pron",
|
||||
"det",
|
||||
"adp",
|
||||
"num",
|
||||
):
|
||||
forms = [string]
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
lemma_index = index_table.get(univ_pos, {})
|
||||
# string is already lemma
|
||||
if string in lemma_index:
|
||||
return [string]
|
||||
forms = [string]
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
exc_table = self.lookups.get_table("lemma_exc", {})
|
||||
exceptions = exc_table.get(univ_pos, {})
|
||||
# string is irregular token contained in exceptions index.
|
||||
try:
|
||||
lemma = exceptions[string]
|
||||
return [lemma[0]]
|
||||
forms = [exceptions[string][0]]
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
except KeyError:
|
||||
pass
|
||||
# string corresponds to key in lookup table
|
||||
lookup_table = self.lookups.get_table("lemma_lookup", {})
|
||||
looked_up_lemma = lookup_table.get(string)
|
||||
if looked_up_lemma and looked_up_lemma in lemma_index:
|
||||
return [looked_up_lemma]
|
||||
forms = [looked_up_lemma]
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
rules_table = self.lookups.get_table("lemma_rules", {})
|
||||
forms, is_known = self.lemmatize(
|
||||
string, lemma_index, exceptions, rules_table.get(univ_pos, [])
|
||||
)
|
||||
# Back-off through remaining return value candidates.
|
||||
if forms:
|
||||
if is_known:
|
||||
return forms
|
||||
else:
|
||||
for form in forms:
|
||||
if form in exceptions:
|
||||
return [form]
|
||||
if looked_up_lemma:
|
||||
return [looked_up_lemma]
|
||||
else:
|
||||
return forms
|
||||
elif looked_up_lemma:
|
||||
return [looked_up_lemma]
|
||||
else:
|
||||
return [string]
|
||||
|
||||
# Overrides parent method so that a lowercased version of the string is
|
||||
# used to search the lookup table. This is necessary because our lookup
|
||||
# table consists entirely of lowercase keys.
|
||||
def lookup(self, string: str, orth: Optional[int] = None) -> str:
|
||||
lookup_table = self.lookups.get_table("lemma_lookup", {})
|
||||
string = string.lower()
|
||||
if orth is not None:
|
||||
return lookup_table.get(orth, string)
|
||||
else:
|
||||
return lookup_table.get(string, string)
|
||||
|
||||
# Reimplemented to focus more on application of suffix rules and to return
|
||||
# as early as possible.
|
||||
def lemmatize(
|
||||
self,
|
||||
string: str,
|
||||
index: Dict[str, List[str]],
|
||||
exceptions: Dict[str, Dict[str, List[str]]],
|
||||
rules: Dict[str, List[List[str]]],
|
||||
) -> Tuple[List[str], bool]:
|
||||
# returns (forms, is_known: bool)
|
||||
oov_forms = []
|
||||
for old, new in rules:
|
||||
if string.endswith(old):
|
||||
|
@ -121,7 +98,31 @@ class DutchLemmatizer(Lemmatizer):
|
|||
if not form:
|
||||
pass
|
||||
elif form in index:
|
||||
return [form], True # True = Is known (is lemma)
|
||||
forms = [form]
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
else:
|
||||
oov_forms.append(form)
|
||||
return list(set(oov_forms)), False
|
||||
forms = list(set(oov_forms))
|
||||
# Back-off through remaining return value candidates.
|
||||
if forms:
|
||||
for form in forms:
|
||||
if form in exceptions:
|
||||
forms = [form]
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
if looked_up_lemma:
|
||||
forms = [looked_up_lemma]
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
else:
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
elif looked_up_lemma:
|
||||
forms = [looked_up_lemma]
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
else:
|
||||
forms = [string]
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from typing import Callable
|
||||
from thinc.api import Config
|
||||
from typing import Optional
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
|
||||
from .punctuation import TOKENIZER_SUFFIXES
|
||||
|
@ -7,42 +8,16 @@ from .stop_words import STOP_WORDS
|
|||
from .lex_attrs import LEX_ATTRS
|
||||
from .lemmatizer import PolishLemmatizer
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...lookups import load_lookups
|
||||
from ...lookups import Lookups
|
||||
from ...language import Language
|
||||
from ...util import registry
|
||||
|
||||
|
||||
DEFAULT_CONFIG = """
|
||||
[nlp]
|
||||
|
||||
[nlp.lemmatizer]
|
||||
@lemmatizers = "spacy.pl.PolishLemmatizer"
|
||||
"""
|
||||
|
||||
TOKENIZER_EXCEPTIONS = {
|
||||
exc: val for exc, val in BASE_EXCEPTIONS.items() if not exc.endswith(".")
|
||||
}
|
||||
|
||||
|
||||
@registry.lemmatizers("spacy.pl.PolishLemmatizer")
def create_lemmatizer() -> Callable[[Language], PolishLemmatizer]:
    """Registered function that returns a factory for the Polish lemmatizer.

    RETURNS (Callable): Function that takes the nlp object and returns a
        PolishLemmatizer whose lookups hold the per-POS lemma tables.
    """
    # One lookup table per part of speech.
    required_tables = [
        "lemma_lookup_adj",
        "lemma_lookup_adp",
        "lemma_lookup_adv",
        "lemma_lookup_aux",
        "lemma_lookup_noun",
        "lemma_lookup_num",
        "lemma_lookup_part",
        "lemma_lookup_pron",
        "lemma_lookup_verb",
    ]

    def lemmatizer_factory(nlp: Language) -> PolishLemmatizer:
        return PolishLemmatizer(
            lookups=load_lookups(lang=nlp.lang, tables=required_tables)
        )

    return lemmatizer_factory
|
||||
|
||||
|
||||
class PolishDefaults(Language.Defaults):
|
||||
config = Config().from_str(DEFAULT_CONFIG)
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
prefixes = TOKENIZER_PREFIXES
|
||||
infixes = TOKENIZER_INFIXES
|
||||
|
@ -56,4 +31,22 @@ class Polish(Language):
|
|||
Defaults = PolishDefaults
|
||||
|
||||
|
||||
@Polish.factory(
    "lemmatizer",
    assigns=["token.lemma"],
    default_config={"model": None, "mode": "lookup", "lookups": None},
    scores=["lemma_acc"],
    default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(
    nlp: Language,
    model: Optional[Model],
    name: str,
    mode: str,
    lookups: Optional[Lookups],
):
    """Create the "lemmatizer" component for Polish pipelines.

    nlp (Language): The pipeline object; its lang and vocab are used.
    model (Optional[Model]): Passed through to the lemmatizer unchanged.
    name (str): The component name.
    mode (str): The lemmatizer mode ("lookup" in the default config).
    lookups (Optional[Lookups]): Lookups passed to
        PolishLemmatizer.load_lookups together with the language and mode.
    RETURNS (PolishLemmatizer): The lemmatizer component.
    """
    # NOTE(review): presumably load_lookups returns the given lookups or
    # loads the tables required for this mode when None -- confirm.
    lookups = PolishLemmatizer.load_lookups(nlp.lang, mode, lookups)
    return PolishLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
|
||||
|
||||
|
||||
__all__ = ["Polish"]
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from typing import Optional, List, Dict
|
||||
from typing import List, Dict
|
||||
|
||||
from ...lemmatizer import Lemmatizer
|
||||
from ...parts_of_speech import NAMES
|
||||
from ...pipeline import Lemmatizer
|
||||
from ...tokens import Token
|
||||
|
||||
|
||||
class PolishLemmatizer(Lemmatizer):
|
||||
|
@ -9,12 +9,30 @@ class PolishLemmatizer(Lemmatizer):
|
|||
# dictionary (morfeusz.sgjp.pl/en) by Institute of Computer Science PAS.
|
||||
# It utilizes some prefix based improvements for verb and adjectives
|
||||
# lemmatization, as well as case-sensitive lemmatization for nouns.
|
||||
def __call__(
|
||||
self, string: str, univ_pos: str, morphology: Optional[dict] = None
|
||||
) -> List[str]:
|
||||
if isinstance(univ_pos, int):
|
||||
univ_pos = NAMES.get(univ_pos, "X")
|
||||
univ_pos = univ_pos.upper()
|
||||
|
||||
@classmethod
def get_lookups_config(cls, mode: str) -> Dict:
    """Return the lookups configuration for the given lemmatizer mode.

    mode (str): The lemmatizer mode.
    RETURNS (Dict): For "lookup" mode, a dict whose "required_tables" entry
        lists the per-POS lemma tables; for any other mode, whatever the
        parent class returns.
    """
    if mode != "lookup":
        return super().get_lookups_config(mode)
    required = [
        "lemma_lookup_adj",
        "lemma_lookup_adp",
        "lemma_lookup_adv",
        "lemma_lookup_aux",
        "lemma_lookup_noun",
        "lemma_lookup_num",
        "lemma_lookup_part",
        "lemma_lookup_pron",
        "lemma_lookup_verb",
    ]
    return {"required_tables": required}
|
||||
|
||||
def lookup_lemmatize(self, token: Token) -> List[str]:
|
||||
string = token.text
|
||||
univ_pos = token.pos_
|
||||
morphology = token.morph.to_dict()
|
||||
lookup_pos = univ_pos.lower()
|
||||
if univ_pos == "PROPN":
|
||||
lookup_pos = "noun"
|
||||
|
@ -71,15 +89,3 @@ class PolishLemmatizer(Lemmatizer):
|
|||
return [lookup_table[string]]
|
||||
return [string.lower()]
|
||||
return [lookup_table.get(string, string)]
|
||||
|
||||
def lookup(self, string: str, orth: Optional[int] = None) -> str:
    """Return the lowercased string; no lookup table is consulted.

    string (str): The original string.
    orth (int): Accepted but not used by this implementation.
    RETURNS (str): The lowercased string.
    """
    return string.lower()
|
||||
|
||||
def lemmatize(
    self,
    string: str,
    index: Dict[str, List[str]],
    exceptions: Dict[str, Dict[str, List[str]]],
    rules: Dict[str, List[List[str]]],
) -> List[str]:
    """Rule-based lemmatization entry point; not implemented here.

    Always raises NotImplementedError, regardless of the arguments.
    """
    raise NotImplementedError
|
||||
|
|
|
@ -1,32 +1,16 @@
|
|||
from typing import Callable
|
||||
from thinc.api import Config
|
||||
from typing import Optional
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from .stop_words import STOP_WORDS
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .lemmatizer import RussianLemmatizer
|
||||
from ...util import registry
|
||||
from ...language import Language
|
||||
|
||||
|
||||
DEFAULT_CONFIG = """
|
||||
[nlp]
|
||||
|
||||
[nlp.lemmatizer]
|
||||
@lemmatizers = "spacy.ru.RussianLemmatizer"
|
||||
"""
|
||||
|
||||
|
||||
@registry.lemmatizers("spacy.ru.RussianLemmatizer")
def create_lemmatizer() -> Callable[[Language], RussianLemmatizer]:
    """Registered function that returns a factory for the Russian lemmatizer.

    RETURNS (Callable): Function that takes the nlp object and returns a
        RussianLemmatizer.
    """

    def lemmatizer_factory(nlp: Language) -> RussianLemmatizer:
        # The nlp object is accepted to match the factory signature but is
        # not used: the lemmatizer is created with its default arguments.
        return RussianLemmatizer()

    return lemmatizer_factory
|
||||
from ...lookups import Lookups
|
||||
|
||||
|
||||
class RussianDefaults(Language.Defaults):
|
||||
config = Config().from_str(DEFAULT_CONFIG)
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
lex_attr_getters = LEX_ATTRS
|
||||
stop_words = STOP_WORDS
|
||||
|
@ -37,4 +21,21 @@ class Russian(Language):
|
|||
Defaults = RussianDefaults
|
||||
|
||||
|
||||
@Russian.factory(
    "lemmatizer",
    assigns=["token.lemma"],
    default_config={"model": None, "mode": "pymorphy2", "lookups": None},
    scores=["lemma_acc"],
    default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(
    nlp: Language,
    model: Optional[Model],
    name: str,
    mode: str,
    lookups: Optional[Lookups],
):
    """Create the "lemmatizer" component for Russian pipelines.

    nlp (Language): The pipeline object; only its vocab is used.
    model (Optional[Model]): Passed through to the lemmatizer unchanged.
    name (str): The component name.
    mode (str): The lemmatizer mode ("pymorphy2" in the default config).
    lookups (Optional[Lookups]): Passed through to the lemmatizer unchanged.
    RETURNS (RussianLemmatizer): The lemmatizer component.
    """
    return RussianLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
|
||||
|
||||
|
||||
__all__ = ["Russian"]
|
||||
|
|
|
@ -1,8 +1,12 @@
|
|||
from typing import Optional, Tuple, Dict, List
|
||||
from typing import Optional, List, Dict, Tuple
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from ...symbols import ADJ, DET, NOUN, NUM, PRON, PROPN, PUNCT, VERB, POS
|
||||
from ...lemmatizer import Lemmatizer
|
||||
from ...lookups import Lookups
|
||||
from ...pipeline import Lemmatizer
|
||||
from ...symbols import POS
|
||||
from ...tokens import Token
|
||||
from ...vocab import Vocab
|
||||
|
||||
|
||||
PUNCT_RULES = {"«": '"', "»": '"'}
|
||||
|
@ -11,8 +15,17 @@ PUNCT_RULES = {"«": '"', "»": '"'}
|
|||
class RussianLemmatizer(Lemmatizer):
|
||||
_morph = None
|
||||
|
||||
def __init__(self, lookups: Optional[Lookups] = None) -> None:
|
||||
super(RussianLemmatizer, self).__init__(lookups)
|
||||
def __init__(
|
||||
self,
|
||||
vocab: Vocab,
|
||||
model: Optional[Model],
|
||||
name: str = "lemmatizer",
|
||||
*,
|
||||
mode: str = "pymorphy2",
|
||||
lookups: Optional[Lookups] = None,
|
||||
) -> None:
|
||||
super().__init__(vocab, model, name, mode=mode, lookups=lookups)
|
||||
|
||||
try:
|
||||
from pymorphy2 import MorphAnalyzer
|
||||
except ImportError:
|
||||
|
@ -25,10 +38,10 @@ class RussianLemmatizer(Lemmatizer):
|
|||
if RussianLemmatizer._morph is None:
|
||||
RussianLemmatizer._morph = MorphAnalyzer()
|
||||
|
||||
def __call__(
|
||||
self, string: str, univ_pos: str, morphology: Optional[dict] = None
|
||||
) -> List[str]:
|
||||
univ_pos = self.normalize_univ_pos(univ_pos)
|
||||
def pymorphy2_lemmatize(self, token: Token) -> List[str]:
|
||||
string = token.text
|
||||
univ_pos = token.pos_
|
||||
morphology = token.morph.to_dict()
|
||||
if univ_pos == "PUNCT":
|
||||
return [PUNCT_RULES.get(string, string)]
|
||||
if univ_pos not in ("ADJ", "DET", "NOUN", "NUM", "PRON", "PROPN", "VERB"):
|
||||
|
@ -81,25 +94,8 @@ class RussianLemmatizer(Lemmatizer):
|
|||
return [string.lower()]
|
||||
return list(set([analysis.normal_form for analysis in filtered_analyses]))
|
||||
|
||||
@staticmethod
def normalize_univ_pos(univ_pos: str) -> Optional[str]:
    """Convert a universal POS tag to its uppercase string name.

    univ_pos (str): The POS tag, either as a string or as a symbol ID.
    RETURNS (Optional[str]): The uppercased string for string input, the
        name of a supported symbol, or None for any other value.
    """
    if isinstance(univ_pos, str):
        return univ_pos.upper()
    # Only the symbols listed here are recognized; .get() yields None for
    # everything else.
    names_by_symbol = {
        ADJ: "ADJ",
        DET: "DET",
        NOUN: "NOUN",
        NUM: "NUM",
        PRON: "PRON",
        PROPN: "PROPN",
        PUNCT: "PUNCT",
        VERB: "VERB",
    }
    return names_by_symbol.get(univ_pos)
|
||||
|
||||
def lookup(self, string: str, orth: Optional[int] = None) -> str:
|
||||
def lookup_lemmatize(self, token: Token) -> List[str]:
|
||||
string = token.text
|
||||
analyses = self._morph.parse(string)
|
||||
if len(analyses) == 1:
|
||||
return analyses[0].normal_form
|
||||
|
|
|
@ -1,32 +1,16 @@
|
|||
from typing import Callable
|
||||
from thinc.api import Config
|
||||
from typing import Optional
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from ...util import registry
|
||||
from ...language import Language
|
||||
from .lemmatizer import UkrainianLemmatizer
|
||||
|
||||
|
||||
DEFAULT_CONFIG = """
|
||||
[nlp]
|
||||
|
||||
[nlp.lemmatizer]
|
||||
@lemmatizers = "spacy.uk.UkrainianLemmatizer"
|
||||
"""
|
||||
|
||||
|
||||
@registry.lemmatizers("spacy.uk.UkrainianLemmatizer")
def create_ukrainian_lemmatizer() -> Callable[[Language], UkrainianLemmatizer]:
    """Registered function that returns a factory for the Ukrainian
    lemmatizer.

    RETURNS (Callable): Function that takes the nlp object and returns a
        UkrainianLemmatizer.
    """

    def lemmatizer_factory(nlp: Language) -> UkrainianLemmatizer:
        # The nlp object is accepted to match the factory signature but is
        # not used: the lemmatizer is created with its default arguments.
        return UkrainianLemmatizer()

    return lemmatizer_factory
|
||||
from ...language import Language
|
||||
from ...lookups import Lookups
|
||||
|
||||
|
||||
class UkrainianDefaults(Language.Defaults):
|
||||
config = Config().from_str(DEFAULT_CONFIG)
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
lex_attr_getters = LEX_ATTRS
|
||||
stop_words = STOP_WORDS
|
||||
|
@ -37,4 +21,21 @@ class Ukrainian(Language):
|
|||
Defaults = UkrainianDefaults
|
||||
|
||||
|
||||
@Ukrainian.factory(
    "lemmatizer",
    assigns=["token.lemma"],
    default_config={"model": None, "mode": "pymorphy2", "lookups": None},
    scores=["lemma_acc"],
    default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(
    nlp: Language,
    model: Optional[Model],
    name: str,
    mode: str,
    lookups: Optional[Lookups],
):
    """Create the "lemmatizer" component for Ukrainian pipelines.

    nlp (Language): The pipeline object; only its vocab is used.
    model (Optional[Model]): Passed through to the lemmatizer unchanged.
    name (str): The component name.
    mode (str): The lemmatizer mode ("pymorphy2" in the default config).
    lookups (Optional[Lookups]): Passed through to the lemmatizer unchanged.
    RETURNS (UkrainianLemmatizer): The lemmatizer component.
    """
    return UkrainianLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
|
||||
|
||||
|
||||
__all__ = ["Ukrainian"]
|
||||
|
|
|
@ -1,187 +1,30 @@
|
|||
from typing import Optional, List, Tuple, Dict
|
||||
from typing import Optional
|
||||
|
||||
from ...symbols import ADJ, DET, NOUN, NUM, PRON, PROPN, PUNCT, VERB, POS
|
||||
from thinc.api import Model
|
||||
|
||||
from ..ru.lemmatizer import RussianLemmatizer
|
||||
from ...lookups import Lookups
|
||||
from ...lemmatizer import Lemmatizer
|
||||
from ...vocab import Vocab
|
||||
|
||||
|
||||
PUNCT_RULES = {"«": '"', "»": '"'}
|
||||
|
||||
|
||||
class UkrainianLemmatizer(Lemmatizer):
|
||||
_morph = None
|
||||
|
||||
def __init__(self, lookups: Optional[Lookups] = None) -> None:
|
||||
super(UkrainianLemmatizer, self).__init__(lookups)
|
||||
class UkrainianLemmatizer(RussianLemmatizer):
|
||||
def __init__(
|
||||
self,
|
||||
vocab: Vocab,
|
||||
model: Optional[Model],
|
||||
name: str = "lemmatizer",
|
||||
*,
|
||||
mode: str = "pymorphy2",
|
||||
lookups: Optional[Lookups] = None,
|
||||
) -> None:
|
||||
super().__init__(vocab, model, name, mode=mode, lookups=lookups)
|
||||
try:
|
||||
from pymorphy2 import MorphAnalyzer
|
||||
|
||||
if UkrainianLemmatizer._morph is None:
|
||||
UkrainianLemmatizer._morph = MorphAnalyzer(lang="uk")
|
||||
except (ImportError, TypeError):
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"The Ukrainian lemmatizer requires the pymorphy2 library and "
|
||||
'dictionaries: try to fix it with "pip uninstall pymorphy2" and'
|
||||
'"pip install git+https://github.com/kmike/pymorphy2.git pymorphy2-dicts-uk"'
|
||||
) from None
|
||||
|
||||
def __call__(
    self, string: str, univ_pos: str, morphology: Optional[dict] = None
) -> List[str]:
    """Lemmatize a string from the morphological analyses of self._morph.

    string (str): The string to lemmatize.
    univ_pos (str): The universal POS tag; normalized with
        normalize_univ_pos before use.
    morphology (dict): Optional morphological features used to narrow down
        the candidate analyses.
    RETURNS (List[str]): The distinct normal forms of the analyses that
        survive filtering (in no particular order), or a single-item list
        with a fallback form.
    """
    univ_pos = self.normalize_univ_pos(univ_pos)
    if univ_pos == "PUNCT":
        return [PUNCT_RULES.get(string, string)]
    if univ_pos not in ("ADJ", "DET", "NOUN", "NUM", "PRON", "PROPN", "VERB"):
        # Skip unchangeable pos
        return [string.lower()]
    analyses = self._morph.parse(string)
    # First pass: keep known analyses whose POS agrees with the requested
    # one. NOUN and PROPN are treated as interchangeable.
    filtered_analyses = []
    for analysis in analyses:
        if not analysis.is_known:
            # Skip suggested parse variant for unknown word for pymorphy
            continue
        analysis_pos, _ = oc2ud(str(analysis.tag))
        if analysis_pos == univ_pos or (
            analysis_pos in ("NOUN", "PROPN") and univ_pos in ("NOUN", "PROPN")
        ):
            filtered_analyses.append(analysis)
    if not len(filtered_analyses):
        return [string.lower()]
    # Without morphological features beyond POS there is nothing more to
    # filter on.
    if morphology is None or (len(morphology) == 1 and POS in morphology):
        return list(set([analysis.normal_form for analysis in filtered_analyses]))
    if univ_pos in ("ADJ", "DET", "NOUN", "PROPN"):
        features_to_compare = ["Case", "Number", "Gender"]
    elif univ_pos == "NUM":
        features_to_compare = ["Case", "Gender"]
    elif univ_pos == "PRON":
        features_to_compare = ["Case", "Number", "Gender", "Person"]
    else:  # VERB
        features_to_compare = [
            "Aspect",
            "Gender",
            "Mood",
            "Number",
            "Tense",
            "VerbForm",
            "Voice",
        ]
    # Second pass: drop analyses that contradict the given morphology. A
    # feature only counts as a conflict when both sides specify it and the
    # values differ case-insensitively.
    analyses, filtered_analyses = filtered_analyses, []
    for analysis in analyses:
        _, analysis_morph = oc2ud(str(analysis.tag))
        for feature in features_to_compare:
            if (
                feature in morphology
                and feature in analysis_morph
                and morphology[feature].lower() != analysis_morph[feature].lower()
            ):
                break
        else:
            # for/else: reached only when no feature conflicted.
            filtered_analyses.append(analysis)
    if not len(filtered_analyses):
        return [string.lower()]
    return list(set([analysis.normal_form for analysis in filtered_analyses]))
|
||||
|
||||
@staticmethod
def normalize_univ_pos(univ_pos: str) -> Optional[str]:
    """Convert a universal POS tag to its uppercase string name.

    univ_pos (str): The POS tag, either as a string or as a symbol ID.
    RETURNS (Optional[str]): The uppercased string for string input, the
        name of a supported symbol, or None for any other value.
    """
    if isinstance(univ_pos, str):
        return univ_pos.upper()
    # Only the symbols listed here are recognized; .get() yields None for
    # everything else.
    names_by_symbol = {
        ADJ: "ADJ",
        DET: "DET",
        NOUN: "NOUN",
        NUM: "NUM",
        PRON: "PRON",
        PROPN: "PROPN",
        PUNCT: "PUNCT",
        VERB: "VERB",
    }
    return names_by_symbol.get(univ_pos)
|
||||
|
||||
def lookup(self, string: str, orth: Optional[int] = None) -> str:
    """Return the normal form of the string if its analysis is unambiguous.

    string (str): The original string.
    orth (int): Accepted but not used by this implementation.
    RETURNS (str): The normal form when self._morph.parse yields exactly
        one analysis, otherwise the original string.
    """
    parses = self._morph.parse(string)
    return parses[0].normal_form if len(parses) == 1 else string
|
||||
|
||||
|
||||
# Grammeme -> value tables, keyed by target category. "_POS" holds the
# part-of-speech mapping; every other key is a morphological feature name.
# Built once at import time instead of on every call.
# NOTE(review): the grammeme inventory presumably follows the OpenCorpora
# tagset emitted by pymorphy2 -- confirm.
_OC2UD_GRAM_MAP = {
    "_POS": {
        "ADJF": "ADJ",
        "ADJS": "ADJ",
        "ADVB": "ADV",
        "Apro": "DET",
        "COMP": "ADJ",  # Can also be an ADV - unchangeable
        "CONJ": "CCONJ",  # Can also be a SCONJ - both unchangeable ones
        "GRND": "VERB",
        "INFN": "VERB",
        "INTJ": "INTJ",
        "NOUN": "NOUN",
        "NPRO": "PRON",
        "NUMR": "NUM",
        "NUMB": "NUM",
        "PNCT": "PUNCT",
        "PRCL": "PART",
        "PREP": "ADP",
        "PRTF": "VERB",
        "PRTS": "VERB",
        "VERB": "VERB",
    },
    "Animacy": {"anim": "Anim", "inan": "Inan"},
    "Aspect": {"impf": "Imp", "perf": "Perf"},
    "Case": {
        "ablt": "Ins",
        "accs": "Acc",
        "datv": "Dat",
        "gen1": "Gen",
        "gen2": "Gen",
        "gent": "Gen",
        "loc2": "Loc",
        "loct": "Loc",
        "nomn": "Nom",
        "voct": "Voc",
    },
    "Degree": {"COMP": "Cmp", "Supr": "Sup"},
    "Gender": {"femn": "Fem", "masc": "Masc", "neut": "Neut"},
    "Mood": {"impr": "Imp", "indc": "Ind"},
    "Number": {"plur": "Plur", "sing": "Sing"},
    "NumForm": {"NUMB": "Digit"},
    "Person": {"1per": "1", "2per": "2", "3per": "3", "excl": "2", "incl": "1"},
    "Tense": {"futr": "Fut", "past": "Past", "pres": "Pres"},
    "Variant": {"ADJS": "Brev", "PRTS": "Brev"},
    "VerbForm": {
        "GRND": "Conv",
        "INFN": "Inf",
        "PRTF": "Part",
        "PRTS": "Part",
        "VERB": "Fin",
    },
    "Voice": {"actv": "Act", "pssv": "Pass"},
    "Abbr": {"Abbr": "Yes"},
}

# Grammemes that are not in the map above but mark the word as a proper noun.
_OC2UD_PROPN_GRAMS = ("Name", "Patr", "Surn", "Geox", "Orgn")


def oc2ud(oc_tag: str) -> Tuple[str, Dict[str, str]]:
    """Convert a morphological tag string into a POS tag and feature dict.

    The tag is split on commas and spaces into grammemes. Each grammeme found
    in the mapping sets the POS and/or one feature. Grammemes that match
    nothing are then handled in the order they first appear in the tag, so
    the result is deterministic (the previous implementation drained a set,
    which made the outcome depend on hash order when two such grammemes both
    set the POS).

    oc_tag (str): The tag string, e.g. "NOUN,anim,masc sing,nomn".
    RETURNS (Tuple[str, Dict[str, str]]): The POS tag ("X" if none was found)
        and the morphological features.
    """
    pos = "X"
    morphology = {}
    # A dict is used as an insertion-ordered set of unmatched grammemes.
    unmatched = {}
    for gram in oc_tag.replace(" ", ",").split(","):
        matched = False
        # A grammeme may appear in several categories (e.g. "ADJS" is both a
        # POS and a Variant), so every category is checked.
        for categ, gmap in _OC2UD_GRAM_MAP.items():
            if gram in gmap:
                matched = True
                if categ == "_POS":
                    pos = gmap[gram]
                else:
                    morphology[categ] = gmap[gram]
        if not matched:
            unmatched[gram] = None
    # Unmatched grammemes are applied last, so they override whatever the
    # mapped grammemes set for the POS or for "Number".
    for gram in unmatched:
        if gram in _OC2UD_PROPN_GRAMS:
            pos = "PROPN"
        elif gram == "Auxt":
            pos = "AUX"
        elif gram == "Pltm":
            morphology["Number"] = "Ptan"
    return pos, morphology
|
||||
if UkrainianLemmatizer._morph is None:
|
||||
UkrainianLemmatizer._morph = MorphAnalyzer(lang="uk")
|
||||
|
|
|
@ -29,7 +29,6 @@ from .lang.punctuation import TOKENIZER_INFIXES
|
|||
from .tokens import Doc
|
||||
from .lookups import load_lookups
|
||||
from .tokenizer import Tokenizer
|
||||
from .lemmatizer import Lemmatizer
|
||||
from .errors import Errors, Warnings
|
||||
from .schemas import ConfigSchema
|
||||
from .git_info import GIT_VERSION
|
||||
|
@ -87,22 +86,6 @@ def create_tokenizer() -> Callable[["Language"], Tokenizer]:
|
|||
return tokenizer_factory
|
||||
|
||||
|
||||
@registry.lemmatizers("spacy.Lemmatizer.v1")
def create_lemmatizer() -> Callable[["Language"], "Lemmatizer"]:
    """Registered function to create a lemmatizer. Returns a factory that takes
    the nlp object and returns a Lemmatizer instance with data loaded in from
    spacy-lookups-data, if the package is installed.

    RETURNS (Callable): Function that takes the nlp object and returns a
        Lemmatizer.
    """
    # TODO: Will be replaced when the lemmatizer becomes a pipeline component
    tables = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]

    def lemmatizer_factory(nlp: "Language") -> "Lemmatizer":
        # NOTE(review): strict=False presumably makes missing tables or a
        # language without lookups data non-fatal -- confirm against
        # load_lookups.
        lookups = load_lookups(lang=nlp.lang, tables=tables, strict=False)
        return Lemmatizer(lookups=lookups)

    return lemmatizer_factory
|
||||
|
||||
|
||||
class Language:
|
||||
"""A text-processing pipeline. Usually you'll load this once per process,
|
||||
and pass the instance around your application.
|
||||
|
@ -128,7 +111,6 @@ class Language:
|
|||
max_length: int = 10 ** 6,
|
||||
meta: Dict[str, Any] = {},
|
||||
create_tokenizer: Optional[Callable[["Language"], Callable[[str], Doc]]] = None,
|
||||
create_lemmatizer: Optional[Callable[["Language"], Callable]] = None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""Initialise a Language object.
|
||||
|
@ -146,8 +128,6 @@ class Language:
|
|||
100,000 characters in one text.
|
||||
create_tokenizer (Callable): Function that takes the nlp object and
|
||||
returns a tokenizer.
|
||||
create_lemmatizer (Callable): Function that takes the nlp object and
|
||||
returns a lemmatizer.
|
||||
|
||||
DOCS: https://spacy.io/api/language#init
|
||||
"""
|
||||
|
@ -166,13 +146,9 @@ class Language:
|
|||
|
||||
if vocab is True:
|
||||
vectors_name = meta.get("vectors", {}).get("name")
|
||||
if not create_lemmatizer:
|
||||
lemma_cfg = {"lemmatizer": self._config["nlp"]["lemmatizer"]}
|
||||
create_lemmatizer = registry.make_from_config(lemma_cfg)["lemmatizer"]
|
||||
vocab = create_vocab(
|
||||
self.lang,
|
||||
self.Defaults,
|
||||
lemmatizer=create_lemmatizer(self),
|
||||
vectors_name=vectors_name,
|
||||
load_data=self._config["nlp"]["load_vocab_data"],
|
||||
)
|
||||
|
@ -1451,7 +1427,6 @@ class Language:
|
|||
filled["components"] = orig_pipeline
|
||||
config["components"] = orig_pipeline
|
||||
create_tokenizer = resolved["nlp"]["tokenizer"]
|
||||
create_lemmatizer = resolved["nlp"]["lemmatizer"]
|
||||
before_creation = resolved["nlp"]["before_creation"]
|
||||
after_creation = resolved["nlp"]["after_creation"]
|
||||
after_pipeline_creation = resolved["nlp"]["after_pipeline_creation"]
|
||||
|
@ -1467,7 +1442,6 @@ class Language:
|
|||
nlp = lang_cls(
|
||||
vocab=vocab,
|
||||
create_tokenizer=create_tokenizer,
|
||||
create_lemmatizer=create_lemmatizer,
|
||||
)
|
||||
if after_creation is not None:
|
||||
nlp = after_creation(nlp)
|
||||
|
|
|
@ -1,145 +0,0 @@
|
|||
from typing import Optional, Callable, List, Dict
|
||||
|
||||
from .lookups import Lookups
|
||||
from .parts_of_speech import NAMES as UPOS_NAMES
|
||||
|
||||
|
||||
class Lemmatizer:
    """
    Lemmatizer based on simple part-of-speech-sensitive suffix rules and
    lookup tables.

    DOCS: https://spacy.io/api/lemmatizer
    """

    def __init__(
        self,
        lookups: Optional[Lookups] = None,
        is_base_form: Optional[Callable] = None,
    ) -> None:
        """Initialize a Lemmatizer.

        lookups (Lookups): The lookups object containing the (optional) tables
            "lemma_rules", "lemma_index", "lemma_exc" and "lemma_lookup".
        is_base_form (Callable): Optional predicate called with the lowercased
            POS and the morphology; a true result means the string already is
            a base form.
        """
        if lookups is None:
            lookups = Lookups()
        self.lookups = lookups
        self.is_base_form = is_base_form

    def __call__(
        self, string: str, univ_pos: str, morphology: Optional[dict] = None
    ) -> List[str]:
        """Lemmatize a string.

        string (str): The string to lemmatize, e.g. the token text.
        univ_pos (str / int): The token's universal part-of-speech tag.
        morphology (dict): The token's morphological features following the
            Universal Dependencies scheme.
        RETURNS (list): The available lemmas for the string.
        """
        # Without rules there is only the plain lookup table to consult.
        if "lemma_rules" not in self.lookups:
            plain_lookup = self.lookups.get_table("lemma_lookup", {})
            return [plain_lookup.get(string, string)]
        if isinstance(univ_pos, int):
            univ_pos = UPOS_NAMES.get(univ_pos, "X")
        pos = univ_pos.lower()
        if pos in ("", "eol", "space"):
            return [string.lower()]
        # See Issue #435 for an example of where this logic is required.
        if callable(self.is_base_form) and self.is_base_form(pos, morphology):
            return [string.lower()]
        indices = self.lookups.get_table("lemma_index", {})
        exceptions = self.lookups.get_table("lemma_exc", {})
        rules = self.lookups.get_table("lemma_rules", {})
        has_pos_data = bool(
            indices.get(pos) or exceptions.get(pos) or rules.get(pos)
        )
        if not has_pos_data:
            # No data for this POS: proper nouns keep their casing,
            # everything else is lowercased.
            return [string] if pos == "propn" else [string.lower()]
        return self.lemmatize(
            string,
            indices.get(pos, {}),
            exceptions.get(pos, {}),
            rules.get(pos, []),
        )

    def noun(self, string: str, morphology: Optional[dict] = None) -> List[str]:
        """Lemmatize the string as a noun."""
        return self(string, "noun", morphology)

    def verb(self, string: str, morphology: Optional[dict] = None) -> List[str]:
        """Lemmatize the string as a verb."""
        return self(string, "verb", morphology)

    def adj(self, string: str, morphology: Optional[dict] = None) -> List[str]:
        """Lemmatize the string as an adjective."""
        return self(string, "adj", morphology)

    def det(self, string: str, morphology: Optional[dict] = None) -> List[str]:
        """Lemmatize the string as a determiner."""
        return self(string, "det", morphology)

    def pron(self, string: str, morphology: Optional[dict] = None) -> List[str]:
        """Lemmatize the string as a pronoun."""
        return self(string, "pron", morphology)

    def adp(self, string: str, morphology: Optional[dict] = None) -> List[str]:
        """Lemmatize the string as an adposition."""
        return self(string, "adp", morphology)

    def num(self, string: str, morphology: Optional[dict] = None) -> List[str]:
        """Lemmatize the string as a numeral."""
        return self(string, "num", morphology)

    def punct(self, string: str, morphology: Optional[dict] = None) -> List[str]:
        """Lemmatize the string as punctuation."""
        return self(string, "punct", morphology)

    def lookup(self, string: str, orth: Optional[int] = None) -> str:
        """Look up a lemma in the table, if available. If no lemma is found,
        the original string is returned.

        string (str): The original string.
        orth (int): Optional hash of the string to look up. If not set, the
            string will be used and hashed.
        RETURNS (str): The lemma if the string was found, otherwise the
            original string.
        """
        table = self.lookups.get_table("lemma_lookup", {})
        key = string if orth is None else orth
        return table.get(key, string)

    def lemmatize(
        self,
        string: str,
        index: Dict[str, List[str]],
        exceptions: Dict[str, Dict[str, List[str]]],
        rules: Dict[str, List[List[str]]],
    ) -> List[str]:
        """Apply suffix rules and exceptions to a string.

        string (str): The string to lemmatize; it is lowercased first.
        index: Collection of known lemmas, used for membership tests.
        exceptions: Maps a lowercased string to its exceptional lemmas.
        rules: Iterable of (old suffix, new suffix) pairs.
        RETURNS (List[str]): Known forms with exceptions in front; failing
            that, the out-of-vocabulary forms; failing that, the original
            string.
        """
        original = string
        lowered = string.lower()
        known = []
        unknown = []
        for suffix, replacement in rules:
            if not lowered.endswith(suffix):
                continue
            candidate = lowered[: len(lowered) - len(suffix)] + replacement
            if not candidate:
                continue
            if candidate in index or not candidate.isalpha():
                known.append(candidate)
            else:
                unknown.append(candidate)
        # Remove duplicates but preserve the ordering of applied "rules"
        known = list(dict.fromkeys(known))
        # Exceptions go to the front of the list so they get priority. This
        # is a dodgy heuristic, but the best available without frequencies;
        # problematic exceptions that shadow more frequent analyses can at
        # least be pruned from the data.
        for exc_form in exceptions.get(lowered, []):
            if exc_form not in known:
                known.insert(0, exc_form)
        return known or unknown or [original]
|
304
spacy/lookups.py
304
spacy/lookups.py
|
@ -28,6 +28,8 @@ def load_lookups(
|
|||
# TODO: import spacy_lookups_data instead of going via entry points here?
|
||||
lookups = Lookups()
|
||||
if lang not in registry.lookups:
|
||||
if strict and len(tables) > 0:
|
||||
raise ValueError(Errors.E955.format(table=", ".join(tables), lang=lang))
|
||||
return lookups
|
||||
data = registry.lookups.get(lang)
|
||||
for table in tables:
|
||||
|
@ -41,152 +43,6 @@ def load_lookups(
|
|||
return lookups
|
||||
|
||||
|
||||
class Lookups:
|
||||
"""Container for large lookup tables and dictionaries, e.g. lemmatization
|
||||
data or tokenizer exception lists. Lookups are available via vocab.lookups,
|
||||
so they can be accessed before the pipeline components are applied (e.g.
|
||||
in the tokenizer and lemmatizer), as well as within the pipeline components
|
||||
via doc.vocab.lookups.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initialize the Lookups object.
|
||||
|
||||
DOCS: https://spacy.io/api/lookups#init
|
||||
"""
|
||||
self._tables = {}
|
||||
|
||||
def __contains__(self, name: str) -> bool:
|
||||
"""Check if the lookups contain a table of a given name. Delegates to
|
||||
Lookups.has_table.
|
||||
|
||||
name (str): Name of the table.
|
||||
RETURNS (bool): Whether a table of that name is in the lookups.
|
||||
"""
|
||||
return self.has_table(name)
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""RETURNS (int): The number of tables in the lookups."""
|
||||
return len(self._tables)
|
||||
|
||||
@property
|
||||
def tables(self) -> List[str]:
|
||||
"""RETURNS (List[str]): Names of all tables in the lookups."""
|
||||
return list(self._tables.keys())
|
||||
|
||||
def add_table(self, name: str, data: dict = SimpleFrozenDict()) -> "Table":
|
||||
"""Add a new table to the lookups. Raises an error if the table exists.
|
||||
|
||||
name (str): Unique name of table.
|
||||
data (dict): Optional data to add to the table.
|
||||
RETURNS (Table): The newly added table.
|
||||
|
||||
DOCS: https://spacy.io/api/lookups#add_table
|
||||
"""
|
||||
if name in self.tables:
|
||||
raise ValueError(Errors.E158.format(name=name))
|
||||
table = Table(name=name, data=data)
|
||||
self._tables[name] = table
|
||||
return table
|
||||
|
||||
def get_table(self, name: str, default: Any = UNSET) -> "Table":
|
||||
"""Get a table. Raises an error if the table doesn't exist and no
|
||||
default value is provided.
|
||||
|
||||
name (str): Name of the table.
|
||||
default (Any): Optional default value to return if table doesn't exist.
|
||||
RETURNS (Table): The table.
|
||||
|
||||
DOCS: https://spacy.io/api/lookups#get_table
|
||||
"""
|
||||
if name not in self._tables:
|
||||
if default == UNSET:
|
||||
raise KeyError(Errors.E159.format(name=name, tables=self.tables))
|
||||
return default
|
||||
return self._tables[name]
|
||||
|
||||
def remove_table(self, name: str) -> "Table":
|
||||
"""Remove a table. Raises an error if the table doesn't exist.
|
||||
|
||||
name (str): Name of the table to remove.
|
||||
RETURNS (Table): The removed table.
|
||||
|
||||
DOCS: https://spacy.io/api/lookups#remove_table
|
||||
"""
|
||||
if name not in self._tables:
|
||||
raise KeyError(Errors.E159.format(name=name, tables=self.tables))
|
||||
return self._tables.pop(name)
|
||||
|
||||
def has_table(self, name: str) -> bool:
|
||||
"""Check if the lookups contain a table of a given name.
|
||||
|
||||
name (str): Name of the table.
|
||||
RETURNS (bool): Whether a table of that name exists.
|
||||
|
||||
DOCS: https://spacy.io/api/lookups#has_table
|
||||
"""
|
||||
return name in self._tables
|
||||
|
||||
def to_bytes(self, **kwargs) -> bytes:
|
||||
"""Serialize the lookups to a bytestring.
|
||||
|
||||
RETURNS (bytes): The serialized Lookups.
|
||||
|
||||
DOCS: https://spacy.io/api/lookups#to_bytes
|
||||
"""
|
||||
return srsly.msgpack_dumps(self._tables)
|
||||
|
||||
def from_bytes(self, bytes_data: bytes, **kwargs) -> "Lookups":
|
||||
"""Load the lookups from a bytestring.
|
||||
|
||||
bytes_data (bytes): The data to load.
|
||||
RETURNS (Lookups): The loaded Lookups.
|
||||
|
||||
DOCS: https://spacy.io/api/lookups#from_bytes
|
||||
"""
|
||||
self._tables = {}
|
||||
for key, value in srsly.msgpack_loads(bytes_data).items():
|
||||
self._tables[key] = Table(key, value)
|
||||
return self
|
||||
|
||||
def to_disk(
    self, path: Union[str, Path], filename: str = "lookups.bin", **kwargs
) -> None:
    """Write the lookups into a directory, by default as lookups.bin.
    Nothing is written when there are no tables. The directory is created
    if it doesn't exist.

    path (str / Path): The directory to write into.
    filename (str): Name of the file inside that directory.

    DOCS: https://spacy.io/api/lookups#to_disk
    """
    if not len(self._tables):
        return
    directory = ensure_path(path)
    if not directory.exists():
        directory.mkdir()
    target = directory / filename
    with target.open("wb") as handle:
        handle.write(self.to_bytes())
|
||||
|
||||
def from_disk(
    self, path: Union[str, Path], filename: str = "lookups.bin", **kwargs
) -> "Lookups":
    """Read lookups from a file (by default lookups.bin) inside a
    directory. If the file is missing, loading is skipped and the object
    is returned unchanged.

    path (str / Path): The directory path.
    filename (str): Name of the file inside that directory.
    RETURNS (Lookups): The loaded lookups.

    DOCS: https://spacy.io/api/lookups#from_disk
    """
    source = ensure_path(path) / filename
    if not source.exists():
        return self
    with source.open("rb") as handle:
        payload = handle.read()
    return self.from_bytes(payload)
|
||||
|
||||
|
||||
class Table(OrderedDict):
|
||||
"""A table in the lookups. Subclass of builtin dict that implements a
|
||||
slightly more consistent and unified API.
|
||||
|
@ -303,3 +159,159 @@ class Table(OrderedDict):
|
|||
self.clear()
|
||||
self.update(data)
|
||||
return self
|
||||
|
||||
|
||||
class Lookups:
    """Container for large lookup tables and dictionaries, e.g. lemmatization
    data or tokenizer exception lists. Lookups are available via vocab.lookups,
    so they can be accessed before the pipeline components are applied (e.g.
    in the tokenizer and lemmatizer), as well as within the pipeline components
    via doc.vocab.lookups.
    """

    def __init__(self) -> None:
        """Initialize the Lookups object.

        DOCS: https://spacy.io/api/lookups#init
        """
        # Maps table name -> Table.
        self._tables = {}

    def __contains__(self, name: str) -> bool:
        """Check if the lookups contain a table of a given name. Delegates to
        Lookups.has_table.

        name (str): Name of the table.
        RETURNS (bool): Whether a table of that name is in the lookups.
        """
        return self.has_table(name)

    def __len__(self) -> int:
        """RETURNS (int): The number of tables in the lookups."""
        return len(self._tables)

    @property
    def tables(self) -> List[str]:
        """RETURNS (List[str]): Names of all tables in the lookups."""
        return list(self._tables.keys())

    def add_table(self, name: str, data: dict = SimpleFrozenDict()) -> Table:
        """Add a new table to the lookups. Raises an error if the table exists.

        name (str): Unique name of table.
        data (dict): Optional data to add to the table.
        RETURNS (Table): The newly added table.
        RAISES (ValueError): If a table with that name already exists.

        DOCS: https://spacy.io/api/lookups#add_table
        """
        # Test against the dict directly instead of building a list of names.
        if name in self._tables:
            raise ValueError(Errors.E158.format(name=name))
        table = Table(name=name, data=data)
        self._tables[name] = table
        return table

    def set_table(self, name: str, table: Table) -> None:
        """Set a table, replacing any table already stored under that name.

        name (str): Name of the table to set.
        table (Table): The Table to set.

        DOCS: https://spacy.io/api/lookups#set_table
        """
        self._tables[name] = table

    def get_table(self, name: str, default: Any = UNSET) -> Table:
        """Get a table. Raises an error if the table doesn't exist and no
        default value is provided.

        name (str): Name of the table.
        default (Any): Optional default value to return if table doesn't exist.
        RETURNS (Table): The table, or the default if the table is missing.
        RAISES (KeyError): If the table is missing and no default was given.

        DOCS: https://spacy.io/api/lookups#get_table
        """
        if name not in self._tables:
            # Identity check: "==" would call the default's own __eq__, which
            # may be expensive or return a non-boolean for some objects.
            if default is UNSET:
                raise KeyError(Errors.E159.format(name=name, tables=self.tables))
            return default
        return self._tables[name]

    def remove_table(self, name: str) -> Table:
        """Remove a table. Raises an error if the table doesn't exist.

        name (str): Name of the table to remove.
        RETURNS (Table): The removed table.
        RAISES (KeyError): If no table with that name exists.

        DOCS: https://spacy.io/api/lookups#remove_table
        """
        if name not in self._tables:
            raise KeyError(Errors.E159.format(name=name, tables=self.tables))
        return self._tables.pop(name)

    def has_table(self, name: str) -> bool:
        """Check if the lookups contain a table of a given name.

        name (str): Name of the table.
        RETURNS (bool): Whether a table of that name exists.

        DOCS: https://spacy.io/api/lookups#has_table
        """
        return name in self._tables

    def to_bytes(self, **kwargs) -> bytes:
        """Serialize the lookups to a bytestring.

        RETURNS (bytes): The serialized Lookups.

        DOCS: https://spacy.io/api/lookups#to_bytes
        """
        return srsly.msgpack_dumps(self._tables)

    def from_bytes(self, bytes_data: bytes, **kwargs) -> "Lookups":
        """Load the lookups from a bytestring. Any tables currently held are
        discarded first.

        bytes_data (bytes): The data to load.
        RETURNS (Lookups): The loaded Lookups.

        DOCS: https://spacy.io/api/lookups#from_bytes
        """
        self._tables = {}
        for key, value in srsly.msgpack_loads(bytes_data).items():
            self._tables[key] = Table(key, value)
        return self

    def to_disk(
        self, path: Union[str, Path], filename: str = "lookups.bin", **kwargs
    ) -> None:
        """Save the lookups to a directory as lookups.bin. Expects a path to a
        directory, which will be created if it doesn't exist. Nothing is
        written if the lookups contain no tables.

        path (str / Path): The directory path.
        filename (str): Name of the file to write inside the directory.

        DOCS: https://spacy.io/api/lookups#to_disk
        """
        if len(self._tables):
            path = ensure_path(path)
            if not path.exists():
                path.mkdir()
            filepath = path / filename
            with filepath.open("wb") as file_:
                file_.write(self.to_bytes())

    def from_disk(
        self, path: Union[str, Path], filename: str = "lookups.bin", **kwargs
    ) -> "Lookups":
        """Load lookups from a directory containing a lookups.bin. Will skip
        loading if the file doesn't exist.

        path (str / Path): The directory path.
        filename (str): Name of the file to read inside the directory.
        RETURNS (Lookups): The loaded lookups.

        DOCS: https://spacy.io/api/lookups#from_disk
        """
        path = ensure_path(path)
        filepath = path / filename
        if filepath.exists():
            with filepath.open("rb") as file_:
                data = file_.read()
            return self.from_bytes(data)
        return self
|
||||
|
|
|
@ -27,12 +27,6 @@ cdef class Morphology:
|
|||
cdef MorphAnalysisC create_morph_tag(self, field_feature_pairs) except *
|
||||
cdef int insert(self, MorphAnalysisC tag) except -1
|
||||
|
||||
cdef int assign_untagged(self, TokenC* token) except -1
|
||||
cdef int assign_tag(self, TokenC* token, tag) except -1
|
||||
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
|
||||
|
||||
cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1
|
||||
|
||||
|
||||
cdef int check_feature(const MorphAnalysisC* morph, attr_t feature) nogil
|
||||
cdef list list_features(const MorphAnalysisC* morph)
|
||||
|
|
|
@ -31,43 +31,15 @@ cdef class Morphology:
|
|||
VALUE_SEP = ","
|
||||
EMPTY_MORPH = "_" # not an empty string so that the PreshMap key is not 0
|
||||
|
||||
def __init__(self, StringStore strings, tag_map, lemmatizer, exc=None):
|
||||
def __init__(self, StringStore strings):
|
||||
self.mem = Pool()
|
||||
self.strings = strings
|
||||
self.tags = PreshMap()
|
||||
self.load_tag_map(tag_map)
|
||||
self.lemmatizer = lemmatizer
|
||||
|
||||
self._cache = PreshMapArray(self.n_tags)
|
||||
self._exc = {}
|
||||
if exc is not None:
|
||||
self.load_morph_exceptions(exc)
|
||||
|
||||
def load_tag_map(self, tag_map):
    """Rebuild the tag tables from a mapping of tag string to attributes.

    Resets self.tag_map and self.reverse_index, makes sure a special '_SP'
    space tag is present, registers every tag's normalized attributes via
    self.add, and finally refreshes tag_names, n_tags and the per-tag cache.

    tag_map (dict): Mapping of tag string to attribute dict.
    """
    self.tag_map = {}
    self.reverse_index = {}
    # Add special space symbol. It is prefixed with an underscore, which per
    # the original note is meant to make it sort to the end.
    if '_SP' not in tag_map:
        # Fall back to the 'SP' entry, or a plain SPACE part-of-speech.
        space_attrs = tag_map.get('SP', {POS: SPACE})
        self.strings.add('_SP')
        # Copy so the caller's mapping is not modified.
        tag_map = dict(tag_map)
        tag_map['_SP'] = space_attrs
    ordered_items = sorted(tag_map.items())
    for i, (tag_str, attrs) in enumerate(ordered_items):
        attrs = self.normalize_attrs(attrs)
        self.add(attrs)
        self.tag_map[tag_str] = dict(attrs)
        # Index tags by their string-store key, in sorted order.
        self.reverse_index[self.strings.add(tag_str)] = i
    self.tag_names = tuple(sorted(self.tag_map.keys()))
    self.n_tags = len(self.tag_map)
    self._cache = PreshMapArray(self.n_tags)
|
||||
|
||||
def __reduce__(self):
|
||||
return (Morphology, (self.strings, self.tag_map, self.lemmatizer,
|
||||
self.exc), None, None)
|
||||
tags = set([self.get(self.strings[s]) for s in self.strings])
|
||||
tags -= set([""])
|
||||
return (unpickle_morphology, (self.strings, sorted(tags)), None, None)
|
||||
|
||||
def add(self, features):
|
||||
"""Insert a morphological analysis in the morphology table, if not
|
||||
|
@ -185,115 +157,6 @@ cdef class Morphology:
|
|||
else:
|
||||
return self.strings[tag.key]
|
||||
|
||||
def lemmatize(self, const univ_pos_t univ_pos, attr_t orth, morphology):
    """Return the string-store ID of the lemma for a word form.

    univ_pos (univ_pos_t): The coarse part-of-speech passed to the lemmatizer.
    orth (attr_t): String-store ID of the word form.
    morphology (dict): Morphological features; an entry whose value is True
        has a key of the form "Field_Value", otherwise key and value are
        each converted with strings.as_string.
    RETURNS: `orth` unchanged if it is not in the string store; the ID of
        the lowercased form if there is no lemmatizer; otherwise the ID of
        the first lemma the lemmatizer returns.
    """
    cdef unicode py_string
    cdef list lemma_strings
    cdef unicode lemma_string
    if orth not in self.strings:
        return orth
    py_string = self.strings[orth]
    if self.lemmatizer is None:
        return self.strings.add(py_string.lower())
    # Normalize features into a dict keyed by the field, to make life easier
    # for the lemmatizer. Handles string-to-int conversion too.
    string_feats = {}
    for key, value in morphology.items():
        if value is not True:
            string_feats[self.strings.as_string(key)] = self.strings.as_string(value)
            continue
        name, value = self.strings.as_string(key).split('_', 1)
        string_feats[name] = value
    lemma_strings = self.lemmatizer(py_string, univ_pos, string_feats)
    lemma_string = lemma_strings[0]
    return self.strings.add(lemma_string)
|
||||
|
||||
def add_special_case(self, unicode tag_str, unicode orth_str, attrs,
                     force=False):
    """Add a special-case rule to the morphological analyser. Tokens whose
    tag and orth match the rule will receive the specified properties.

    tag_str (str): The part-of-speech tag to key the exception.
    orth_str (str): The word-form to key the exception.
    attrs (dict): The attributes to store for that (tag, orth) pair.
    force (bool): Not used by this method.
    """
    normalized = self.normalize_attrs(dict(attrs))
    self.add(normalized)
    int_attrs = intify_attrs(normalized, self.strings, _do_deprecated=True)
    # Exceptions are keyed by (tag string, string-store ID of the orth).
    exc_key = (tag_str, self.strings.add(orth_str))
    self._exc[exc_key] = int_attrs
|
||||
|
||||
cdef int assign_untagged(self, TokenC* token) except -1:
    """Set the lemma on a token that has no POS tag. Uses the lemmatizer's
    lookup() method, which looks up the string in the table provided by the
    language data as lemma_lookup (if available). Tokens that already have
    a lemma are left untouched.
    """
    if token.lemma != 0:
        return 0
    orth_str = self.strings[token.lex.orth]
    lemma = self.lemmatizer.lookup(orth_str, orth=token.lex.orth)
    token.lemma = self.strings.add(lemma)
|
||||
|
||||
cdef int assign_tag(self, TokenC* token, tag_str) except -1:
    """Assign a tag to a token. Tags known to reverse_index go through
    assign_tag_id; any other tag is simply stored on the token as-is.
    """
    cdef attr_t tag = self.strings.as_int(tag_str)
    if tag not in self.reverse_index:
        token.tag = tag
    else:
        self.assign_tag_id(token, self.reverse_index[tag])
|
||||
|
||||
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1:
    """Assign tag, POS, lemma and morph to a token from a tag index.

    token (TokenC*): The token to modify in place.
    tag_id (int): Index into self.tag_names.
    RAISES (ValueError): If tag_id is not below self.n_tags.
    """
    # Valid indices are 0 .. n_tags - 1, so tag_id == n_tags must be rejected
    # as well; otherwise the tag_names lookup below fails with an IndexError
    # instead of the intended error.
    if tag_id >= self.n_tags:
        raise ValueError(Errors.E014.format(tag=tag_id))
    # Ensure spaces get tagged as space.
    # It seems pretty arbitrary to put this logic here, but there's really
    # nowhere better. I guess the justification is that this is where the
    # specific word and the tag interact. Still, we should have a better
    # way to enforce this rule, or figure out why the statistical model fails.
    # Related to Issue #220
    if Lexeme.c_check_flag(token.lex, IS_SPACE):
        tag_id = self.reverse_index[self.strings.add('_SP')]
    tag_str = self.tag_names[tag_id]
    # Copy the features so that popping POS does not alter the tag map.
    features = dict(self.tag_map.get(tag_str, {}))
    if features:
        pos = self.strings.as_int(features.pop(POS))
    else:
        pos = 0
    # Lemmas are cached per (tag_id, orth); 0 means "not cached yet".
    cdef attr_t lemma = <attr_t>self._cache.get(tag_id, token.lex.orth)
    if lemma == 0:
        # Ugh, self.lemmatize has opposite arg order from self.lemmatizer :(
        lemma = self.lemmatize(pos, token.lex.orth, features)
        self._cache.set(tag_id, token.lex.orth, <void*>lemma)
    token.lemma = lemma
    token.pos = <univ_pos_t>pos
    token.tag = self.strings[tag_str]
    token.morph = self.add(features)
    # Special-case rules for this (tag, orth) pair override POS and lemma.
    if (self.tag_names[tag_id], token.lex.orth) in self._exc:
        self._assign_tag_from_exceptions(token, tag_id)
|
||||
|
||||
cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1:
    """Apply the special-case rule stored for this token's (tag, orth)
    pair: POS and LEMMA from the rule replace the token's values, and
    attributes missing from the rule keep their current value.
    """
    cdef dict attrs = self._exc[(self.tag_names[tag_id], token.lex.orth)]
    token.pos = attrs.get(POS, token.pos)
    token.lemma = attrs.get(LEMMA, token.lemma)
|
||||
|
||||
def load_morph_exceptions(self, dict morph_rules):
    """Replace all special-case rules with those from a nested mapping.

    morph_rules (dict): Mapping of tag -> {orth -> attrs}.
    """
    self._exc = {}
    # Map (form, pos) to attributes
    for tag, exc in morph_rules.items():
        tag_string = self.strings.as_string(tag)
        for orth, attrs in exc.items():
            normalized = self.normalize_attrs(attrs)
            self.add_special_case(tag_string, self.strings.as_string(orth), normalized)
|
||||
|
||||
@property
def exc(self):
    """Return the special-case rules as a nested dict of
    tag -> {orth string -> attrs}, built from the internal dict keyed by
    (tag, orth ID) tuples. Entries are visited in sorted key order.
    """
    morph_rules = {}
    for key in sorted(self._exc):
        tag, orth = key
        # Create the per-tag dict the first time a tag is seen.
        per_tag = morph_rules.setdefault(tag, {})
        per_tag[self.strings[orth]] = self._exc[key]
    return morph_rules
|
||||
|
||||
@staticmethod
|
||||
def feats_to_dict(feats):
|
||||
if not feats or feats == Morphology.EMPTY_MORPH:
|
||||
|
@ -338,3 +201,9 @@ cdef int get_n_by_field(attr_t* results, const MorphAnalysisC* morph, attr_t fie
|
|||
results[n_results] = morph.features[i]
|
||||
n_results += 1
|
||||
return n_results
|
||||
|
||||
def unpickle_morphology(strings, tags):
    """Build a Morphology from a string store and re-add each given tag.

    strings (StringStore): Passed to the Morphology constructor.
    tags (iterable): Analyses to register through Morphology.add.
    RETURNS (Morphology): The rebuilt morphology.
    """
    cdef Morphology restored = Morphology(strings)
    for analysis in tags:
        restored.add(analysis)
    return restored
|
||||
|
|
|
@ -3,9 +3,10 @@ from .dep_parser import DependencyParser
|
|||
from .entity_linker import EntityLinker
|
||||
from .ner import EntityRecognizer
|
||||
from .entityruler import EntityRuler
|
||||
from .lemmatizer import Lemmatizer
|
||||
from .morphologizer import Morphologizer
|
||||
from .pipe import Pipe
|
||||
from spacy.pipeline.senter import SentenceRecognizer
|
||||
from .senter import SentenceRecognizer
|
||||
from .sentencizer import Sentencizer
|
||||
from .simple_ner import SimpleNER
|
||||
from .tagger import Tagger
|
||||
|
@ -20,6 +21,7 @@ __all__ = [
|
|||
"EntityRecognizer",
|
||||
"EntityRuler",
|
||||
"Morphologizer",
|
||||
"Lemmatizer",
|
||||
"Pipe",
|
||||
"SentenceRecognizer",
|
||||
"Sentencizer",
|
||||
|
|
|
@ -17,13 +17,18 @@ MatcherPatternType = List[Dict[Union[int, str], Any]]
|
|||
AttributeRulerPatternType = Dict[str, Union[MatcherPatternType, Dict, int]]
|
||||
|
||||
|
||||
@Language.factory("attribute_ruler")
|
||||
@Language.factory(
|
||||
"attribute_ruler", default_config={"pattern_dicts": None, "validate": False}
|
||||
)
|
||||
def make_attribute_ruler(
|
||||
nlp: Language,
|
||||
name: str,
|
||||
pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
|
||||
pattern_dicts: Optional[Iterable[AttributeRulerPatternType]],
|
||||
validate: bool,
|
||||
):
|
||||
return AttributeRuler(nlp.vocab, name, pattern_dicts=pattern_dicts)
|
||||
return AttributeRuler(
|
||||
nlp.vocab, name, pattern_dicts=pattern_dicts, validate=validate
|
||||
)
|
||||
|
||||
|
||||
class AttributeRuler(Pipe):
|
||||
|
@ -39,6 +44,7 @@ class AttributeRuler(Pipe):
|
|||
name: str = "attribute_ruler",
|
||||
*,
|
||||
pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
|
||||
validate: bool = False,
|
||||
) -> None:
|
||||
"""Initialize the AttributeRuler.
|
||||
|
||||
|
@ -54,7 +60,7 @@ class AttributeRuler(Pipe):
|
|||
"""
|
||||
self.name = name
|
||||
self.vocab = vocab
|
||||
self.matcher = Matcher(self.vocab)
|
||||
self.matcher = Matcher(self.vocab, validate=validate)
|
||||
self.attrs = []
|
||||
self._attrs_unnormed = [] # store for reference
|
||||
self.indices = []
|
||||
|
|
|
@ -20,7 +20,7 @@ PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
|
|||
assigns=["doc.ents", "token.ent_type", "token.ent_iob"],
|
||||
default_config={
|
||||
"phrase_matcher_attr": None,
|
||||
"validation": False,
|
||||
"validate": False,
|
||||
"overwrite_ents": False,
|
||||
"ent_id_sep": DEFAULT_ENT_ID_SEP,
|
||||
},
|
||||
|
@ -31,7 +31,7 @@ def make_entity_ruler(
|
|||
nlp: Language,
|
||||
name: str,
|
||||
phrase_matcher_attr: Optional[Union[int, str]],
|
||||
validation: bool,
|
||||
validate: bool,
|
||||
overwrite_ents: bool,
|
||||
ent_id_sep: str,
|
||||
):
|
||||
|
@ -39,7 +39,7 @@ def make_entity_ruler(
|
|||
nlp,
|
||||
name,
|
||||
phrase_matcher_attr=phrase_matcher_attr,
|
||||
validate=validation,
|
||||
validate=validate,
|
||||
overwrite_ents=overwrite_ents,
|
||||
ent_id_sep=ent_id_sep,
|
||||
)
|
||||
|
|
330
spacy/pipeline/lemmatizer.py
Normal file
330
spacy/pipeline/lemmatizer.py
Normal file
|
@ -0,0 +1,330 @@
|
|||
from typing import Optional, List, Dict, Any
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from .pipe import Pipe
|
||||
from ..errors import Errors
|
||||
from ..language import Language
|
||||
from ..lookups import Lookups, load_lookups
|
||||
from ..scorer import Scorer
|
||||
from ..tokens import Doc, Token
|
||||
from ..vocab import Vocab
|
||||
from .. import util
|
||||
|
||||
|
||||
@Language.factory(
    "lemmatizer",
    assigns=["token.lemma"],
    default_config=dict(model=None, mode="lookup", lookups=None, overwrite=False),
    scores=["lemma_acc"],
    default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(
    nlp: Language,
    model: Optional[Model],
    name: str,
    mode: str,
    lookups: Optional[Lookups],
    overwrite: bool = False,
):
    """Factory registered as "lemmatizer": build a Lemmatizer component.

    The lookups are first resolved through Lemmatizer.load_lookups using the
    pipeline's language and the requested mode, then handed to the component
    together with the pipeline's vocab.

    nlp (Language): The pipeline the component is created for.
    model (Optional[Model]): Passed through to the Lemmatizer.
    name (str): The component name.
    mode (str): The lemmatizer mode.
    lookups (Optional[Lookups]): Lookups to use, or None.
    overwrite (bool): Whether to overwrite existing lemmas.
    RETURNS (Lemmatizer): The new component.
    """
    resolved_lookups = Lemmatizer.load_lookups(nlp.lang, mode, lookups)
    component = Lemmatizer(
        nlp.vocab,
        model,
        name,
        mode=mode,
        lookups=resolved_lookups,
        overwrite=overwrite,
    )
    return component
|
||||
|
||||
|
||||
class Lemmatizer(Pipe):
    """
    The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
    lookup tables.

    DOCS: https://spacy.io/api/lemmatizer
    """

    @classmethod
    def get_lookups_config(cls, mode: str) -> Dict:
        """Returns the lookups configuration settings for a given mode for use
        in Lemmatizer.load_lookups.

        mode (str): The lemmatizer mode.
        RETURNS (dict): The lookups configuration settings for this mode.
            Empty for modes other than "lookup" and "rule".

        DOCS: https://spacy.io/api/lemmatizer#get_lookups_config
        """
        if mode == "lookup":
            return {
                "required_tables": ["lemma_lookup"],
            }
        elif mode == "rule":
            return {
                "required_tables": ["lemma_rules"],
                "optional_tables": ["lemma_exc", "lemma_index"],
            }
        return {}

    @classmethod
    def load_lookups(cls, lang: str, mode: str, lookups: Optional[Lookups],) -> Lookups:
        """Load and validate lookups tables. If the provided lookups is None,
        load the default lookups tables according to the language and mode
        settings. Confirm that all required tables for the language and mode
        are present.

        lang (str): The language code.
        mode (str): The lemmatizer mode.
        lookups (Lookups): The provided lookups, may be None if the default
            lookups should be loaded.
        RETURNS (Lookups): The Lookups object.
        RAISES (ValueError): If a required table is missing.

        DOCS: https://spacy.io/api/lemmatizer#load_lookups
        """
        config = cls.get_lookups_config(mode)
        required_tables = config.get("required_tables", [])
        optional_tables = config.get("optional_tables", [])
        if lookups is None:
            # Inside a method this name resolves to the module-level
            # load_lookups function, not to this classmethod.
            lookups = load_lookups(lang=lang, tables=required_tables)
            optional_lookups = load_lookups(
                lang=lang, tables=optional_tables, strict=False
            )
            for table in optional_lookups.tables:
                lookups.set_table(table, optional_lookups.get_table(table))
        for table in required_tables:
            if table not in lookups:
                raise ValueError(
                    Errors.E1004.format(
                        mode=mode, tables=required_tables, found=lookups.tables
                    )
                )
        return lookups

    def __init__(
        self,
        vocab: Vocab,
        model: Optional[Model],
        name: str = "lemmatizer",
        *,
        mode: str = "lookup",
        lookups: Optional[Lookups] = None,
        overwrite: bool = False,
    ) -> None:
        """Initialize a Lemmatizer.

        vocab (Vocab): The vocab.
        model (Model): A model (not yet implemented).
        name (str): The component name. Defaults to "lemmatizer".
        mode (str): The lemmatizer mode: "lookup", "rule". Defaults to "lookup".
            Any other mode requires a method named "{mode}_lemmatize".
        lookups (Lookups): The lookups object containing the (optional) tables
            such as "lemma_rules", "lemma_index", "lemma_exc" and
            "lemma_lookup". Defaults to None
        overwrite (bool): Whether to overwrite existing lemmas. Defaults to
            `False`.
        RAISES (ValueError): If no lemmatize method exists for the mode.

        DOCS: https://spacy.io/api/lemmatizer#init
        """
        self.vocab = vocab
        self.model = model
        # Keep the component name; it was previously accepted but dropped.
        self.name = name
        self._mode = mode
        self.lookups = lookups if lookups is not None else Lookups()
        self.overwrite = overwrite
        # Every mode, including the built-in "lookup" and "rule", is resolved
        # to its "{mode}_lemmatize" method.
        try:
            self.lemmatize = getattr(self, f"{self.mode}_lemmatize")
        except AttributeError:
            raise ValueError(Errors.E1003.format(mode=mode))
        # Results of rule_lemmatize, keyed by (orth, pos, morph).
        self.cache = {}

    @property
    def mode(self):
        """RETURNS (str): The lemmatizer mode set at initialization."""
        return self._mode

    def __call__(self, doc: Doc) -> Doc:
        """Apply the lemmatizer to one document.

        doc (Doc): The Doc to process.
        RETURNS (Doc): The processed Doc.

        DOCS: https://spacy.io/api/lemmatizer#call
        """
        for token in doc:
            # Only fill in missing lemmas unless overwriting was requested.
            if self.overwrite or token.lemma == 0:
                token.lemma_ = self.lemmatize(token)[0]
        return doc

    def pipe(self, stream, *, batch_size=128):
        """Apply the pipe to a stream of documents. This usually happens under
        the hood when the nlp object is called on a text and all components are
        applied to the Doc.

        stream (Iterable[Doc]): A stream of documents.
        batch_size (int): The number of documents to buffer. Not used here:
            documents are processed one at a time.
        YIELDS (Doc): Processed documents in order.

        DOCS: https://spacy.io/api/lemmatizer#pipe
        """
        for doc in stream:
            doc = self(doc)
            yield doc

    def lookup_lemmatize(self, token: Token) -> List[str]:
        """Lemmatize using a lookup-based approach.

        token (Token): The token to lemmatize.
        RETURNS (list): The available lemmas for the string. Falls back to
            the token text if there is no entry.

        DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize
        """
        lookup_table = self.lookups.get_table("lemma_lookup", {})
        result = lookup_table.get(token.text, token.text)
        if isinstance(result, str):
            result = [result]
        return result

    def rule_lemmatize(self, token: Token) -> List[str]:
        """Lemmatize using a rule-based approach.

        token (Token): The token to lemmatize.
        RETURNS (list): The available lemmas for the string.

        DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
        """
        cache_key = (token.orth, token.pos, token.morph)
        if cache_key in self.cache:
            return self.cache[cache_key]
        string = token.text
        univ_pos = token.pos_.lower()
        if univ_pos in ("", "eol", "space"):
            return [string.lower()]
        # See Issue #435 for example of where this logic is required.
        if self.is_base_form(token):
            return [string.lower()]
        index_table = self.lookups.get_table("lemma_index", {})
        exc_table = self.lookups.get_table("lemma_exc", {})
        rules_table = self.lookups.get_table("lemma_rules", {})
        # No data at all for this part-of-speech: return the form itself,
        # preserving case only for proper nouns.
        if not any(
            (
                index_table.get(univ_pos),
                exc_table.get(univ_pos),
                rules_table.get(univ_pos),
            )
        ):
            if univ_pos == "propn":
                return [string]
            else:
                return [string.lower()]

        index = index_table.get(univ_pos, {})
        exceptions = exc_table.get(univ_pos, {})
        rules = rules_table.get(univ_pos, [])
        orig = string
        string = string.lower()
        forms = []
        oov_forms = []
        for old, new in rules:
            if string.endswith(old):
                form = string[: len(string) - len(old)] + new
                if not form:
                    pass
                elif form in index or not form.isalpha():
                    forms.append(form)
                else:
                    oov_forms.append(form)
        # Remove duplicates but preserve the ordering of applied "rules"
        forms = list(dict.fromkeys(forms))
        # Put exceptions at the front of the list, so they get priority.
        # This is a dodgy heuristic -- but it's the best we can do until we get
        # frequencies on this. We can at least prune out problematic exceptions,
        # if they shadow more frequent analyses.
        for form in exceptions.get(string, []):
            if form not in forms:
                forms.insert(0, form)
        if not forms:
            forms.extend(oov_forms)
        if not forms:
            forms.append(orig)
        self.cache[cache_key] = forms
        return forms

    def is_base_form(self, token: Token) -> bool:
        """Check whether the token is a base form that does not need further
        analysis for lemmatization. This implementation always returns False.

        token (Token): The token.
        RETURNS (bool): Whether the token is a base form.

        DOCS: https://spacy.io/api/lemmatizer#is_base_form
        """
        return False

    def score(self, examples, **kwargs) -> Dict[str, Any]:
        """Score a batch of examples.

        examples (Iterable[Example]): The examples to score.
        RETURNS (Dict[str, Any]): The scores.

        DOCS: https://spacy.io/api/lemmatizer#score
        """
        return Scorer.score_token_attr(examples, "lemma", **kwargs)

    def to_disk(self, path, *, exclude=tuple()):
        """Save the current state to a directory.

        path (unicode or Path): A path to a directory, which will be created if
            it doesn't exist.
        exclude (list): String names of serialization fields to exclude.

        DOCS: https://spacy.io/api/lemmatizer#to_disk
        """
        serialize = {}
        serialize["vocab"] = lambda p: self.vocab.to_disk(p)
        serialize["lookups"] = lambda p: self.lookups.to_disk(p)
        util.to_disk(path, serialize, exclude)

    def from_disk(self, path, *, exclude=tuple()):
        """Loads state from a directory. Modifies the object in place and
        returns it.

        path (unicode or Path): A path to a directory.
        exclude (list): String names of serialization fields to exclude.
        RETURNS (Lemmatizer): The modified `Lemmatizer` object.

        DOCS: https://spacy.io/api/lemmatizer#from_disk
        """
        deserialize = {}
        deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
        deserialize["lookups"] = lambda p: self.lookups.from_disk(p)
        util.from_disk(path, deserialize, exclude)
        # Cached lemmas were computed from the previous tables.
        self.cache = {}
        return self

    def to_bytes(self, *, exclude=tuple()) -> bytes:
        """Serialize the current state to a binary string.

        exclude (list): String names of serialization fields to exclude.
        RETURNS (bytes): The serialized form of the `Lemmatizer` object.

        DOCS: https://spacy.io/api/lemmatizer#to_bytes
        """
        serialize = {}
        serialize["vocab"] = self.vocab.to_bytes
        serialize["lookups"] = self.lookups.to_bytes
        return util.to_bytes(serialize, exclude)

    def from_bytes(self, bytes_data: bytes, *, exclude=tuple()):
        """Load state from a binary string. Modifies the object in place and
        returns it.

        bytes_data (bytes): The data to load from.
        exclude (list): String names of serialization fields to exclude.
        RETURNS (Lemmatizer): The `Lemmatizer` object.

        DOCS: https://spacy.io/api/lemmatizer#from_bytes
        """
        deserialize = {}
        deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)
        deserialize["lookups"] = lambda b: self.lookups.from_bytes(b)
        util.from_bytes(bytes_data, deserialize, exclude)
        # Cached lemmas were computed from the previous tables.
        self.cache = {}
        return self
|
|
@ -38,12 +38,12 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]
|
|||
@Language.factory(
|
||||
"tagger",
|
||||
assigns=["token.tag"],
|
||||
default_config={"model": DEFAULT_TAGGER_MODEL, "set_morphology": False},
|
||||
scores=["tag_acc", "pos_acc", "lemma_acc"],
|
||||
default_config={"model": DEFAULT_TAGGER_MODEL},
|
||||
scores=["tag_acc"],
|
||||
default_score_weights={"tag_acc": 1.0},
|
||||
)
|
||||
def make_tagger(nlp: Language, name: str, model: Model, set_morphology: bool):
|
||||
return Tagger(nlp.vocab, model, name, set_morphology=set_morphology)
|
||||
def make_tagger(nlp: Language, name: str, model: Model):
|
||||
return Tagger(nlp.vocab, model, name)
|
||||
|
||||
|
||||
class Tagger(Pipe):
|
||||
|
@ -51,13 +51,14 @@ class Tagger(Pipe):
|
|||
|
||||
DOCS: https://spacy.io/api/tagger
|
||||
"""
|
||||
def __init__(self, vocab, model, name="tagger", *, set_morphology=False):
|
||||
def __init__(self, vocab, model, name="tagger", *, labels=None):
|
||||
"""Initialize a part-of-speech tagger.
|
||||
|
||||
vocab (Vocab): The shared vocabulary.
|
||||
model (thinc.api.Model): The Thinc Model powering the pipeline component.
|
||||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
labels (List): The set of labels. Defaults to None.
|
||||
set_morphology (bool): Whether to set morphological features.
|
||||
|
||||
DOCS: https://spacy.io/api/tagger#init
|
||||
|
@ -66,7 +67,7 @@ class Tagger(Pipe):
|
|||
self.model = model
|
||||
self.name = name
|
||||
self._rehearsal_model = None
|
||||
cfg = {"set_morphology": set_morphology}
|
||||
cfg = {"labels": labels or []}
|
||||
self.cfg = dict(sorted(cfg.items()))
|
||||
|
||||
@property
|
||||
|
@ -79,7 +80,7 @@ class Tagger(Pipe):
|
|||
|
||||
DOCS: https://spacy.io/api/tagger#labels
|
||||
"""
|
||||
return tuple(self.vocab.morphology.tag_names)
|
||||
return tuple(self.cfg["labels"])
|
||||
|
||||
def __call__(self, doc):
|
||||
"""Apply the pipe to a Doc.
|
||||
|
@ -149,9 +150,7 @@ class Tagger(Pipe):
|
|||
if isinstance(docs, Doc):
|
||||
docs = [docs]
|
||||
cdef Doc doc
|
||||
cdef int idx = 0
|
||||
cdef Vocab vocab = self.vocab
|
||||
assign_morphology = self.cfg.get("set_morphology", True)
|
||||
for i, doc in enumerate(docs):
|
||||
doc_tag_ids = batch_tag_ids[i]
|
||||
if hasattr(doc_tag_ids, "get"):
|
||||
|
@ -159,15 +158,7 @@ class Tagger(Pipe):
|
|||
for j, tag_id in enumerate(doc_tag_ids):
|
||||
# Don't clobber preset POS tags
|
||||
if doc.c[j].tag == 0:
|
||||
if doc.c[j].pos == 0 and assign_morphology:
|
||||
# Don't clobber preset lemmas
|
||||
lemma = doc.c[j].lemma
|
||||
vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
|
||||
if lemma != 0 and lemma != doc.c[j].lex.orth:
|
||||
doc.c[j].lemma = lemma
|
||||
else:
|
||||
doc.c[j].tag = self.vocab.strings[self.labels[tag_id]]
|
||||
idx += 1
|
||||
doc.is_tagged = True
|
||||
|
||||
def update(self, examples, *, drop=0., sgd=None, losses=None, set_annotations=False):
|
||||
|
@ -278,55 +269,26 @@ class Tagger(Pipe):
|
|||
|
||||
DOCS: https://spacy.io/api/tagger#begin_training
|
||||
"""
|
||||
lemma_tables = ["lemma_rules", "lemma_index", "lemma_exc", "lemma_lookup"]
|
||||
if not any(table in self.vocab.lookups for table in lemma_tables):
|
||||
warnings.warn(Warnings.W022)
|
||||
lexeme_norms = self.vocab.lookups.get_table("lexeme_norm", {})
|
||||
if len(lexeme_norms) == 0 and self.vocab.lang in util.LEXEME_NORM_LANGS:
|
||||
langs = ", ".join(util.LEXEME_NORM_LANGS)
|
||||
warnings.warn(Warnings.W033.format(model="part-of-speech tagger", langs=langs))
|
||||
orig_tag_map = dict(self.vocab.morphology.tag_map)
|
||||
new_tag_map = {}
|
||||
tags = set()
|
||||
for example in get_examples():
|
||||
try:
|
||||
y = example.y
|
||||
except AttributeError:
|
||||
raise TypeError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example))) from None
|
||||
for token in y:
|
||||
tag = token.tag_
|
||||
if tag in orig_tag_map:
|
||||
new_tag_map[tag] = orig_tag_map[tag]
|
||||
else:
|
||||
new_tag_map[tag] = {POS: X}
|
||||
|
||||
cdef Vocab vocab = self.vocab
|
||||
if new_tag_map:
|
||||
if "_SP" in orig_tag_map:
|
||||
new_tag_map["_SP"] = orig_tag_map["_SP"]
|
||||
vocab.morphology.load_tag_map(new_tag_map)
|
||||
tags.add(token.tag_)
|
||||
for tag in sorted(tags):
|
||||
self.add_label(tag)
|
||||
self.set_output(len(self.labels))
|
||||
doc_sample = [Doc(self.vocab, words=["hello", "world"])]
|
||||
if pipeline is not None:
|
||||
for name, component in pipeline:
|
||||
if component is self:
|
||||
break
|
||||
if hasattr(component, "pipe"):
|
||||
doc_sample = list(component.pipe(doc_sample))
|
||||
else:
|
||||
doc_sample = [component(doc) for doc in doc_sample]
|
||||
self.model.initialize(X=doc_sample)
|
||||
# Get batch of example docs, example outputs to call begin_training().
|
||||
# This lets the model infer shapes.
|
||||
self.model.initialize()
|
||||
if sgd is None:
|
||||
sgd = self.create_optimizer()
|
||||
return sgd
|
||||
|
||||
def add_label(self, label, values=None):
|
||||
def add_label(self, label):
|
||||
"""Add a new label to the pipe.
|
||||
|
||||
label (str): The label to add.
|
||||
values (Dict[int, str]): Optional values to map to the label, e.g. a
|
||||
tag map dictionary.
|
||||
RETURNS (int): 0 if label is already present, otherwise 1.
|
||||
|
||||
DOCS: https://spacy.io/api/tagger#add_label
|
||||
|
@ -335,22 +297,8 @@ class Tagger(Pipe):
|
|||
raise ValueError(Errors.E187)
|
||||
if label in self.labels:
|
||||
return 0
|
||||
if self.model.has_dim("nO"):
|
||||
# Here's how the model resizing will work, once the
|
||||
# neuron-to-tag mapping is no longer controlled by
|
||||
# the Morphology class, which sorts the tag names.
|
||||
# The sorting makes adding labels difficult.
|
||||
# smaller = self.model._layers[-1]
|
||||
# larger = Softmax(len(self.labels)+1, smaller.nI)
|
||||
# copy_array(larger.W[:smaller.nO], smaller.W)
|
||||
# copy_array(larger.b[:smaller.nO], smaller.b)
|
||||
# self.model._layers[-1] = larger
|
||||
raise ValueError(TempErrors.T003)
|
||||
tag_map = dict(self.vocab.morphology.tag_map)
|
||||
if values is None:
|
||||
values = {POS: "X"}
|
||||
tag_map[label] = values
|
||||
self.vocab.morphology.load_tag_map(tag_map)
|
||||
self.cfg["labels"].append(label)
|
||||
self.vocab.strings.add(label)
|
||||
return 1
|
||||
|
||||
def score(self, examples, **kwargs):
|
||||
|
@ -362,11 +310,7 @@ class Tagger(Pipe):
|
|||
|
||||
DOCS: https://spacy.io/api/tagger#score
|
||||
"""
|
||||
scores = {}
|
||||
scores.update(Scorer.score_token_attr(examples, "tag", **kwargs))
|
||||
scores.update(Scorer.score_token_attr(examples, "pos", **kwargs))
|
||||
scores.update(Scorer.score_token_attr(examples, "lemma", **kwargs))
|
||||
return scores
|
||||
return Scorer.score_token_attr(examples, "tag", **kwargs)
|
||||
|
||||
def to_bytes(self, *, exclude=tuple()):
|
||||
"""Serialize the pipe to a bytestring.
|
||||
|
@ -380,10 +324,6 @@ class Tagger(Pipe):
|
|||
serialize["model"] = self.model.to_bytes
|
||||
serialize["vocab"] = self.vocab.to_bytes
|
||||
serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
|
||||
tag_map = dict(sorted(self.vocab.morphology.tag_map.items()))
|
||||
serialize["tag_map"] = lambda: srsly.msgpack_dumps(tag_map)
|
||||
morph_rules = dict(self.vocab.morphology.exc)
|
||||
serialize["morph_rules"] = lambda: srsly.msgpack_dumps(morph_rules)
|
||||
return util.to_bytes(serialize, exclude)
|
||||
|
||||
def from_bytes(self, bytes_data, *, exclude=tuple()):
|
||||
|
@ -401,21 +341,8 @@ class Tagger(Pipe):
|
|||
except AttributeError:
|
||||
raise ValueError(Errors.E149) from None
|
||||
|
||||
def load_tag_map(b):
|
||||
tag_map = srsly.msgpack_loads(b)
|
||||
self.vocab.morphology.load_tag_map(tag_map)
|
||||
|
||||
def load_morph_rules(b):
|
||||
morph_rules = srsly.msgpack_loads(b)
|
||||
self.vocab.morphology.load_morph_exceptions(morph_rules)
|
||||
|
||||
self.vocab.morphology = Morphology(self.vocab.strings, dict(),
|
||||
lemmatizer=self.vocab.morphology.lemmatizer)
|
||||
|
||||
deserialize = {
|
||||
"vocab": lambda b: self.vocab.from_bytes(b),
|
||||
"tag_map": load_tag_map,
|
||||
"morph_rules": load_morph_rules,
|
||||
"cfg": lambda b: self.cfg.update(srsly.json_loads(b)),
|
||||
"model": lambda b: load_model(b),
|
||||
}
|
||||
|
@ -430,12 +357,8 @@ class Tagger(Pipe):
|
|||
|
||||
DOCS: https://spacy.io/api/tagger#to_disk
|
||||
"""
|
||||
tag_map = dict(sorted(self.vocab.morphology.tag_map.items()))
|
||||
morph_rules = dict(self.vocab.morphology.exc)
|
||||
serialize = {
|
||||
"vocab": lambda p: self.vocab.to_disk(p),
|
||||
"tag_map": lambda p: srsly.write_msgpack(p, tag_map),
|
||||
"morph_rules": lambda p: srsly.write_msgpack(p, morph_rules),
|
||||
"model": lambda p: self.model.to_disk(p),
|
||||
"cfg": lambda p: srsly.write_json(p, self.cfg),
|
||||
}
|
||||
|
@ -457,22 +380,9 @@ class Tagger(Pipe):
|
|||
except AttributeError:
|
||||
raise ValueError(Errors.E149) from None
|
||||
|
||||
def load_tag_map(p):
|
||||
tag_map = srsly.read_msgpack(p)
|
||||
self.vocab.morphology.load_tag_map(tag_map)
|
||||
|
||||
def load_morph_rules(p):
|
||||
morph_rules = srsly.read_msgpack(p)
|
||||
self.vocab.morphology.load_morph_exceptions(morph_rules)
|
||||
|
||||
self.vocab.morphology = Morphology(self.vocab.strings, dict(),
|
||||
lemmatizer=self.vocab.morphology.lemmatizer)
|
||||
|
||||
deserialize = {
|
||||
"vocab": lambda p: self.vocab.from_disk(p),
|
||||
"cfg": lambda p: self.cfg.update(deserialize_config(p)),
|
||||
"tag_map": load_tag_map,
|
||||
"morph_rules": load_morph_rules,
|
||||
"model": load_model,
|
||||
}
|
||||
util.from_disk(path, deserialize, exclude)
|
||||
|
|
|
@ -220,7 +220,6 @@ class ConfigSchemaNlp(BaseModel):
|
|||
lang: StrictStr = Field(..., title="The base language to use")
|
||||
pipeline: List[StrictStr] = Field(..., title="The pipeline component names in order")
|
||||
tokenizer: Callable = Field(..., title="The tokenizer to use")
|
||||
lemmatizer: Callable = Field(..., title="The lemmatizer to use")
|
||||
load_vocab_data: StrictBool = Field(..., title="Whether to load additional vocab data from spacy-lookups-data")
|
||||
before_creation: Optional[Callable[[Type["Language"]], Type["Language"]]] = Field(..., title="Optional callback to modify Language class before initialization")
|
||||
after_creation: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after creation and before the pipeline is constructed")
|
||||
|
|
|
@ -201,7 +201,7 @@ def ru_tokenizer():
|
|||
@pytest.fixture
|
||||
def ru_lemmatizer():
|
||||
pytest.importorskip("pymorphy2")
|
||||
return get_lang_class("ru")().vocab.morphology.lemmatizer
|
||||
return get_lang_class("ru")().add_pipe("lemmatizer")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
|
|
|
@ -1,21 +1,12 @@
|
|||
import pytest
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.tokens import Doc
|
||||
from spacy.lemmatizer import Lemmatizer
|
||||
from spacy.lookups import Lookups
|
||||
from spacy import util
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def lemmatizer():
|
||||
lookups = Lookups()
|
||||
lookups.add_table("lemma_lookup", {"dogs": "dog", "boxen": "box", "mice": "mouse"})
|
||||
return Lemmatizer(lookups)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def vocab(lemmatizer):
|
||||
return Vocab(lemmatizer=lemmatizer)
|
||||
def vocab():
|
||||
return Vocab()
|
||||
|
||||
|
||||
def test_empty_doc(vocab):
|
||||
|
@ -30,14 +21,6 @@ def test_single_word(vocab):
|
|||
assert doc.text == "a"
|
||||
|
||||
|
||||
def test_lookup_lemmatization(vocab):
|
||||
doc = Doc(vocab, words=["dogs", "dogses"])
|
||||
assert doc[0].text == "dogs"
|
||||
assert doc[0].lemma_ == "dog"
|
||||
assert doc[1].text == "dogses"
|
||||
assert doc[1].lemma_ == "dogses"
|
||||
|
||||
|
||||
def test_create_from_words_and_text(vocab):
|
||||
# no whitespace in words
|
||||
words = ["'", "dogs", "'", "run"]
|
||||
|
|
|
@ -1,23 +1,17 @@
|
|||
import pytest
|
||||
from spacy.symbols import POS, PRON, VERB
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def i_has(en_tokenizer):
|
||||
doc = en_tokenizer("I has")
|
||||
tag_map = {
|
||||
"PRP": {POS: PRON, "PronType": "prs"},
|
||||
"VBZ": {
|
||||
POS: VERB,
|
||||
doc[0].morph_ = {"PronType": "prs"}
|
||||
doc[1].morph_ = {
|
||||
"VerbForm": "fin",
|
||||
"Tense": "pres",
|
||||
"Number": "sing",
|
||||
"Person": "three",
|
||||
},
|
||||
}
|
||||
en_tokenizer.vocab.morphology.load_tag_map(tag_map)
|
||||
doc[0].tag_ = "PRP"
|
||||
doc[1].tag_ = "VBZ"
|
||||
|
||||
return doc
|
||||
|
||||
|
||||
|
|
|
@ -124,7 +124,6 @@ def test_doc_retokenize_spans_merge_tokens_default_attrs(en_tokenizer):
|
|||
assert doc[0].text == "The players"
|
||||
assert doc[0].tag_ == "NN"
|
||||
assert doc[0].pos_ == "NOUN"
|
||||
assert doc[0].lemma_ == "The players"
|
||||
doc = get_doc(
|
||||
tokens.vocab,
|
||||
words=[t.text for t in tokens],
|
||||
|
@ -143,11 +142,9 @@ def test_doc_retokenize_spans_merge_tokens_default_attrs(en_tokenizer):
|
|||
assert doc[0].text == "The players"
|
||||
assert doc[0].tag_ == "NN"
|
||||
assert doc[0].pos_ == "NOUN"
|
||||
assert doc[0].lemma_ == "The players"
|
||||
assert doc[1].text == "start ."
|
||||
assert doc[1].tag_ == "VBZ"
|
||||
assert doc[1].pos_ == "VERB"
|
||||
assert doc[1].lemma_ == "start ."
|
||||
|
||||
|
||||
def test_doc_retokenize_spans_merge_heads(en_tokenizer):
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
from spacy.symbols import POS, PRON, VERB, DET, NOUN, PUNCT
|
||||
from ...util import get_doc
|
||||
|
||||
|
||||
def test_en_tagger_load_morph_exc(en_tokenizer):
|
||||
text = "I like his style."
|
||||
tags = ["PRP", "VBP", "PRP$", "NN", "."]
|
||||
tag_map = {
|
||||
"PRP": {POS: PRON},
|
||||
"VBP": {POS: VERB},
|
||||
"PRP$": {POS: DET},
|
||||
"NN": {POS: NOUN},
|
||||
".": {POS: PUNCT},
|
||||
}
|
||||
morph_exc = {"VBP": {"like": {"lemma": "luck"}}}
|
||||
en_tokenizer.vocab.morphology.load_tag_map(tag_map)
|
||||
en_tokenizer.vocab.morphology.load_morph_exceptions(morph_exc)
|
||||
tokens = en_tokenizer(text)
|
||||
doc = get_doc(tokens.vocab, words=[t.text for t in tokens], tags=tags)
|
||||
assert doc[1].tag_ == "VBP"
|
||||
assert doc[1].lemma_ == "luck"
|
|
@ -3,15 +3,16 @@ import pytest
|
|||
from ...util import get_doc
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="TODO: investigate why lemmatizer fails here")
|
||||
def test_ru_doc_lemmatization(ru_tokenizer):
|
||||
def test_ru_doc_lemmatization(ru_lemmatizer):
|
||||
words = ["мама", "мыла", "раму"]
|
||||
tags = [
|
||||
"NOUN__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing",
|
||||
"VERB__Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act",
|
||||
"NOUN__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing",
|
||||
pos = ["NOUN", "VERB", "NOUN"]
|
||||
morphs = [
|
||||
"Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing",
|
||||
"Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act",
|
||||
"Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing",
|
||||
]
|
||||
doc = get_doc(ru_tokenizer.vocab, words=words, tags=tags)
|
||||
doc = get_doc(ru_lemmatizer.vocab, words=words, pos=pos, morphs=morphs)
|
||||
doc = ru_lemmatizer(doc)
|
||||
lemmas = [token.lemma_ for token in doc]
|
||||
assert lemmas == ["мама", "мыть", "рама"]
|
||||
|
||||
|
@ -27,43 +28,51 @@ def test_ru_doc_lemmatization(ru_tokenizer):
|
|||
],
|
||||
)
|
||||
def test_ru_lemmatizer_noun_lemmas(ru_lemmatizer, text, lemmas):
|
||||
assert sorted(ru_lemmatizer.noun(text)) == lemmas
|
||||
doc = get_doc(ru_lemmatizer.vocab, words=[text], pos=["NOUN"])
|
||||
result_lemmas = ru_lemmatizer.pymorphy2_lemmatize(doc[0])
|
||||
assert sorted(result_lemmas) == lemmas
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text,pos,morphology,lemma",
|
||||
"text,pos,morph,lemma",
|
||||
[
|
||||
("рой", "NOUN", None, "рой"),
|
||||
("рой", "VERB", None, "рыть"),
|
||||
("клей", "NOUN", None, "клей"),
|
||||
("клей", "VERB", None, "клеить"),
|
||||
("три", "NUM", None, "три"),
|
||||
("кос", "NOUN", {"Number": "Sing"}, "кос"),
|
||||
("кос", "NOUN", {"Number": "Plur"}, "коса"),
|
||||
("кос", "ADJ", None, "косой"),
|
||||
("потом", "NOUN", None, "пот"),
|
||||
("потом", "ADV", None, "потом"),
|
||||
("рой", "NOUN", "", "рой"),
|
||||
("рой", "VERB", "", "рыть"),
|
||||
("клей", "NOUN", "", "клей"),
|
||||
("клей", "VERB", "", "клеить"),
|
||||
("три", "NUM", "", "три"),
|
||||
("кос", "NOUN", "Number=Sing", "кос"),
|
||||
("кос", "NOUN", "Number=Plur", "коса"),
|
||||
("кос", "ADJ", "", "косой"),
|
||||
("потом", "NOUN", "", "пот"),
|
||||
("потом", "ADV", "", "потом"),
|
||||
],
|
||||
)
|
||||
def test_ru_lemmatizer_works_with_different_pos_homonyms(
|
||||
ru_lemmatizer, text, pos, morphology, lemma
|
||||
ru_lemmatizer, text, pos, morph, lemma
|
||||
):
|
||||
assert ru_lemmatizer(text, pos, morphology) == [lemma]
|
||||
doc = get_doc(ru_lemmatizer.vocab, words=[text], pos=[pos], morphs=[morph])
|
||||
result_lemmas = ru_lemmatizer.pymorphy2_lemmatize(doc[0])
|
||||
assert result_lemmas == [lemma]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text,morphology,lemma",
|
||||
"text,morph,lemma",
|
||||
[
|
||||
("гвоздики", {"Gender": "Fem"}, "гвоздика"),
|
||||
("гвоздики", {"Gender": "Masc"}, "гвоздик"),
|
||||
("вина", {"Gender": "Fem"}, "вина"),
|
||||
("вина", {"Gender": "Neut"}, "вино"),
|
||||
("гвоздики", "Gender=Fem", "гвоздика"),
|
||||
("гвоздики", "Gender=Masc", "гвоздик"),
|
||||
("вина", "Gender=Fem", "вина"),
|
||||
("вина", "Gender=Neut", "вино"),
|
||||
],
|
||||
)
|
||||
def test_ru_lemmatizer_works_with_noun_homonyms(ru_lemmatizer, text, morphology, lemma):
|
||||
assert ru_lemmatizer.noun(text, morphology) == [lemma]
|
||||
def test_ru_lemmatizer_works_with_noun_homonyms(ru_lemmatizer, text, morph, lemma):
|
||||
doc = get_doc(ru_lemmatizer.vocab, words=[text], pos=["NOUN"], morphs=[morph])
|
||||
result_lemmas = ru_lemmatizer.pymorphy2_lemmatize(doc[0])
|
||||
assert result_lemmas == [lemma]
|
||||
|
||||
|
||||
def test_ru_lemmatizer_punct(ru_lemmatizer):
|
||||
assert ru_lemmatizer.punct("«") == ['"']
|
||||
assert ru_lemmatizer.punct("»") == ['"']
|
||||
doc = get_doc(ru_lemmatizer.vocab, words=["«"], pos=["PUNCT"])
|
||||
assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"']
|
||||
doc = get_doc(ru_lemmatizer.vocab, words=["»"], pos=["PUNCT"])
|
||||
assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"']
|
||||
|
|
34
spacy/tests/lang/test_lemmatizers.py
Normal file
34
spacy/tests/lang/test_lemmatizers.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
import pytest
|
||||
from spacy import registry
|
||||
from spacy.lookups import Lookups
|
||||
from spacy.util import get_lang_class
|
||||
|
||||
|
||||
# fmt: off
|
||||
# Only include languages with no external dependencies
|
||||
# excluded: ru, uk
|
||||
# excluded for custom tables: pl
|
||||
LANGUAGES = ["el", "en", "fr", "nl"]
|
||||
# fmt: on
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lang", LANGUAGES)
|
||||
def test_lemmatizer_initialize(lang, capfd):
|
||||
@registry.assets("lemmatizer_init_lookups")
|
||||
def lemmatizer_init_lookups():
|
||||
lookups = Lookups()
|
||||
lookups.add_table("lemma_lookup", {"cope": "cope"})
|
||||
lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
|
||||
lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
|
||||
lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
|
||||
return lookups
|
||||
|
||||
"""Test that languages can be initialized."""
|
||||
nlp = get_lang_class(lang)()
|
||||
nlp.add_pipe(
|
||||
"lemmatizer", config={"lookups": {"@assets": "lemmatizer_init_lookups"}}
|
||||
)
|
||||
# Check for stray print statements (see #3342)
|
||||
doc = nlp("test") # noqa: F841
|
||||
captured = capfd.readouterr()
|
||||
assert not captured.out
|
|
@ -1,14 +1,11 @@
|
|||
import pytest
|
||||
from spacy.morphology import Morphology
|
||||
from spacy.strings import StringStore, get_string_id
|
||||
from spacy.lemmatizer import Lemmatizer
|
||||
from spacy.lookups import Lookups
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def morphology():
|
||||
lemmatizer = Lemmatizer(Lookups())
|
||||
return Morphology(StringStore(), {}, lemmatizer)
|
||||
return Morphology(StringStore())
|
||||
|
||||
|
||||
def test_init(morphology):
|
||||
|
|
|
@ -2,21 +2,18 @@ import pytest
|
|||
import pickle
|
||||
from spacy.morphology import Morphology
|
||||
from spacy.strings import StringStore
|
||||
from spacy.lemmatizer import Lemmatizer
|
||||
from spacy.lookups import Lookups
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def morphology():
|
||||
tag_map = {"A": {"POS": "X"}, "B": {"POS": "NOUN"}}
|
||||
exc = {"A": {"a": {"POS": "VERB"}}}
|
||||
lemmatizer = Lemmatizer(Lookups())
|
||||
return Morphology(StringStore(), tag_map, lemmatizer, exc=exc)
|
||||
morphology = Morphology(StringStore())
|
||||
morphology.add("Feat1=Val1|Feat2=Val2")
|
||||
morphology.add("Feat3=Val3|Feat4=Val4")
|
||||
return morphology
|
||||
|
||||
|
||||
def test_morphology_pickle_roundtrip(morphology):
|
||||
b = pickle.dumps(morphology)
|
||||
reloaded_morphology = pickle.loads(b)
|
||||
|
||||
assert morphology.tag_map == reloaded_morphology.tag_map
|
||||
assert morphology.exc == reloaded_morphology.exc
|
||||
assert reloaded_morphology.get(morphology.strings["Feat1=Val1|Feat2=Val2"]) == "Feat1=Val1|Feat2=Val2"
|
||||
assert reloaded_morphology.get(morphology.strings["Feat3=Val3|Feat4=Val4"]) == "Feat3=Val3|Feat4=Val4"
|
||||
|
|
|
@ -82,10 +82,10 @@ def test_parser_merge_pp(en_tokenizer):
|
|||
text = "A phrase with another phrase occurs"
|
||||
heads = [1, 4, -1, 1, -2, 0]
|
||||
deps = ["det", "nsubj", "prep", "det", "pobj", "ROOT"]
|
||||
tags = ["DT", "NN", "IN", "DT", "NN", "VBZ"]
|
||||
pos = ["DET", "NOUN", "ADP", "DET", "NOUN", "VERB"]
|
||||
tokens = en_tokenizer(text)
|
||||
doc = get_doc(
|
||||
tokens.vocab, words=[t.text for t in tokens], deps=deps, heads=heads, tags=tags
|
||||
tokens.vocab, words=[t.text for t in tokens], deps=deps, heads=heads, pos=pos,
|
||||
)
|
||||
with doc.retokenize() as retokenizer:
|
||||
for np in doc.noun_chunks:
|
||||
|
|
109
spacy/tests/pipeline/test_lemmatizer.py
Normal file
109
spacy/tests/pipeline/test_lemmatizer.py
Normal file
|
@ -0,0 +1,109 @@
|
|||
import pytest
|
||||
|
||||
from spacy import util, registry
|
||||
from spacy.lang.en import English
|
||||
from spacy.lookups import Lookups, load_lookups
|
||||
|
||||
from ..util import make_tempdir
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def nlp():
|
||||
return English()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def lemmatizer(nlp):
|
||||
@registry.assets("cope_lookups")
|
||||
def cope_lookups():
|
||||
lookups = Lookups()
|
||||
lookups.add_table("lemma_lookup", {"cope": "cope"})
|
||||
lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
|
||||
lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
|
||||
lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
|
||||
return lookups
|
||||
|
||||
lemmatizer = nlp.add_pipe(
|
||||
"lemmatizer", config={"mode": "rule", "lookups": {"@assets": "cope_lookups"}}
|
||||
)
|
||||
return lemmatizer
|
||||
|
||||
|
||||
def test_lemmatizer_init(nlp):
|
||||
@registry.assets("cope_lookups")
|
||||
def cope_lookups():
|
||||
lookups = Lookups()
|
||||
lookups.add_table("lemma_lookup", {"cope": "cope"})
|
||||
lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
|
||||
lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
|
||||
lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
|
||||
return lookups
|
||||
|
||||
lemmatizer = nlp.add_pipe(
|
||||
"lemmatizer", config={"mode": "lookup", "lookups": {"@assets": "cope_lookups"}}
|
||||
)
|
||||
assert isinstance(lemmatizer.lookups, Lookups)
|
||||
assert lemmatizer.mode == "lookup"
|
||||
# replace any tables from spacy-lookups-data
|
||||
lemmatizer.lookups = Lookups()
|
||||
doc = nlp("coping")
|
||||
# lookup with no tables sets text as lemma
|
||||
assert doc[0].lemma_ == "coping"
|
||||
|
||||
nlp.remove_pipe("lemmatizer")
|
||||
|
||||
@registry.assets("empty_lookups")
|
||||
def empty_lookups():
|
||||
return Lookups()
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
nlp.add_pipe(
|
||||
"lemmatizer",
|
||||
config={"mode": "lookup", "lookups": {"@assets": "empty_lookups"}},
|
||||
)
|
||||
|
||||
|
||||
def test_lemmatizer_config(nlp, lemmatizer):
|
||||
doc = nlp.make_doc("coping")
|
||||
doc[0].pos_ = "VERB"
|
||||
assert doc[0].lemma_ == ""
|
||||
doc = lemmatizer(doc)
|
||||
assert doc[0].text == "coping"
|
||||
assert doc[0].lemma_ == "cope"
|
||||
|
||||
doc = nlp.make_doc("coping")
|
||||
doc[0].pos_ = "VERB"
|
||||
assert doc[0].lemma_ == ""
|
||||
doc = lemmatizer(doc)
|
||||
assert doc[0].text == "coping"
|
||||
assert doc[0].lemma_ == "cope"
|
||||
|
||||
|
||||
def test_lemmatizer_serialize(nlp, lemmatizer):
|
||||
@registry.assets("cope_lookups")
|
||||
def cope_lookups():
|
||||
lookups = Lookups()
|
||||
lookups.add_table("lemma_lookup", {"cope": "cope"})
|
||||
lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
|
||||
lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
|
||||
lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
|
||||
return lookups
|
||||
|
||||
nlp2 = English()
|
||||
lemmatizer2 = nlp2.add_pipe(
|
||||
"lemmatizer", config={"mode": "rule", "lookups": {"@assets": "cope_lookups"}}
|
||||
)
|
||||
lemmatizer2.from_bytes(lemmatizer.to_bytes())
|
||||
assert lemmatizer.to_bytes() == lemmatizer2.to_bytes()
|
||||
assert lemmatizer.lookups.tables == lemmatizer2.lookups.tables
|
||||
|
||||
# Also test the results are still the same after IO
|
||||
with make_tempdir() as tmp_dir:
|
||||
nlp.to_disk(tmp_dir)
|
||||
nlp2 = util.load_model_from_path(tmp_dir)
|
||||
doc2 = nlp2.make_doc("coping")
|
||||
doc2[0].pos_ = "VERB"
|
||||
assert doc2[0].lemma_ == ""
|
||||
doc2 = lemmatizer(doc2)
|
||||
assert doc2[0].text == "coping"
|
||||
assert doc2[0].lemma_ == "cope"
|
|
@ -23,13 +23,12 @@ def test_tagger_begin_training_tag_map():
|
|||
nlp = Language()
|
||||
tagger = nlp.add_pipe("tagger")
|
||||
orig_tag_count = len(tagger.labels)
|
||||
tagger.add_label("A", {"POS": "NOUN"})
|
||||
tagger.add_label("A")
|
||||
nlp.begin_training()
|
||||
assert nlp.vocab.morphology.tag_map["A"] == {POS: NOUN}
|
||||
assert orig_tag_count + 1 == len(nlp.get_pipe("tagger").labels)
|
||||
|
||||
|
||||
TAG_MAP = {"N": {"pos": "NOUN"}, "V": {"pos": "VERB"}, "J": {"pos": "ADJ"}}
|
||||
TAGS = ("N", "V", "J")
|
||||
|
||||
MORPH_RULES = {"V": {"like": {"lemma": "luck"}}}
|
||||
|
||||
|
@ -42,15 +41,12 @@ TRAIN_DATA = [
|
|||
def test_overfitting_IO():
|
||||
# Simple test to try and quickly overfit the tagger - ensuring the ML models work correctly
|
||||
nlp = English()
|
||||
nlp.vocab.morphology.load_tag_map(TAG_MAP)
|
||||
nlp.vocab.morphology.load_morph_exceptions(MORPH_RULES)
|
||||
tagger = nlp.add_pipe("tagger", config={"set_morphology": True})
|
||||
nlp.vocab.morphology.load_tag_map(TAG_MAP)
|
||||
tagger = nlp.add_pipe("tagger")
|
||||
train_examples = []
|
||||
for t in TRAIN_DATA:
|
||||
train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
|
||||
for tag, values in TAG_MAP.items():
|
||||
tagger.add_label(tag, values)
|
||||
for tag in TAGS:
|
||||
tagger.add_label(tag)
|
||||
optimizer = nlp.begin_training()
|
||||
|
||||
for i in range(50):
|
||||
|
@ -65,7 +61,6 @@ def test_overfitting_IO():
|
|||
assert doc[1].tag_ is "V"
|
||||
assert doc[2].tag_ is "J"
|
||||
assert doc[3].tag_ is "N"
|
||||
assert doc[1].lemma_ == "luck"
|
||||
|
||||
# Also test the results are still the same after IO
|
||||
with make_tempdir() as tmp_dir:
|
||||
|
@ -76,4 +71,3 @@ def test_overfitting_IO():
|
|||
assert doc2[1].tag_ is "V"
|
||||
assert doc2[2].tag_ is "J"
|
||||
assert doc2[3].tag_ is "N"
|
||||
assert doc[1].lemma_ == "luck"
|
||||
|
|
|
@ -8,10 +8,8 @@ from spacy.attrs import IS_PUNCT, ORTH, LOWER
|
|||
from spacy.symbols import POS, VERB
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.lang.en import English
|
||||
from spacy.lemmatizer import Lemmatizer
|
||||
from spacy.lookups import Lookups
|
||||
from spacy.tokens import Doc, Span
|
||||
from spacy.lang.en.lemmatizer import is_base_form
|
||||
|
||||
from ..util import get_doc, make_tempdir
|
||||
|
||||
|
@ -157,16 +155,15 @@ def test_issue590(en_vocab):
|
|||
assert len(matches) == 2
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Old vocab-based lemmatization")
|
||||
def test_issue595():
|
||||
"""Test lemmatization of base forms"""
|
||||
words = ["Do", "n't", "feed", "the", "dog"]
|
||||
tag_map = {"VB": {POS: VERB, "VerbForm": "inf"}}
|
||||
lookups = Lookups()
|
||||
lookups.add_table("lemma_rules", {"verb": [["ed", "e"]]})
|
||||
lookups.add_table("lemma_index", {"verb": {}})
|
||||
lookups.add_table("lemma_exc", {"verb": {}})
|
||||
lemmatizer = Lemmatizer(lookups, is_base_form=is_base_form)
|
||||
vocab = Vocab(lemmatizer=lemmatizer, tag_map=tag_map)
|
||||
vocab = Vocab()
|
||||
doc = Doc(vocab, words=words)
|
||||
doc[2].tag_ = "VB"
|
||||
assert doc[2].text == "feed"
|
||||
|
@ -389,6 +386,7 @@ def test_issue891(en_tokenizer, text):
|
|||
assert tokens[1].text == "/"
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Old vocab-based lemmatization")
|
||||
@pytest.mark.parametrize(
|
||||
"text,tag,lemma",
|
||||
[("anus", "NN", "anus"), ("princess", "NN", "princess"), ("inner", "JJ", "inner")],
|
||||
|
|
|
@ -6,7 +6,6 @@ from spacy.lang.en import English
|
|||
from spacy.lang.lex_attrs import LEX_ATTRS
|
||||
from spacy.matcher import Matcher
|
||||
from spacy.tokenizer import Tokenizer
|
||||
from spacy.lemmatizer import Lemmatizer
|
||||
from spacy.lookups import Lookups
|
||||
from spacy.symbols import ORTH, LEMMA, POS, VERB
|
||||
|
||||
|
@ -57,6 +56,7 @@ def test_issue1242():
|
|||
assert len(docs[1]) == 1
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="v3 no longer supports LEMMA/POS in tokenizer special cases")
|
||||
def test_issue1250():
|
||||
"""Test cached special cases."""
|
||||
special_case = [{ORTH: "reimbur", LEMMA: "reimburse", POS: "VERB"}]
|
||||
|
@ -87,20 +87,6 @@ def test_issue1375():
|
|||
assert doc[1].nbor(1).text == "2"
|
||||
|
||||
|
||||
def test_issue1387():
|
||||
tag_map = {"VBG": {POS: VERB, "VerbForm": "part"}}
|
||||
lookups = Lookups()
|
||||
lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
|
||||
lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
|
||||
lookups.add_table("lemma_rules", {"verb": [["ing", ""]]})
|
||||
lemmatizer = Lemmatizer(lookups)
|
||||
vocab = Vocab(lemmatizer=lemmatizer, tag_map=tag_map)
|
||||
doc = Doc(vocab, words=["coping"])
|
||||
doc[0].tag_ = "VBG"
|
||||
assert doc[0].text == "coping"
|
||||
assert doc[0].lemma_ == "cope"
|
||||
|
||||
|
||||
def test_issue1434():
|
||||
"""Test matches occur when optional element at end of short doc."""
|
||||
pattern = [{"ORTH": "Hello"}, {"IS_ALPHA": True, "OP": "?"}]
|
||||
|
|
|
@ -130,8 +130,6 @@ def test_issue1727():
|
|||
vectors = Vectors(data=data, keys=["I", "am", "Matt"])
|
||||
tagger = nlp.create_pipe("tagger")
|
||||
tagger.add_label("PRP")
|
||||
with pytest.warns(UserWarning):
|
||||
tagger.begin_training()
|
||||
assert tagger.cfg.get("pretrained_dims", 0) == 0
|
||||
tagger.vocab.vectors = vectors
|
||||
with make_tempdir() as path:
|
||||
|
|
|
@ -19,8 +19,8 @@ def test_issue2564():
|
|||
"""Test the tagger sets is_tagged correctly when used via Language.pipe."""
|
||||
nlp = Language()
|
||||
tagger = nlp.add_pipe("tagger")
|
||||
with pytest.warns(UserWarning):
|
||||
tagger.begin_training() # initialise weights
|
||||
tagger.add_label("A")
|
||||
tagger.begin_training()
|
||||
doc = nlp("hello world")
|
||||
assert doc.is_tagged
|
||||
docs = nlp.pipe(["hello", "world"])
|
||||
|
|
|
@ -241,11 +241,11 @@ def test_issue3449():
|
|||
assert t3[5].text == "I"
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::UserWarning")
|
||||
def test_issue3456():
|
||||
# this crashed because of a padding error in layer.ops.unflatten in thinc
|
||||
nlp = English()
|
||||
nlp.add_pipe("tagger")
|
||||
tagger = nlp.add_pipe("tagger")
|
||||
tagger.add_label("A")
|
||||
nlp.begin_training()
|
||||
list(nlp.pipe(["hi", ""]))
|
||||
|
||||
|
|
|
@ -149,13 +149,15 @@ def test_issue3540(en_vocab):
|
|||
gold_text = ["I", "live", "in", "NewYork", "right", "now"]
|
||||
assert [token.text for token in doc] == gold_text
|
||||
gold_lemma = ["I", "live", "in", "NewYork", "right", "now"]
|
||||
for i, lemma in enumerate(gold_lemma):
|
||||
doc[i].lemma_ = lemma
|
||||
assert [token.lemma_ for token in doc] == gold_lemma
|
||||
vectors_1 = [token.vector for token in doc]
|
||||
assert len(vectors_1) == len(doc)
|
||||
|
||||
with doc.retokenize() as retokenizer:
|
||||
heads = [(doc[3], 1), doc[2]]
|
||||
attrs = {"POS": ["PROPN", "PROPN"], "DEP": ["pobj", "compound"]}
|
||||
attrs = {"POS": ["PROPN", "PROPN"], "LEMMA": ["New", "York"], "DEP": ["pobj", "compound"]}
|
||||
retokenizer.split(doc[3], ["New", "York"], heads=heads, attrs=attrs)
|
||||
|
||||
gold_text = ["I", "live", "in", "New", "York", "right", "now"]
|
||||
|
|
|
@ -271,6 +271,7 @@ def test_issue4267():
|
|||
assert token.ent_iob == 2
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="lemmatizer lookups no longer in vocab")
|
||||
def test_issue4272():
|
||||
"""Test that lookup table can be accessed from Token.lemma if no POS tags
|
||||
are available."""
|
||||
|
|
|
@ -62,7 +62,6 @@ def tagger():
|
|||
# need to add model for two reasons:
|
||||
# 1. no model leads to error in serialization,
|
||||
# 2. the affected line is the one for model serialization
|
||||
with pytest.warns(UserWarning):
|
||||
tagger.begin_training(pipeline=nlp.pipeline)
|
||||
return tagger
|
||||
|
||||
|
|
|
@ -44,8 +44,8 @@ def blank_parser(en_vocab):
|
|||
def taggers(en_vocab):
|
||||
cfg = {"model": DEFAULT_TAGGER_MODEL}
|
||||
model = registry.make_from_config(cfg, validate=True)["model"]
|
||||
tagger1 = Tagger(en_vocab, model, set_morphology=True)
|
||||
tagger2 = Tagger(en_vocab, model, set_morphology=True)
|
||||
tagger1 = Tagger(en_vocab, model)
|
||||
tagger2 = Tagger(en_vocab, model)
|
||||
return tagger1, tagger2
|
||||
|
||||
|
||||
|
@ -125,8 +125,8 @@ def test_serialize_tagger_roundtrip_disk(en_vocab, taggers):
|
|||
tagger2.to_disk(file_path2)
|
||||
cfg = {"model": DEFAULT_TAGGER_MODEL}
|
||||
model = registry.make_from_config(cfg, validate=True)["model"]
|
||||
tagger1_d = Tagger(en_vocab, model, set_morphology=True).from_disk(file_path1)
|
||||
tagger2_d = Tagger(en_vocab, model, set_morphology=True).from_disk(file_path2)
|
||||
tagger1_d = Tagger(en_vocab, model).from_disk(file_path1)
|
||||
tagger2_d = Tagger(en_vocab, model).from_disk(file_path2)
|
||||
assert tagger1_d.to_bytes() == tagger2_d.to_bytes()
|
||||
|
||||
|
||||
|
|
|
@ -8,7 +8,6 @@ from ..util import make_tempdir
|
|||
|
||||
test_strings = [([], []), (["rats", "are", "cute"], ["i", "like", "rats"])]
|
||||
test_strings_attrs = [(["rats", "are", "cute"], "Hello")]
|
||||
default_strings = ("_SP", "POS=SPACE")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("text", ["rat"])
|
||||
|
@ -34,10 +33,8 @@ def test_serialize_vocab_roundtrip_bytes(strings1, strings2):
|
|||
assert vocab1.to_bytes() == vocab1_b
|
||||
new_vocab1 = Vocab().from_bytes(vocab1_b)
|
||||
assert new_vocab1.to_bytes() == vocab1_b
|
||||
assert len(new_vocab1.strings) == len(strings1) + 2 # adds _SP and POS=SPACE
|
||||
assert sorted([s for s in new_vocab1.strings]) == sorted(
|
||||
strings1 + list(default_strings)
|
||||
)
|
||||
assert len(new_vocab1.strings) == len(strings1)
|
||||
assert sorted([s for s in new_vocab1.strings]) == sorted(strings1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("strings1,strings2", test_strings)
|
||||
|
@ -52,16 +49,12 @@ def test_serialize_vocab_roundtrip_disk(strings1, strings2):
|
|||
vocab1_d = Vocab().from_disk(file_path1)
|
||||
vocab2_d = Vocab().from_disk(file_path2)
|
||||
# check strings rather than lexemes, which are only reloaded on demand
|
||||
assert strings1 == [s for s in vocab1_d.strings if s not in default_strings]
|
||||
assert strings2 == [s for s in vocab2_d.strings if s not in default_strings]
|
||||
assert strings1 == [s for s in vocab1_d.strings]
|
||||
assert strings2 == [s for s in vocab2_d.strings]
|
||||
if strings1 == strings2:
|
||||
assert [s for s in vocab1_d.strings if s not in default_strings] == [
|
||||
s for s in vocab2_d.strings if s not in default_strings
|
||||
]
|
||||
assert [s for s in vocab1_d.strings] == [s for s in vocab2_d.strings]
|
||||
else:
|
||||
assert [s for s in vocab1_d.strings if s not in default_strings] != [
|
||||
s for s in vocab2_d.strings if s not in default_strings
|
||||
]
|
||||
assert [s for s in vocab1_d.strings] != [s for s in vocab2_d.strings]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("strings,lex_attr", test_strings_attrs)
|
||||
|
@ -80,7 +73,7 @@ def test_deserialize_vocab_seen_entries(strings, lex_attr):
|
|||
# Reported in #2153
|
||||
vocab = Vocab(strings=strings)
|
||||
vocab.from_bytes(vocab.to_bytes())
|
||||
assert len(vocab.strings) == len(strings) + 2 # adds _SP and POS=SPACE
|
||||
assert len(vocab.strings) == len(strings)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("strings,lex_attr", test_strings_attrs)
|
||||
|
|
|
@ -1,64 +0,0 @@
|
|||
import pytest
|
||||
from spacy.tokens import Doc
|
||||
from spacy.language import Language
|
||||
from spacy.lookups import Lookups
|
||||
from spacy.lemmatizer import Lemmatizer
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="We probably don't want to support this anymore in v3?")
|
||||
def test_lemmatizer_reflects_lookups_changes():
|
||||
"""Test for an issue that'd cause lookups available in a model loaded from
|
||||
disk to not be reflected in the lemmatizer."""
|
||||
nlp = Language()
|
||||
assert Doc(nlp.vocab, words=["foo"])[0].lemma_ == "foo"
|
||||
table = nlp.vocab.lookups.add_table("lemma_lookup")
|
||||
table["foo"] = "bar"
|
||||
assert Doc(nlp.vocab, words=["foo"])[0].lemma_ == "bar"
|
||||
table = nlp.vocab.lookups.get_table("lemma_lookup")
|
||||
table["hello"] = "world"
|
||||
# The update to the table should be reflected in the lemmatizer
|
||||
assert Doc(nlp.vocab, words=["hello"])[0].lemma_ == "world"
|
||||
new_nlp = Language()
|
||||
table = new_nlp.vocab.lookups.add_table("lemma_lookup")
|
||||
table["hello"] = "hi"
|
||||
assert Doc(new_nlp.vocab, words=["hello"])[0].lemma_ == "hi"
|
||||
nlp_bytes = nlp.to_bytes()
|
||||
new_nlp.from_bytes(nlp_bytes)
|
||||
# Make sure we have the previously saved lookup table
|
||||
assert "lemma_lookup" in new_nlp.vocab.lookups
|
||||
assert len(new_nlp.vocab.lookups.get_table("lemma_lookup")) == 2
|
||||
assert new_nlp.vocab.lookups.get_table("lemma_lookup")["hello"] == "world"
|
||||
assert Doc(new_nlp.vocab, words=["foo"])[0].lemma_ == "bar"
|
||||
assert Doc(new_nlp.vocab, words=["hello"])[0].lemma_ == "world"
|
||||
|
||||
|
||||
def test_tagger_warns_no_lookups():
|
||||
nlp = Language()
|
||||
nlp.vocab.lookups = Lookups()
|
||||
assert not len(nlp.vocab.lookups)
|
||||
tagger = nlp.add_pipe("tagger")
|
||||
with pytest.warns(UserWarning):
|
||||
tagger.begin_training()
|
||||
with pytest.warns(UserWarning):
|
||||
nlp.begin_training()
|
||||
nlp.vocab.lookups.add_table("lemma_lookup")
|
||||
nlp.vocab.lookups.add_table("lexeme_norm")
|
||||
nlp.vocab.lookups.get_table("lexeme_norm")["a"] = "A"
|
||||
with pytest.warns(None) as record:
|
||||
nlp.begin_training()
|
||||
assert not record.list
|
||||
|
||||
|
||||
def test_lemmatizer_without_is_base_form_implementation():
|
||||
# Norwegian example from #5658
|
||||
lookups = Lookups()
|
||||
lookups.add_table("lemma_rules", {"noun": []})
|
||||
lookups.add_table("lemma_index", {"noun": {}})
|
||||
lookups.add_table("lemma_exc", {"noun": {"formuesskatten": ["formuesskatt"]}})
|
||||
|
||||
lemmatizer = Lemmatizer(lookups, is_base_form=None)
|
||||
assert lemmatizer(
|
||||
"Formuesskatten",
|
||||
"noun",
|
||||
{"Definite": "def", "Gender": "masc", "Number": "sing"},
|
||||
) == ["formuesskatt"]
|
|
@ -112,16 +112,15 @@ def test_tokenizer_validate_special_case(tokenizer, text, tokens):
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text,tokens", [("lorem", [{"orth": "lo", "tag": "NN"}, {"orth": "rem"}])]
|
||||
"text,tokens", [("lorem", [{"orth": "lo", "norm": "LO"}, {"orth": "rem"}])]
|
||||
)
|
||||
def test_tokenizer_add_special_case_tag(text, tokens):
|
||||
vocab = Vocab(tag_map={"NN": {"pos": "NOUN"}})
|
||||
vocab = Vocab()
|
||||
tokenizer = Tokenizer(vocab, {}, None, None, None)
|
||||
tokenizer.add_special_case(text, tokens)
|
||||
doc = tokenizer(text)
|
||||
assert doc[0].text == tokens[0]["orth"]
|
||||
assert doc[0].tag_ == tokens[0]["tag"]
|
||||
assert doc[0].pos_ == "NOUN"
|
||||
assert doc[0].norm_ == tokens[0]["norm"]
|
||||
assert doc[1].text == tokens[1]["orth"]
|
||||
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ from .span cimport Span
|
|||
from .token cimport Token
|
||||
from ..lexeme cimport Lexeme, EMPTY_LEXEME
|
||||
from ..structs cimport LexemeC, TokenC
|
||||
from ..attrs cimport TAG, MORPH
|
||||
from ..attrs cimport MORPH
|
||||
from ..vocab cimport Vocab
|
||||
|
||||
from .underscore import is_writable_attr
|
||||
|
@ -365,8 +365,6 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
|
|||
doc[token_index + i]._.set(ext_attr_key, ext_attr_value)
|
||||
# NB: We need to call get_string_id here because only the keys are
|
||||
# "intified" (since we support "KEY": [value, value] syntax here).
|
||||
elif attr_name == TAG:
|
||||
doc.vocab.morphology.assign_tag(token, get_string_id(attr_value))
|
||||
else:
|
||||
# Set attributes on both token and lexeme to take care of token
|
||||
# attribute vs. lexical attribute without having to enumerate
|
||||
|
@ -431,8 +429,6 @@ def set_token_attrs(Token py_token, attrs):
|
|||
if attr_name == "_": # Set extension attributes
|
||||
for ext_attr_key, ext_attr_value in attr_value.items():
|
||||
py_token._.set(ext_attr_key, ext_attr_value)
|
||||
elif attr_name == TAG:
|
||||
doc.vocab.morphology.assign_tag(token, attr_value)
|
||||
else:
|
||||
# Set attributes on both token and lexeme to take care of token
|
||||
# attribute vs. lexical attribute without having to enumerate
|
||||
|
|
|
@ -832,13 +832,6 @@ cdef class Doc:
|
|||
rel_head_index=abs_head_index-i
|
||||
)
|
||||
)
|
||||
# Do TAG first. This lets subsequent loop override stuff like POS, LEMMA
|
||||
if TAG in attrs:
|
||||
col = attrs.index(TAG)
|
||||
for i in range(length):
|
||||
value = values[col * stride + i]
|
||||
if value != 0:
|
||||
self.vocab.morphology.assign_tag(&tokens[i], value)
|
||||
# Verify ENT_IOB are proper integers
|
||||
if ENT_IOB in attrs:
|
||||
iob_strings = Token.iob_strings()
|
||||
|
@ -857,7 +850,6 @@ cdef class Doc:
|
|||
for i in range(length):
|
||||
token = &self.c[i]
|
||||
for j in range(n_attrs):
|
||||
if attr_ids[j] != TAG:
|
||||
value = values[j * stride + i]
|
||||
if attr_ids[j] == MORPH:
|
||||
# add morph to morphology table
|
||||
|
|
|
@ -332,10 +332,6 @@ cdef class Token:
|
|||
inflectional suffixes.
|
||||
"""
|
||||
def __get__(self):
|
||||
if self.c.lemma == 0:
|
||||
lemma_ = self.vocab.morphology.lemmatizer.lookup(self.orth_, orth=self.orth)
|
||||
return self.vocab.strings[lemma_]
|
||||
else:
|
||||
return self.c.lemma
|
||||
|
||||
def __set__(self, attr_t lemma):
|
||||
|
@ -355,7 +351,7 @@ cdef class Token:
|
|||
return self.c.tag
|
||||
|
||||
def __set__(self, attr_t tag):
|
||||
self.vocab.morphology.assign_tag(self.c, tag)
|
||||
self.c.tag = tag
|
||||
|
||||
property dep:
|
||||
"""RETURNS (uint64): ID of syntactic dependency label."""
|
||||
|
@ -888,9 +884,6 @@ cdef class Token:
|
|||
with no inflectional suffixes.
|
||||
"""
|
||||
def __get__(self):
|
||||
if self.c.lemma == 0:
|
||||
return self.vocab.morphology.lemmatizer.lookup(self.orth_, orth=self.orth)
|
||||
else:
|
||||
return self.vocab.strings[self.c.lemma]
|
||||
|
||||
def __set__(self, unicode lemma_):
|
||||
|
|
|
@ -9,11 +9,10 @@ from .lexeme cimport EMPTY_LEXEME, OOV_RANK
|
|||
from .lexeme cimport Lexeme
|
||||
from .typedefs cimport attr_t
|
||||
from .tokens.token cimport Token
|
||||
from .attrs cimport LANG, ORTH, TAG, POS
|
||||
from .attrs cimport LANG, ORTH
|
||||
|
||||
from .compat import copy_reg
|
||||
from .errors import Errors
|
||||
from .lemmatizer import Lemmatizer
|
||||
from .attrs import intify_attrs, NORM, IS_STOP
|
||||
from .vectors import Vectors
|
||||
from .util import registry
|
||||
|
@ -23,7 +22,7 @@ from .lang.norm_exceptions import BASE_NORMS
|
|||
from .lang.lex_attrs import LEX_ATTRS, is_stop, get_lang
|
||||
|
||||
|
||||
def create_vocab(lang, defaults, lemmatizer=None, vectors_name=None, load_data=True):
|
||||
def create_vocab(lang, defaults, vectors_name=None, load_data=True):
|
||||
# If the spacy-lookups-data package is installed, we pre-populate the lookups
|
||||
# with lexeme data, if available
|
||||
if load_data:
|
||||
|
@ -43,7 +42,6 @@ def create_vocab(lang, defaults, lemmatizer=None, vectors_name=None, load_data=T
|
|||
)
|
||||
return Vocab(
|
||||
lex_attr_getters=lex_attrs,
|
||||
lemmatizer=lemmatizer,
|
||||
lookups=lookups,
|
||||
writing_system=defaults.writing_system,
|
||||
get_noun_chunks=defaults.syntax_iterators.get("noun_chunks"),
|
||||
|
@ -58,17 +56,13 @@ cdef class Vocab:
|
|||
|
||||
DOCS: https://spacy.io/api/vocab
|
||||
"""
|
||||
def __init__(self, lex_attr_getters=None, lemmatizer=None,
|
||||
strings=tuple(), lookups=None, tag_map={},
|
||||
def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None,
|
||||
oov_prob=-20., vectors_name=None, writing_system={},
|
||||
get_noun_chunks=None, **deprecated_kwargs):
|
||||
"""Create the vocabulary.
|
||||
|
||||
lex_attr_getters (dict): A dictionary mapping attribute IDs to
|
||||
functions to compute them. Defaults to `None`.
|
||||
tag_map (dict): Dictionary mapping fine-grained tags to coarse-grained
|
||||
parts-of-speech, and optionally morphological attributes.
|
||||
lemmatizer (object): A lemmatizer. Defaults to `None`.
|
||||
strings (StringStore): StringStore that maps strings to integers, and
|
||||
vice versa.
|
||||
lookups (Lookups): Container for large lookup tables and dictionaries.
|
||||
|
@ -78,8 +72,6 @@ cdef class Vocab:
|
|||
lex_attr_getters = lex_attr_getters if lex_attr_getters is not None else {}
|
||||
if lookups in (None, True, False):
|
||||
lookups = Lookups()
|
||||
if lemmatizer in (None, True, False):
|
||||
lemmatizer = Lemmatizer(lookups)
|
||||
self.cfg = {'oov_prob': oov_prob}
|
||||
self.mem = Pool()
|
||||
self._by_orth = PreshMap()
|
||||
|
@ -89,7 +81,7 @@ cdef class Vocab:
|
|||
for string in strings:
|
||||
_ = self[string]
|
||||
self.lex_attr_getters = lex_attr_getters
|
||||
self.morphology = Morphology(self.strings, tag_map, lemmatizer)
|
||||
self.morphology = Morphology(self.strings)
|
||||
self.vectors = Vectors(name=vectors_name)
|
||||
self.lookups = lookups
|
||||
self.writing_system = writing_system
|
||||
|
@ -268,12 +260,6 @@ cdef class Vocab:
|
|||
# Set the special tokens up to have arbitrary attributes
|
||||
lex = <LexemeC*>self.get_by_orth(self.mem, props[ORTH])
|
||||
token.lex = lex
|
||||
if TAG in props:
|
||||
self.morphology.assign_tag(token, props[TAG])
|
||||
elif POS in props:
|
||||
# Don't allow POS to be set without TAG -- this causes problems,
|
||||
# see #1773
|
||||
props.pop(POS)
|
||||
for attr_id, value in props.items():
|
||||
Token.set_struct_attr(token, attr_id, value)
|
||||
# NORM is the only one that overlaps between the two
|
||||
|
|
|
@ -25,8 +25,8 @@ how the component should be configured. You can override its settings via the
|
|||
>
|
||||
> ```python
|
||||
> config = {
|
||||
> "validation": True,
|
||||
> "pattern_dicts": None,
|
||||
> "validate": True,
|
||||
> }
|
||||
> nlp.add_pipe("attribute_ruler", config=config)
|
||||
> ```
|
||||
|
@ -34,7 +34,7 @@ how the component should be configured. You can override its settings via the
|
|||
| Setting | Type | Description | Default |
|
||||
| --------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------- | ------- |
|
||||
| `pattern_dicts` | `Iterable[dict]` | A list of pattern dicts with the keys as the arguments to [`AttributeRuler.add`](#add) (`patterns`/`attrs`/`index`) to add as patterns. | `None` |
|
||||
| `validation` | bool | Whether patterns should be validated, passed to `Matcher` as `validate`. | `False` |
|
||||
| `validate` | bool | Whether patterns should be validated (passed to the `Matcher`). | `False` |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/attributeruler.py
|
||||
|
@ -65,8 +65,8 @@ pattern_dicts = \[
|
|||
| `vocab` | `Vocab` | The shared nlp object to pass the vocab to the matchers and process phrase patterns. |
|
||||
| `name` | str | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. Used to disable the current entity ruler while creating phrase patterns with the nlp object. |
|
||||
| _keyword-only_ | | |
|
||||
| `pattern_dicts` | `Iterable[Dict]]` | Optional patterns to load in on initialization. |
|
||||
| `validate` | bool | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. Defaults to `False`. |
|
||||
| `pattern_dicts` | `Iterable[Dict]` | Optional patterns to load in on initialization. Defaults to `None`. |
|
||||
| `validate` | bool | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. |
|
||||
|
||||
## AttributeRuler.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
|
|
|
@ -446,7 +446,8 @@ Debug a Thinc [`Model`](https://thinc.ai/docs/api-model) by running it on a
|
|||
sample text and checking how it updates its internal weights and parameters.
|
||||
|
||||
```bash
|
||||
$ python -m spacy debug model [config_path] [component] [--layers] [-DIM] [-PAR] [-GRAD] [-ATTR] [-P0] [-P1] [-P2] [P3] [--gpu_id]
|
||||
$ python -m spacy debug model [config_path] [component] [--layers] [-DIM]
|
||||
[-PAR] [-GRAD] [-ATTR] [-P0] [-P1] [-P2] [-P3] [--gpu-id]
|
||||
```
|
||||
|
||||
<Accordion title="Example outputs" spaced>
|
||||
|
@ -641,18 +642,19 @@ $ python -m spacy pretrain [texts_loc] [output_dir] [config_path]
|
|||
|
||||
## Evaluate {#evaluate new="2"}
|
||||
|
||||
<!-- TODO: document new evaluate command -->
|
||||
|
||||
Evaluate a model's accuracy and speed on JSON-formatted annotated data. Will
|
||||
print the results and optionally export
|
||||
[displaCy visualizations](/usage/visualizers) of a sample set of parses to
|
||||
`.html` files. Visualizations for the dependency parse and NER will be exported
|
||||
as separate files if the respective component is present in the model's
|
||||
pipeline.
|
||||
Evaluate a model. Expects a loadable spaCy model and evaluation data in the
|
||||
[binary `.spacy` format](/api/data-formats#binary-training). The
|
||||
`--gold-preproc` option sets up the evaluation examples with gold-standard
|
||||
sentences and tokens for the predictions. Gold preprocessing helps the
|
||||
annotations align to the tokenization, and may result in sequences of more
|
||||
consistent length. However, it may reduce runtime accuracy due to train/test
|
||||
skew. To render a sample of dependency parses in an HTML file using the
|
||||
[displaCy visualizations](/usage/visualizers), set the output directory as the
|
||||
`--displacy-path` argument.
|
||||
|
||||
```bash
|
||||
$ python -m spacy evaluate [model] [data_path] [--output] [--displacy-path]
|
||||
[--displacy-limit] [--gpu-id] [--gold-preproc]
|
||||
$ python -m spacy evaluate [model] [data_path] [--output] [--gold-preproc]
|
||||
[--gpu-id] [--displacy-path] [--displacy-limit]
|
||||
```
|
||||
|
||||
| Argument | Type | Description |
|
||||
|
@ -660,10 +662,10 @@ $ python -m spacy evaluate [model] [data_path] [--output] [--displacy-path]
|
|||
| `model` | positional | Model to evaluate. Can be a package or a path to a model data directory. |
|
||||
| `data_path` | positional | Location of evaluation data in spaCy's [binary format](/api/data-formats#training). |
|
||||
| `--output`, `-o` | option | Output JSON file for metrics. If not set, no metrics will be exported. |
|
||||
| `--gold-preproc`, `-G` | flag | Use gold preprocessing. |
|
||||
| `--gpu-id`, `-g` | option | GPU to use, if any. Defaults to `-1` for CPU. |
|
||||
| `--displacy-path`, `-dp` | option | Directory to output rendered parses as HTML. If not set, no visualizations will be generated. |
|
||||
| `--displacy-limit`, `-dl` | option | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. |
|
||||
| `--gpu-id`, `-g` | option | GPU to use, if any. Defaults to `-1` for CPU. |
|
||||
| `--gold-preproc`, `-G` | flag | Use gold preprocessing. |
|
||||
| **CREATES** | `stdout`, JSON, HTML | Training results and optional metrics and visualizations. |
|
||||
|
||||
## Package {#package}
|
||||
|
|
|
@ -27,7 +27,7 @@ how the component should be configured. You can override its settings via the
|
|||
> ```python
|
||||
> config = {
|
||||
> "phrase_matcher_attr": None,
|
||||
> "validation": True,
|
||||
> "validate": True,
|
||||
> "overwrite_ents": False,
|
||||
> "ent_id_sep": "||",
|
||||
> }
|
||||
|
@ -37,7 +37,7 @@ how the component should be configured. You can override its settings via the
|
|||
| Setting | Type | Description | Default |
|
||||
| --------------------- | ---- | ------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
|
||||
| `phrase_matcher_attr` | str | Optional attribute name to match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. | `None` |
|
||||
| `validation` | bool | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. | `False` |
|
||||
| `validate` | bool | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). | `False` |
|
||||
| `overwrite_ents` | bool | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. | `False` |
|
||||
| `ent_id_sep` | str | Separator used internally for entity IDs. | `"||"` |
|
||||
|
||||
|
|
|
@ -1,102 +1,263 @@
|
|||
---
|
||||
title: Lemmatizer
|
||||
teaser: Assign the base forms of words
|
||||
tag: class
|
||||
source: spacy/lemmatizer.py
|
||||
source: spacy/pipeline/lemmatizer.py
|
||||
new: 3
|
||||
teaser: 'Pipeline component for lemmatization'
|
||||
api_base_class: /api/pipe
|
||||
api_string_name: lemmatizer
|
||||
api_trainable: false
|
||||
---
|
||||
|
||||
<!-- TODO: rewrite once it's converted to pipe -->
|
||||
## Config and implementation
|
||||
|
||||
The `Lemmatizer` supports simple part-of-speech-sensitive suffix rules and
|
||||
lookup tables.
|
||||
The default config is defined by the pipeline component factory and describes
|
||||
how the component should be configured. You can override its settings via the
|
||||
`config` argument on [`nlp.add_pipe`](/api/language#add_pipe) or in your
|
||||
[`config.cfg` for training](/usage/training#config).
|
||||
|
||||
For examples of the lookups data formats used by the lookup and rule-based
|
||||
lemmatizers, see the
|
||||
[`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) repo.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> config = {"mode": "rule"}
|
||||
> nlp.add_pipe("lemmatizer", config=config)
|
||||
> ```
|
||||
|
||||
| Setting | Type | Description | Default |
|
||||
| ----------- | ------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- |
|
||||
| `mode` | str | The lemmatizer mode, e.g. "lookup" or "rule". | `"lookup"` |
|
||||
| `lookups` | [`Lookups`](/api/lookups) | The lookups object containing the tables such as `"lemma_rules"`, `"lemma_index"`, `"lemma_exc"` and `"lemma_lookup"`. If `None`, default tables are loaded from `spacy-lookups-data`. | `None` |
|
||||
| `overwrite` | bool | Whether to overwrite existing lemmas. | `False` |
|
||||
| `model` | [`Model`](https://thinc.ai/docs/api-model) | **Not yet implemented:** the model to use. | `None` |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/lemmatizer.py
|
||||
```
|
||||
|
||||
## Lemmatizer.\_\_init\_\_ {#init tag="method"}
|
||||
|
||||
Initialize a `Lemmatizer`. Typically, this happens under the hood within spaCy
|
||||
when a `Language` subclass and its `Vocab` is initialized.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.lemmatizer import Lemmatizer
|
||||
> from spacy.lookups import Lookups
|
||||
> lookups = Lookups()
|
||||
> lookups.add_table("lemma_rules", {"noun": [["s", ""]]})
|
||||
> lemmatizer = Lemmatizer(lookups)
|
||||
> ```
|
||||
> # Construction via add_pipe with default model
|
||||
> lemmatizer = nlp.add_pipe("lemmatizer")
|
||||
>
|
||||
> For examples of the data format, see the
|
||||
> [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) repo.
|
||||
> # Construction via add_pipe with custom settings
|
||||
> config = {"mode": "rule", "overwrite": True}
|
||||
> lemmatizer = nlp.add_pipe("lemmatizer", config=config)
|
||||
> ```
|
||||
|
||||
Create a new pipeline instance. In your application, you would normally use a
|
||||
shortcut for this and instantiate the component using its string name and
|
||||
[`nlp.add_pipe`](/api/language#add_pipe).
|
||||
|
||||
| Name | Type | Description |
|
||||
| -------------------------------------- | ------------------------- | ------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `lookups` <Tag variant="new">2.2</Tag> | [`Lookups`](/api/lookups) | The lookups object containing the (optional) tables `"lemma_rules"`, `"lemma_index"`, `"lemma_exc"` and `"lemma_lookup"`. |
|
||||
| -------------- | ------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | [`Vocab`](/api/vocab) | The vocab. |
|
||||
| `model` | [`Model`](https://thinc.ai/docs/api-model) | A model (not yet implemented). |
|
||||
| `name` | str | String name of the component instance. Used to add entries to the `losses` during training. |
|
||||
| _keyword-only_ | | |
|
||||
| `mode` | str | The lemmatizer mode, e.g. `"lookup"` or `"rule"`. Defaults to `"lookup"`. |
|
||||
| `lookups` | [`Lookups`](/api/lookups) | A lookups object containing the tables such as `"lemma_rules"`, `"lemma_index"`, `"lemma_exc"` and `"lemma_lookup"`. Defaults to `None`. |
|
||||
| `overwrite` | bool | Whether to overwrite existing lemmas. Defaults to `False`. |
|
||||
|
||||
## Lemmatizer.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
Lemmatize a string.
|
||||
Apply the pipe to one document. The document is modified in place, and returned.
|
||||
This usually happens under the hood when the `nlp` object is called on a text
|
||||
and all pipeline components are applied to the `Doc` in order.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.lemmatizer import Lemmatizer
|
||||
> from spacy.lookups import Lookups
|
||||
> lookups = Lookups()
|
||||
> lookups.add_table("lemma_rules", {"noun": [["s", ""]]})
|
||||
> lemmatizer = Lemmatizer(lookups)
|
||||
> lemmas = lemmatizer("ducks", "NOUN")
|
||||
> assert lemmas == ["duck"]
|
||||
> doc = nlp("This is a sentence.")
|
||||
> lemmatizer = nlp.add_pipe("lemmatizer")
|
||||
> # This usually happens under the hood
|
||||
> processed = lemmatizer(doc)
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ------------ | ------------- | -------------------------------------------------------------------------------------------------------- |
|
||||
| `string` | str | The string to lemmatize, e.g. the token text. |
|
||||
| `univ_pos` | str / int | The token's universal part-of-speech tag. |
|
||||
| `morphology` | dict / `None` | Morphological features following the [Universal Dependencies](http://universaldependencies.org/) scheme. |
|
||||
| **RETURNS** | list | The available lemmas for the string. |
|
||||
| ----------- | ----- | ------------------------ |
|
||||
| `doc` | `Doc` | The document to process. |
|
||||
| **RETURNS** | `Doc` | The processed document. |
|
||||
|
||||
## Lemmatizer.lookup {#lookup tag="method" new="2"}
|
||||
## Lemmatizer.pipe {#pipe tag="method"}
|
||||
|
||||
Look up a lemma in the lookup table, if available. If no lemma is found, the
|
||||
Apply the pipe to a stream of documents. This usually happens under the hood
|
||||
when the `nlp` object is called on a text and all pipeline components are
|
||||
applied to the `Doc` in order.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lemmatizer = nlp.add_pipe("lemmatizer")
|
||||
> for doc in lemmatizer.pipe(docs, batch_size=50):
|
||||
> pass
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| -------------- | --------------- | ------------------------------------------------------ |
|
||||
| `stream` | `Iterable[Doc]` | A stream of documents. |
|
||||
| _keyword-only_ | | |
|
||||
| `batch_size` | int | The number of texts to buffer. Defaults to `128`. |
|
||||
| **YIELDS** | `Doc` | Processed documents in the order of the original text. |
|
||||
|
||||
## Lemmatizer.lookup_lemmatize {#lookup_lemmatize tag="method"}
|
||||
|
||||
Lemmatize a token using a lookup-based approach. If no lemma is found, the
|
||||
original string is returned. Languages can provide a
|
||||
[lookup table](/usage/adding-languages#lemmatizer) via the `Lookups`.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lookups = Lookups()
|
||||
> lookups.add_table("lemma_lookup", {"going": "go"})
|
||||
> assert lemmatizer.lookup("going") == "go"
|
||||
> ```
|
||||
| Name | Type | Description |
|
||||
| ----------- | --------------------- | ------------------------------------- |
|
||||
| `token` | [`Token`](/api/token) | The token to lemmatize. |
|
||||
| **RETURNS** | `List[str]` | A list containing one or more lemmas. |
|
||||
|
||||
## Lemmatizer.rule_lemmatize {#rule_lemmatize tag="method"}
|
||||
|
||||
Lemmatize a token using a rule-based approach. Typically relies on POS tags.
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ---- | ----------------------------------------------------------------------------------------------------------- |
|
||||
| `string` | str | The string to look up. |
|
||||
| `orth` | int | Optional hash of the string to look up. If not set, the string will be used and hashed. Defaults to `None`. |
|
||||
| **RETURNS** | str | The lemma if the string was found, otherwise the original string. |
|
||||
| ----------- | --------------------- | ------------------------------------- |
|
||||
| `token` | [`Token`](/api/token) | The token to lemmatize. |
|
||||
| **RETURNS** | `List[str]` | A list containing one or more lemmas. |
|
||||
|
||||
## Lemmatizer.is_base_form {#is_base_form tag="method"}
|
||||
|
||||
Check whether we're dealing with an uninflected paradigm, so we can avoid
|
||||
lemmatization entirely.
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | --------------------- | ------------------------------------------------------------------------------------------------------- |
|
||||
| `token` | [`Token`](/api/token) | The token to analyze. |
|
||||
| **RETURNS** | bool | Whether the token's attributes (e.g., part-of-speech tag, morphological features) describe a base form. |
|
||||
|
||||
## Lemmatizer.get_lookups_config {#get_lookups_config tag="classmethod"}
|
||||
|
||||
Returns the lookups configuration settings for a given mode for use in
|
||||
[`Lemmatizer.load_lookups`](#load_lookups).
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ---- | ------------------------------------------------- |
|
||||
| `mode` | str | The lemmatizer mode. |
|
||||
| **RETURNS** | dict | The lookups configuration settings for this mode. |
|
||||
|
||||
## Lemmatizer.load_lookups {#load_lookups tag="classmethod"}
|
||||
|
||||
Load and validate lookups tables. If the provided lookups is `None`, load the
|
||||
default lookups tables according to the language and mode settings. Confirm that
|
||||
all required tables for the language and mode are present.
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ------------------------- | ---------------------------------------------------------------------------- |
|
||||
| `lang` | str | The language. |
|
||||
| `mode` | str | The lemmatizer mode. |
|
||||
| `lookups` | [`Lookups`](/api/lookups) | The provided lookups, may be `None` if the default lookups should be loaded. |
|
||||
| **RETURNS** | [`Lookups`](/api/lookups) | The lookups object. |
|
||||
|
||||
## Lemmatizer.to_disk {#to_disk tag="method"}
|
||||
|
||||
Serialize the pipe to disk.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> pos = "verb"
|
||||
> morph = {"VerbForm": "inf"}
|
||||
> is_base_form = lemmatizer.is_base_form(pos, morph)
|
||||
> assert is_base_form == True
|
||||
> lemmatizer = nlp.add_pipe("lemmatizer")
|
||||
> lemmatizer.to_disk("/path/to/lemmatizer")
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ------------ | --------- | --------------------------------------------------------------------------------------- |
|
||||
| `univ_pos` | str / int | The token's universal part-of-speech tag. |
|
||||
| `morphology` | dict | The token's morphological features. |
|
||||
| **RETURNS** | bool | Whether the token's part-of-speech tag and morphological features describe a base form. |
|
||||
| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
|
||||
| `path` | str / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
|
||||
| _keyword-only_ | | |
|
||||
| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
|
||||
|
||||
## Lemmatizer.from_disk {#from_disk tag="method"}
|
||||
|
||||
Load the pipe from disk. Modifies the object in place and returns it.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lemmatizer = nlp.add_pipe("lemmatizer")
|
||||
> lemmatizer.from_disk("/path/to/lemmatizer")
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| -------------- | --------------- | -------------------------------------------------------------------------- |
|
||||
| `path` | str / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
|
||||
| _keyword-only_ | | |
|
||||
| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
|
||||
| **RETURNS** | `Lemmatizer` | The modified `Lemmatizer` object. |
|
||||
|
||||
## Lemmatizer.to_bytes {#to_bytes tag="method"}
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lemmatizer = nlp.add_pipe("lemmatizer")
|
||||
> lemmatizer_bytes = lemmatizer.to_bytes()
|
||||
> ```
|
||||
|
||||
Serialize the pipe to a bytestring.
|
||||
|
||||
| Name | Type | Description |
|
||||
| -------------- | --------------- | ------------------------------------------------------------------------- |
|
||||
| _keyword-only_ | | |
|
||||
| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
|
||||
| **RETURNS** | bytes | The serialized form of the `Lemmatizer` object. |
|
||||
|
||||
## Lemmatizer.from_bytes {#from_bytes tag="method"}
|
||||
|
||||
Load the pipe from a bytestring. Modifies the object in place and returns it.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lemmatizer_bytes = lemmatizer.to_bytes()
|
||||
> lemmatizer = nlp.add_pipe("lemmatizer")
|
||||
> lemmatizer.from_bytes(lemmatizer_bytes)
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| -------------- | --------------- | ------------------------------------------------------------------------- |
|
||||
| `bytes_data` | bytes | The data to load from. |
|
||||
| _keyword-only_ | | |
|
||||
| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
|
||||
| **RETURNS** | `Lemmatizer` | The `Lemmatizer` object. |
|
||||
|
||||
## Lemmatizer.mode {#mode tag="property"}
|
||||
|
||||
The lemmatizer mode.
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ----- | -------------------- |
|
||||
| **RETURNS** | `str` | The lemmatizer mode. |
|
||||
|
||||
## Attributes {#attributes}
|
||||
|
||||
| Name | Type | Description |
|
||||
| -------------------------------------- | ------------------------- | --------------------------------------------------------------- |
|
||||
| `lookups` <Tag variant="new">2.2</Tag> | [`Lookups`](/api/lookups) | The lookups object containing the rules and data, if available. |
|
||||
| --------- | --------------------------------- | ------------------- |
|
||||
| `vocab` | [`Vocab`](/api/vocab) | The shared [`Vocab`](/api/vocab). |
|
||||
| `lookups` | [`Lookups`](/api/lookups) | The lookups object. |
|
||||
|
||||
## Serialization fields {#serialization-fields}
|
||||
|
||||
During serialization, spaCy will export several data fields used to restore
|
||||
different aspects of the object. If needed, you can exclude them from
|
||||
serialization by passing in the string names via the `exclude` argument.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> lemmatizer.to_disk("/path", exclude=["vocab"])
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| --------- | ---------------------------------------------------- |
|
||||
| `vocab` | The shared [`Vocab`](/api/vocab). |
|
||||
| `lookups` | The lookups. You usually don't want to exclude this. |
|
||||
|
|
|
@ -11,22 +11,19 @@ this class.
|
|||
|
||||
## Morphology.\_\_init\_\_ {#init tag="method"}
|
||||
|
||||
Create a Morphology object using the tag map, lemmatizer and exceptions.
|
||||
Create a Morphology object.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.morphology import Morphology
|
||||
>
|
||||
> morphology = Morphology(strings, tag_map, lemmatizer)
|
||||
> morphology = Morphology(strings)
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ------------ | ----------------- | ---------------------------------------------------------------------------------------------------------- |
|
||||
| --------- | ------------- | ----------------- |
|
||||
| `strings` | `StringStore` | The string store. |
|
||||
| `tag_map` | `Dict[str, Dict]` | The tag map. |
|
||||
| `lemmatizer` | `Lemmatizer` | The lemmatizer. |
|
||||
| `exc` | `Dict[str, Dict]` | A dictionary of exceptions in the format `{tag: {orth: {"POS": "X", "Feat1": "Val1", "Feat2": "Val2", ...}}}` |
|
||||
|
||||
## Morphology.add {#add tag="method"}
|
||||
|
||||
|
@ -62,52 +59,6 @@ Get the FEATS string for the hash of the morphological analysis.
|
|||
| ------- | ---- | --------------------------------------- |
|
||||
| `morph` | int | The hash of the morphological analysis. |
|
||||
|
||||
## Morphology.load_tag_map {#load_tag_map tag="method"}
|
||||
|
||||
Replace the current tag map with the provided tag map.
|
||||
|
||||
| Name | Type | Description |
|
||||
| --------- | ----------------- | ------------ |
|
||||
| `tag_map` | `Dict[str, Dict]` | The tag map. |
|
||||
|
||||
## Morphology.load_morph_exceptions {#load_morph_exceptions tag="method"}
|
||||
|
||||
Replace the current morphological exceptions with the provided exceptions.
|
||||
|
||||
| Name | Type | Description |
|
||||
| ------------- | ----------------- | ----------------------------- |
|
||||
| `morph_rules` | `Dict[str, Dict]` | The morphological exceptions. |
|
||||
|
||||
## Morphology.add_special_case {#add_special_case tag="method"}
|
||||
|
||||
Add a special-case rule to the morphological analyzer. Tokens whose tag and orth
|
||||
match the rule will receive the specified properties.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> attrs = {"POS": "DET", "Definite": "Def"}
|
||||
> morphology.add_special_case("DT", "the", attrs)
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ---------- | ---- | ---------------------------------------------- |
|
||||
| `tag_str` | str | The fine-grained tag. |
|
||||
| `orth_str` | str | The token text. |
|
||||
| `attrs` | dict | The features to assign for this token and tag. |
|
||||
|
||||
## Morphology.exc {#exc tag="property"}
|
||||
|
||||
The current morphological exceptions.
|
||||
|
||||
| Name | Type | Description |
|
||||
| ---------- | ---- | --------------------------------------------------- |
|
||||
| **YIELDS** | dict | The current dictionary of morphological exceptions. |
|
||||
|
||||
## Morphology.lemmatize {#lemmatize tag="method"}
|
||||
|
||||
TODO
|
||||
|
||||
## Morphology.feats_to_dict {#feats_to_dict tag="staticmethod"}
|
||||
|
||||
Convert a string FEATS representation to a dictionary of features and values in
|
||||
|
|
|
@ -47,7 +47,7 @@ https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/tagger.pyx
|
|||
>
|
||||
> # Construction via add_pipe with custom model
|
||||
> config = {"model": {"@architectures": "my_tagger"}}
|
||||
> parser = nlp.add_pipe("tagger", config=config)
|
||||
> tagger = nlp.add_pipe("tagger", config=config)
|
||||
>
|
||||
> # Construction from class
|
||||
> from spacy.pipeline import Tagger
|
||||
|
@ -285,15 +285,13 @@ Add a new label to the pipe.
|
|||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.symbols import POS
|
||||
> tagger = nlp.add_pipe("tagger")
|
||||
> tagger.add_label("MY_LABEL", {POS: "NOUN"})
|
||||
> tagger.add_label("MY_LABEL")
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ---------------- | --------------------------------------------------------------- |
|
||||
| ----------- | ---- | --------------------------------------------------- |
|
||||
| `label` | str | The label to add. |
|
||||
| `values` | `Dict[int, str]` | Optional values to map to the label, e.g. a tag map dictionary. |
|
||||
| **RETURNS** | int | `0` if the label is already present, otherwise `1`. |
|
||||
|
||||
## Tagger.to_disk {#to_disk tag="method"}
|
||||
|
@ -369,9 +367,7 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
|
|||
|
||||
## Tagger.labels {#labels tag="property"}
|
||||
|
||||
The labels currently added to the component. Note that even for a blank
|
||||
component, this will always include the built-in coarse-grained part-of-speech
|
||||
tags by default, e.g. `VERB`, `NOUN` and so on.
|
||||
The labels currently added to the component.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
|
@ -397,8 +393,7 @@ serialization by passing in the string names via the `exclude` argument.
|
|||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| --------- | ------------------------------------------------------------------------------------------ |
|
||||
| ------- | -------------------------------------------------------------- |
|
||||
| `vocab` | The shared [`Vocab`](/api/vocab). |
|
||||
| `cfg` | The config file. You usually don't want to exclude this. |
|
||||
| `model` | The binary model data. You usually don't want to exclude this. |
|
||||
| `tag_map` | The [tag map](/usage/adding-languages#tag-map) mapping fine-grained tags to coarse-grained tags. |
|
||||
|
|
|
@ -24,8 +24,6 @@ Create the vocabulary.
|
|||
| Name | Type | Description |
|
||||
| -------------------------------------------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `lex_attr_getters` | dict | A dictionary mapping attribute IDs to functions to compute them. Defaults to `None`. |
|
||||
| `tag_map` | dict | A dictionary mapping fine-grained tags to coarse-grained parts-of-speech, and optionally morphological attributes. |
|
||||
| `lemmatizer` | object | A lemmatizer. Defaults to `None`. |
|
||||
| `strings` | `StringStore` / list | A [`StringStore`](/api/stringstore) that maps strings to hash values, and vice versa, or a list of strings. |
|
||||
| `lookups` | `Lookups` | A [`Lookups`](/api/lookups) that stores the `lemma_*`, `lexeme_norm` and other large lookup tables. Defaults to `None`. |
|
||||
| `lookups_extra` <Tag variant="new">2.3</Tag> | `Lookups` | A [`Lookups`](/api/lookups) that stores the optional `lexeme_cluster`/`lexeme_prob`/`lexeme_sentiment`/`lexeme_settings` lookup tables. Defaults to `None`. |
|
||||
|
|
|
@ -1,85 +0,0 @@
|
|||
<svg class="o-svg" xmlns="http://www.w3.org/2000/svg" width="931" height="456" viewBox="-1 -1 932 480" preserveAspectRatio="xMinYMin meet">
|
||||
<style>
|
||||
.svg__langdata__text-large, .svg__langdata__text-small, .svg__langdata__text-tiny {
|
||||
font-family: Arial, sans-serif;
|
||||
fill: #1a1e23
|
||||
}
|
||||
.svg__langdata__text-large { font-size: 20px }
|
||||
.svg__langdata__text-small, .svg__langdata__text-tiny { font-weight: bold; font-size: 15px; }
|
||||
</style>
|
||||
<path fill="none" stroke="#b85450" stroke-width="3" stroke-miterlimit="10" d="M610 404h-69.8" stroke-dasharray="1 6" stroke-linecap="round"/>
|
||||
<path fill="#b85450" stroke="#b85450" stroke-width="2" stroke-miterlimit="10" d="M534.2 404l8-4-2 4 2 4z"/>
|
||||
<path fill="#f8cecc" stroke="#b85450" stroke-width="2" stroke-miterlimit="10" d="M642.7 361.3H708l33 43.6-33 43.5H643L610 405z"/>
|
||||
<text class="svg__langdata__text-large" transform="translate(630 410)" width="80" height="22">Tokenizer</text>
|
||||
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M780 303H621v-56.8"/>
|
||||
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M621 240.2l4 8-4-2-4 2z"/>
|
||||
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M855 253v-20.8"/>
|
||||
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M855 226.2l4 8-4-2-4 2z"/>
|
||||
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M780 303h-45v-56.8"/>
|
||||
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M735 240.2l4 8-4-2-4 2z"/>
|
||||
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M780 303H504v-56.8"/>
|
||||
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M504 240.2l4 8-4-2-4 2z"/>
|
||||
<ellipse cx="855" cy="303" fill="#f5f5f5" stroke="#666" stroke-width="2" rx="74.8" ry="49.8"/>
|
||||
<text class="svg__langdata__text-large" transform="translate(815 308)" width="119" height="46">Base data</text>
|
||||
<path fill="none" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M389 100l.4 39.8"/>
|
||||
<path fill="#09a3d5" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M389.5 145.8l-4-8 4 2 4-2z"/>
|
||||
<path fill="none" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M389 100v17h232v22.8"/>
|
||||
<path fill="#09a3d5" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M621 145.8l-4-8 4 2 4-2z"/>
|
||||
<path fill="none" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M389 100v17H280v22.8"/>
|
||||
<path fill="#09a3d5" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M280 145.8l-4-8 4 2 4-2z"/>
|
||||
<path fill="none" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M389 100v17h115v22.8"/>
|
||||
<path fill="#09a3d5" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M504 145.8l-4-8 4 2 4-2z"/>
|
||||
<path fill="none" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M389 100v17h346v22.8"/>
|
||||
<path fill="#09a3d5" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M735 145.8l-4-8 4 2 4-2z"/>
|
||||
<path fill="none" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M389 100v17H163v22.8"/>
|
||||
<path fill="#09a3d5" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M163 145.8l-4-8 4 2 4-2z"/>
|
||||
<path fill="none" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M389 100v17H46v22.8"/>
|
||||
<path fill="#09a3d5" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M46 145.8l-4-8 4 2 4-2z"/>
|
||||
<ellipse cx="389" cy="50" fill="#dae8fc" stroke="#09a3d5" stroke-width="2" rx="74.8" ry="49.8"/>
|
||||
<text class="svg__langdata__text-large" transform="translate(346.5 42)" width="81" height="46">Language <tspan dy="1.45em" dx="-3.7em">data</tspan></text>
|
||||
<path fill="none" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M435 193h15.8"/>
|
||||
<path fill="#09a3d5" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M456.8 193l-8 4 2-4-2-4z"/>
|
||||
<ellipse cx="390" cy="193" fill="#dae8fc" stroke="#09a3d5" stroke-width="2" rx="45" ry="45"/>
|
||||
<text class="svg__langdata__text-small" transform="translate(368 187.5)" width="39" height="30">stop <tspan dx="-2.8em" dy="1.25em">words</tspan></text>
|
||||
<path fill="none" stroke="#9673a6" stroke-width="3" stroke-miterlimit="10" d="M472 225l-1.5 133.8" stroke-dasharray="1 6" stroke-linecap="round"/>
|
||||
<path fill="#9673a6" stroke="#9673a6" stroke-width="2" stroke-miterlimit="10" d="M470.4 364.8l-4-8 4 2 4-2z"/>
|
||||
<ellipse cx="504" cy="193" fill="#f5f5f5" stroke="#09a3d5" stroke-width="2" rx="45" ry="45"/>
|
||||
<text class="svg__langdata__text-small" transform="translate(473 187.5)" width="85" height="30">lexical <tspan dx="-4em" dy="1.25em">attributes</tspan></text>
|
||||
<path fill="none" stroke="#b85450" stroke-width="3" stroke-miterlimit="10" d="M653 225l5.6 127.8" stroke-dasharray="1 6" stroke-linecap="round"/>
|
||||
<path fill="#b85450" stroke="#b85450" stroke-width="2" stroke-miterlimit="10" d="M659 358.8l-4.5-8 4 2 4-2.2z"/>
|
||||
<path fill="none" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M576 193h-18.8"/>
|
||||
<path fill="#09a3d5" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M551.2 193l8-4-2 4 2 4z"/>
|
||||
<ellipse cx="621" cy="193" fill="#dae8fc" stroke="#09a3d5" stroke-width="2" rx="45" ry="45"/>
|
||||
<text class="svg__langdata__text-small" transform="translate(582 187.5)" width="85" height="30">tokenizer <tspan dx="-5.2em" dy="1.25em">exceptions</tspan></text>
|
||||
<path fill="none" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M690 193h-15.8"/>
|
||||
<path fill="#09a3d5" stroke="#09a3d5" stroke-width="2" stroke-miterlimit="10" d="M668.2 193l8-4-2 4 2 4z"/>
|
||||
<path fill="none" stroke="#b85450" stroke-width="3" stroke-miterlimit="10" d="M703 225l-10.3 127.8" stroke-dasharray="1 6" stroke-linecap="round"/>
|
||||
<path fill="#b85450" stroke="#b85450" stroke-width="2" stroke-miterlimit="10" d="M692.2 358.8l-3.4-8.3 4 2.3 4-1.7z"/>
|
||||
<ellipse cx="735" cy="193" fill="#f5f5f5" stroke="#09a3d5" stroke-width="2" rx="45" ry="45"/>
|
||||
<text class="svg__langdata__text-small" transform="translate(705 182)" width="53" height="46">prefixes, <tspan dy="1.25em" dx="-4.4em">suffixes,</tspan> <tspan dy="1.25em" dx="-4em">infixes</tspan>
|
||||
</text>
|
||||
<path fill="none" stroke="#d79b00" stroke-width="3" stroke-miterlimit="10" d="M280 238v114.8" stroke-dasharray="1 6" stroke-linecap="round"/>
|
||||
<path fill="#d79b00" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M280 358.8l-4-8 4 2 4-2z"/>
|
||||
<ellipse cx="280" cy="193" fill="#dae8fc" stroke="#09a3d5" stroke-width="2" rx="45" ry="45"/>
|
||||
<text class="svg__langdata__text-small" transform="translate(254 187.5)" width="71" height="30">lemma <tspan dy="1.25em" dx="-3em">data</tspan></text>
|
||||
<path fill="none" stroke="#d79b00" stroke-width="3" stroke-miterlimit="10" d="M346 404h53.8" stroke-dasharray="1 6" stroke-linecap="round"/>
|
||||
<path fill="#d79b00" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M405.8 404l-8 4 2-4-2-4z"/>
|
||||
<path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M247.7 361.3H313l33 43.6-33 43.5h-65.3L215 405z"/>
|
||||
<text class="svg__langdata__text-large" transform="translate(228 410)" width="100" height="22">Lemmatizer</text>
|
||||
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M823 193h-34.8"/>
|
||||
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M782.2 193l8-4-2 4 2 4z"/>
|
||||
<ellipse cx="855" cy="193" fill="#f5f5f5" stroke="#666" stroke-width="2" rx="31.5" ry="31.5"/>
|
||||
<text class="svg__langdata__text-tiny" transform="translate(829 189)" width="50" height="30">char <tspan dy="1.1em" dx="-3.1em">classes</tspan></text>
|
||||
<path fill="#e1d5e7" stroke="#9673a6" stroke-width="2" d="M408 367h124v74H408z"/>
|
||||
<text class="svg__langdata__text-large" transform="translate(443.5 410)" width="51" height="22">Token</text>
|
||||
<path fill="none" stroke="#666" stroke-width="3" stroke-miterlimit="10" d="M131 225l-21 122.2" stroke-dasharray="1 6" stroke-linecap="round"/>
|
||||
<path fill="#666" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M109 353l-2.5-8.5 3.6 2.7 4.4-1.3z"/>
|
||||
<ellipse cx="163" cy="193" fill="#dae8fc" stroke="#09a3d5" stroke-width="2" rx="45" ry="45"/>
|
||||
<text class="svg__langdata__text-small" transform="translate(139 187.5)" width="45" height="30">morph <tspan dy="1.25em" dx="-3.2em">rules</tspan></text>
|
||||
<path fill="none" stroke="#666" stroke-width="3" stroke-miterlimit="10" d="M78 225l15.4 122" stroke-dasharray="1 6" stroke-linecap="round"/>
|
||||
<path fill="#666" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M94.2 353l-5-7.5 4.2 1.5 3.7-2.5z"/>
|
||||
<ellipse cx="46" cy="193" fill="#dae8fc" stroke="#09a3d5" stroke-width="2" rx="45" ry="45"/>
|
||||
<text class="svg__langdata__text-small" transform="translate(33 187.5)" width="27" height="30">tag <tspan dy="1.25em" dx="-2em">map</tspan></text>
|
||||
<ellipse cx="101" cy="405" fill="#f5f5f5" stroke="#666" stroke-width="2" rx="74.5" ry="49.5"/>
|
||||
<text class="svg__langdata__text-large" transform="translate(49.5 410)" width="100" height="22">Morphology</text>
|
||||
</svg>
|
Before Width: | Height: | Size: 9.1 KiB |
|
@ -1,123 +1,305 @@
|
|||
<svg class="o-svg" xmlns="http://www.w3.org/2000/svg" width="600" height="380" viewBox="-20 -10 550 400">
|
||||
<style>
|
||||
.svg__tokenization__text { fill: #1a1e23; font: 18px Arial, sans-serif }
|
||||
.svg__tokenization__text-small { fill: #fff; font: 600 13px Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace }
|
||||
</style>
|
||||
<path fill="none" stroke="#c00" stroke-width="2" stroke-miterlimit="10" d="M71 39v12H16v11M71 39v12h20v11"/>
|
||||
<path fill="#f8cecc" stroke="#c00" stroke-width="2" d="M1 1h140v38.2H1z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" width="43" height="19" transform="translate(48.5 9.5)">“Let’s</text>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M175 39v23"/>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M150 1h50v38.2h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(164.5 9.5)" width="19" height="19">go</text>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M235 39v23"/>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M210 1h50v38.2h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(226.5 9.5)" width="15" height="19">to</text>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M341 39v23"/>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M270 1h141v38.2H270z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(320.5 9.5)" width="38" height="19">N.Y.!”</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M16 100v20"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M1 62h30v38.2H1z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(11.5 70.5)" width="7" height="19">“</text>
|
||||
<path fill="none" stroke="#c00" stroke-width="2" stroke-miterlimit="10" d="M91 100v11H66v9M91 100v11h29v9"/>
|
||||
<path fill="#f8cecc" stroke="#c00" stroke-width="2" d="M41 62h100v38.2H41z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(72.5 70.5)" width="35" height="19">Let’s</text>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M175 100v20"/>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M150 62h50v38.2h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(164.5 70.5)" width="19" height="19">go</text>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M235 100v20"/>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M210 62h50v38.2h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(226.5 70.5)" width="15" height="19">to</text>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M341 100v20"/>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M270 62h141v38.2H270z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(320.5 70.5)" width="38" height="19">N.Y.!”</text>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M1 120h30v38H1z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(11.5 128.5)" width="7" height="19">“</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M66 158v24"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M41 120h50v38H41z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(53.5 128.5)" width="23" height="19">Let</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M175 158v24"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M150 120h50v38h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(164.5 128.5)" width="19" height="19">go</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M235 158v24"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M210 120h50v38h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(226.5 128.5)" width="15" height="19">to</text>
|
||||
<path fill="none" stroke="#c00" stroke-width="2" stroke-miterlimit="10" d="M341 158v13h-20v11M341 158v13h55v11"/>
|
||||
<path fill="#f8cecc" stroke="#c00" stroke-width="2" d="M270 120h141v38H270z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(320.5 128.5)" width="38" height="19">N.Y.!”</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M120 158v24"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M100 120h40v38h-40z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(113.5 128.5)" width="11" height="19">’s</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M16 220v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M1 181.8h30V220H1z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(11.5 190.5)" width="7" height="19">“</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M66 220v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M41 181.8h50V220H41z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(53.5 190.5)" width="23" height="19">Let</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M175 220v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M150 181.8h50V220h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(164.5 190.5)" width="19" height="19">go</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M235 220v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M210 181.8h50V220h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(226.5 190.5)" width="15" height="19">to</text>
|
||||
<path fill="none" stroke="#c00" stroke-width="2" stroke-miterlimit="10" d="M321 220v11h-20v12M321 220v11h34v12"/>
|
||||
<path fill="#f8cecc" stroke="#c00" stroke-width="2" d="M270 181.8h101V220H270z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(304.5 190.5)" width="30" height="19">N.Y.!</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M120 220v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M100 181.8h40V220h-40z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(113.5 190.5)" width="11" height="19">’s</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M396 220v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M381 181.8h30V220h-30z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(391.5 190.5)" width="7" height="19">”</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M16 281v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M1 242.7h30V281H1z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(11.5 251.5)" width="7" height="19">“</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M66 281v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M41 242.7h50V281H41z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(53.5 251.5)" width="23" height="19">Let</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M175 281v20-17 20"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M150 242.7h50V281h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(164.5 251.5)" width="19" height="19">go</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M235 281v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M210 242.7h50V281h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(226.5 251.5)" width="15" height="19">to</text>
|
||||
<path fill="none" stroke="#c00" stroke-width="2" stroke-miterlimit="10" d="M301 281v23"/>
|
||||
<path fill="#f8cecc" stroke="#b85450" stroke-width="2" d="M270 242.7h61V281h-61z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(286.5 251.5)" width="26" height="19">N.Y.</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M120 281v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M100 242.7h40V281h-40z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(113.5 251.5)" width="11" height="19">’s</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M396 281v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M381 242.7h30V281h-30z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(391.5 251.5)" width="7" height="19">”</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M355 281v23"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M340 242.7h30V281h-30z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(351.5 251.5)" width="5" height="19">!</text>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M1 304h30v38H1z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(11.5 312.5)" width="7" height="19">“</text>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M41 304h50v38H41z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(53.5 312.5)" width="23" height="19">Let</text>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M150 304h50v38h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(164.5 312.5)" width="19" height="19">go</text>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M210 304h50v38h-50z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(226.5 312.5)" width="15" height="19">to</text>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M270 304h61v38h-61z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(286.5 312.5)" width="26" height="19">N.Y.</text>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M100 304h40v38h-40z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(113.5 312.5)" width="11" height="19">’s</text>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M381 304h30v38h-30z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(391.5 312.5)" width="7" height="19">”</text>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" d="M340 304h30v38h-30z"/>
|
||||
<text class="svg__tokenization__text" dy="1em" transform="translate(351.5 312.5)" width="5" height="19">!</text>
|
||||
<rect width="104" height="19" x="437" y="72" fill="#c00" stroke="#c00" stroke-width="2" rx="2.9" ry="2.9"/>
|
||||
<text class="svg__tokenization__text-small" dy="0.9em" transform="translate(455.5 74.5)" width="65" height="12">EXCEPTION</text>
|
||||
<rect width="104" height="19" x="437" y="11" fill="#c00" stroke="#c00" stroke-width="2" rx="2.9" ry="2.9"/>
|
||||
<text class="svg__tokenization__text-small" dy="0.9em" transform="translate(466.5 13.5)" width="43" height="12">PREFIX</text>
|
||||
<rect width="104" height="19" x="437" y="130" fill="#c00" stroke="#c00" stroke-width="2" rx="2.9" ry="2.9"/>
|
||||
<text class="svg__tokenization__text-small" dy="0.9em" transform="translate(466.5 132.5)" width="43" height="12">SUFFIX</text>
|
||||
<rect width="104" height="19" x="437" y="191" fill="#c00" stroke="#c00" stroke-width="2" rx="2.9" ry="2.9"/>
|
||||
<text class="svg__tokenization__text-small" dy="0.9em" transform="translate(466.5 193.5)" width="43" height="12">SUFFIX</text>
|
||||
<rect width="104" height="19" x="437" y="252" fill="#c00" stroke="#c00" stroke-width="2" rx="2.9" ry="2.9"/>
|
||||
<text class="svg__tokenization__text-small" dy="0.9em" transform="translate(455.5 254.5)" width="65" height="12">EXCEPTION</text>
|
||||
<rect width="104" height="19" x="437" y="313" fill="#82b366" stroke="#82b366" stroke-width="2" rx="2.9" ry="2.9"/>
|
||||
<text class="svg__tokenization__text-small" dy="0.9em" transform="translate(473.5 315.5)" width="29" height="12">DONE</text>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="598" height="386" viewBox="0 0 598 386">
|
||||
<defs>
|
||||
<path id="a" d="M51.3 10.9a4.3 4.3 0 01-.6-2.2c0-.6.2-1.2.5-1.9a6 6 0 011.4-1.6l.6.4.1.2-.1.3a7.5 7.5 0 00-.6 1l-.2.5a2.5 2.5 0 000 1.4l.3.8.1.2c0 .2 0 .3-.3.4l-1.2.5zm3.4 0a4.3 4.3 0 01-.7-2.2c0-.6.2-1.2.5-1.9A6 6 0 0156 5.2l.6.4h.1v.5a7.5 7.5 0 00-.7 1l-.2.5a2.5 2.5 0 000 1.4l.4.8v.2c0 .2 0 .3-.2.4l-1.2.5zm7.4 9.3H69V22h-9V6.2h2.2v14zM75 10.7a5 5 0 011.9.3 4.1 4.1 0 012.4 2.5c.2.7.3 1.4.3 2.2v.6l-.4.1h-7.5c0 .7.1 1.4.3 1.9.2.5.4 1 .7 1.3l1.1.8 1.5.2c.5 0 .9 0 1.2-.2a6 6 0 001.6-.7l.5-.2.3.2.6.7-.9.8-1 .5a6.9 6.9 0 01-4.6 0c-.7-.2-1.2-.6-1.7-1-.5-.6-.8-1.2-1.1-2a7.6 7.6 0 010-4.7 4.7 4.7 0 012.7-3c.6-.2 1.3-.3 2.1-.3zm0 1.4a3 3 0 00-2.2.8c-.5.6-.9 1.3-1 2.3h6l-.1-1.2c-.1-.4-.3-.7-.6-1-.2-.3-.5-.5-.9-.7a3 3 0 00-1.2-.2zm10.5 10c-.9 0-1.6-.2-2-.7-.5-.5-.7-1.2-.7-2v-6.9h-1.4l-.3-.1-.1-.3v-.8l1.8-.2.5-3.5.1-.3h1.3V11H88v1.4h-3.2v6.7c0 .5.1.8.4 1 .2.3.5.4.8.4l.6-.1a2.3 2.3 0 00.6-.4h.2l.3.1.6 1c-.3.3-.8.5-1.2.7l-1.5.3zm6.2-16.7a4.1 4.1 0 01.6 2.1 4 4 0 01-.5 2 6 6 0 01-1.3 1.6l-.6-.4-.2-.1v-.1l.1-.3a5.1 5.1 0 00.7-1l.2-.5a2.5 2.5 0 000-1.4l-.4-.8v-.2c0-.2 0-.3.2-.4l1.2-.5zm9.7 7.3c-.1.2-.3.2-.4.2h-.4a8.6 8.6 0 00-1.2-.6 3.4 3.4 0 00-2 0l-.6.3-.4.5-.2.7c0 .2.1.5.3.7l.6.5 1 .3a49.6 49.6 0 013 1.3l.7.8.2 1.2c0 .5 0 1-.3 1.4-.2.5-.4.8-.8 1.1a4 4 0 01-1.3.8c-.5.2-1.1.3-1.8.3a5.6 5.6 0 01-3.7-1.4l.4-.7.2-.2.4-.1.4.1.5.4.8.3 1 .2c.5 0 .8 0 1-.2.4 0 .6-.2.8-.4l.4-.6.2-.7c0-.3-.1-.5-.3-.7a2 2 0 00-.6-.6l-1-.3a68.4 68.4 0 01-2.1-.8l-1-.5-.6-.9c-.2-.3-.2-.7-.2-1.2a3 3 0 011-2.2c.3-.3.7-.6 1.2-.8a5 5 0 011.7-.2c.8 0 1.4.1 2 .3l1.5 1-.4.7z"/>
|
||||
<path id="b" d="M183.5 10.7l1.4.1 1.1.5h3v.7c0 .3 0 .4-.4.5l-1.3.2c.3.4.4 1 .4 1.6a3.4 3.4 0 01-1.2 2.6c-.3.3-.8.5-1.3.7a5.6 5.6 0 01-3.2 0l-.5.5-.2.5c0 .3.1.5.4.6.2.2.5.3.8.3l1.2.1a36.1 36.1 0 012.7.3c.5 0 .9.2 1.2.4.4.2.7.4.9.7a3 3 0 010 2.6c-.3.5-.6 1-1 1.3-.5.3-1 .6-1.7.8a8.4 8.4 0 01-4.3 0 5 5 0 01-1.6-.6c-.4-.2-.7-.6-.9-1-.2-.3-.3-.6-.3-1 0-.6.2-1 .5-1.4.4-.4.9-.7 1.5-1a2 2 0 01-.8-.5c-.2-.3-.3-.6-.3-1l.1-.5c0-.2.2-.4.3-.5a2.9 2.9 0 011-1c-.5-.2-1-.6-1.2-1.2-.3-.5-.5-1-.5-1.7a3.3 3.3 0 011.2-2.6 4 4 0 011.3-.8l1.7-.2zm3.5 12c0-.4 0-.6-.2-.8l-.7-.4-.9-.2a13.9 13.9 0 00-2.2-.1l-1.2-.1-1 .7a1.5 1.5 0 00-.2 1.7l.6.6c.3.1.6.3 1 .3l1.4.2c.6 0 1 0 1.4-.2.5 0 .8-.2 1.1-.4.3-.1.5-.3.7-.6l.2-.8zm-3.5-6.1l1-.2c.4-.1.6-.3.8-.5l.5-.7.2-.9c0-.7-.2-1.2-.7-1.6-.4-.4-1-.6-1.8-.6s-1.4.2-1.8.6c-.4.4-.6 1-.6 1.6 0 .3 0 .6.2 1a2 2 0 001.2 1c.3.2.6.3 1 .3zm12-6c.8 0 1.6.2 2.2.5a4.7 4.7 0 012.8 3c.2.7.3 1.5.3 2.3 0 .9 0 1.7-.3 2.4s-.6 1.3-1 1.8c-.6.5-1.1.9-1.8 1.2-.6.2-1.4.4-2.2.4-.8 0-1.5-.2-2.2-.4-.6-.3-1.2-.7-1.7-1.2-.4-.5-.8-1.1-1-1.8a7 7 0 01-.4-2.4c0-.8.1-1.6.4-2.3.2-.8.6-1.4 1-1.9.5-.5 1-.8 1.7-1.1.7-.3 1.4-.4 2.2-.4zm0 10c1.1 0 2-.3 2.5-1 .5-.8.8-1.8.8-3.2 0-1.3-.3-2.3-.8-3-.5-.8-1.4-1.2-2.5-1.2-.5 0-1 .1-1.4.3-.4.2-.8.5-1 .8l-.7 1.4-.2 1.7.2 1.8.6 1.3c.3.4.7.6 1 .8l1.5.3z"/>
|
||||
<path id="c" d="M250.4 22.2c-.8 0-1.5-.3-2-.8s-.7-1.2-.7-2v-6.9h-1.3l-.3-.1-.2-.3v-.8l1.9-.2.4-3.5c0-.1 0-.2.2-.3h1.3V11h3.2v1.4h-3.2v6.7c0 .5 0 .8.3 1 .2.3.5.4.9.4l.5-.1a2.3 2.3 0 00.7-.4h.2l.3.1.5 1a4.1 4.1 0 01-2.7 1zm9.4-11.5c.8 0 1.5.1 2.2.4a4.7 4.7 0 012.7 3 7.2 7.2 0 010 4.7c-.2.7-.6 1.3-1 1.8-.5.5-1 .9-1.7 1.2-.7.2-1.4.4-2.2.4-.8 0-1.6-.2-2.2-.4-.7-.3-1.2-.7-1.7-1.2s-.8-1.1-1-1.8a7 7 0 01-.4-2.4c0-.8 0-1.6.3-2.3a4.7 4.7 0 012.7-3c.7-.3 1.5-.4 2.3-.4zm0 10c1 0 2-.4 2.4-1.2.6-.7.9-1.7.9-3 0-1.4-.3-2.4-.9-3.2-.5-.7-1.3-1-2.4-1-.6 0-1 0-1.5.2l-1 .8c-.3.4-.5.8-.6 1.4l-.2 1.7c0 .7 0 1.3.2 1.8.1.5.3 1 .6 1.3.3.4.6.6 1 .8.4.2 1 .3 1.5.3z"/>
|
||||
<path id="d" d="M347.6 6.2l.5.1.3.3 9.1 11.9a7.5 7.5 0 010-1.1V6.2h1.8V22h-1a1 1 0 01-.5 0 1 1 0 01-.3-.4l-9.1-11.9a14.1 14.1 0 010 1V22h-1.9V6.2h1.1zm14.6 14.6a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4l.5.1c.2 0 .4.2.5.3a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .4 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.4-1zm10-5V22h-2v-6.3l-5.9-9.5h2c.1 0 .3 0 .4.2l.3.3 3.6 6.2a7.6 7.6 0 01.6 1.4 13 13 0 01.6-1.4l3.6-6.2.3-.3.4-.2h2l-5.9 9.5zm5.2 5a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4l.5.1c.2 0 .3.2.4.3a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .4 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.3-1zm8.4-14.6v6.3a27.8 27.8 0 01-.2 4h-1.4a66.4 66.4 0 01-.2-4V6.2h1.8zm-2.3 14.6a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4l.5.1c.2 0 .3.2.4.3a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .4 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.3-1zm8.1-15.4a4.1 4.1 0 01.6 2.1 4 4 0 01-.5 2 6 6 0 01-1.3 1.6l-.6-.4-.2-.1v-.1l.1-.3a5.1 5.1 0 00.7-1l.2-.5a2.5 2.5 0 000-1.4l-.4-.8v-.2c0-.2 0-.3.2-.4l1.2-.5zm3.4 0a4.1 4.1 0 01.6 2.1 4 4 0 01-.5 2 6 6 0 01-1.4 1.6l-.6-.4-.1-.1v-.1-.3a5.1 5.1 0 00.7-1l.2-.5a2.5 2.5 0 000-1.4l-.4-.8v-.2c0-.2 0-.3.3-.4l1.2-.5z"/>
|
||||
<path id="e" d="M13.5 77.8c-.5-.8-.8-1.6-.8-2.5 0-.7.2-1.4.6-2 .3-.7.9-1.4 1.6-2l.8.6.2.2V72.4l-.1.2a5.7 5.7 0 00-.6.8l-.2.6-.1.7.1.8c0 .3.2.5.4.9l.1.3c0 .2-.1.4-.4.5l-1.6.6zm3.6 0c-.5-.8-.7-1.6-.7-2.5 0-.7.2-1.4.5-2 .4-.7 1-1.4 1.6-2l.9.6.1.2v.5a5.7 5.7 0 00-.7.8l-.2.6v1.5l.5.9v.3c0 .2 0 .4-.3.5l-1.7.6z"/>
|
||||
<path id="f" d="M80.8 86.8h6.8v1.7h-9V72.8h2.2v14zm12.9-9.6a5 5 0 011.8.4A4.1 4.1 0 0198 80c.2.6.3 1.3.3 2.1l-.1.6-.4.2h-7.4c0 .7.1 1.3.3 1.8.2.5.4 1 .7 1.3.3.4.7.6 1.1.8l1.5.3 1.2-.2a6 6 0 001.6-.7l.4-.2c.2 0 .3 0 .4.2l.6.7-1 .7c-.2.3-.6.4-1 .6a6.9 6.9 0 01-4.5 0c-.7-.3-1.2-.6-1.7-1.1-.5-.5-.9-1.2-1.1-1.9a7.6 7.6 0 010-4.7c.2-.7.5-1.3 1-1.8.4-.5 1-.9 1.6-1.2.6-.2 1.4-.4 2.2-.4zm0 1.5a3 3 0 00-2.2.8c-.5.5-.9 1.3-1 2.3h6l-.1-1.3-.6-1c-.2-.2-.5-.5-.9-.6a3 3 0 00-1.2-.2zm10.5 10c-.9 0-1.6-.2-2-.7-.5-.5-.8-1.2-.8-2.1V79h-1.6l-.1-.4v-.8l1.8-.2.5-3.4.1-.3.3-.1h1v3.8h3.2V79h-3.2v6.7c0 .5.1.8.3 1 .3.3.6.4 1 .4h.4a2.3 2.3 0 00.7-.4l.2-.1c.1 0 .2 0 .3.2l.6 1-1.3.7-1.4.2zm6.2-16.7a4.1 4.1 0 01.6 2 4 4 0 01-.5 2 6 6 0 01-1.4 1.6l-.6-.3V77h-.1l.1-.3a5.1 5.1 0 00.6-1l.2-.6a2.5 2.5 0 000-1.3c0-.3-.2-.6-.3-.8l-.1-.3c0-.1 0-.3.3-.4l1.2-.4zm9.6 7.2c0 .2-.2.3-.4.3l-.3-.1a8.6 8.6 0 00-1.3-.6 3.4 3.4 0 00-1.8 0l-.7.4-.5.5-.1.6c0 .3 0 .5.2.7l.7.5 1 .4a49.6 49.6 0 013 1.3c.2.2.5.5.6.8.2.3.3.7.3 1.1l-.3 1.5c-.2.4-.5.8-.8 1a4 4 0 01-1.3.8l-1.8.3a5.6 5.6 0 01-3.8-1.3l.5-.8.2-.2h.8l.5.4.7.4 1.2.1 1-.1.7-.4.4-.6.1-.7c0-.3 0-.6-.2-.8a2 2 0 00-.7-.5l-.9-.4a68.4 68.4 0 01-2.1-.7l-1-.6-.6-.8-.3-1.2a3 3 0 011-2.3l1.3-.7a5 5 0 011.7-.3c.7 0 1.4.1 2 .4.6.2 1 .5 1.5 1l-.5.6z"/>
|
||||
<path id="g" d="M182.2 77.2c.4 0 .9 0 1.3.2.4 0 .8.2 1.2.4h3v.8c0 .2-.2.4-.5.4l-1.2.2c.2.5.3 1 .3 1.6a3.4 3.4 0 01-1.1 2.6c-.4.3-.8.6-1.4.7-.5.2-1 .3-1.6.3-.6 0-1 0-1.5-.2l-.5.5c-.2.2-.2.3-.2.5s0 .4.3.6l.8.3h1.2a36.1 36.1 0 012.8.3l1.2.4.8.8c.2.3.3.7.3 1.2s0 1-.3 1.4c-.3.5-.6.9-1 1.2a7 7 0 01-3.8 1.1c-.9 0-1.6 0-2.2-.2a5 5 0 01-1.5-.6l-1-1-.2-1c0-.6.1-1.1.5-1.5.3-.4.8-.7 1.4-1a2 2 0 01-.7-.5c-.2-.2-.3-.6-.3-1v-.5l.3-.5a2.9 2.9 0 011.1-.9c-.5-.3-1-.7-1.3-1.2-.3-.5-.5-1.1-.5-1.8 0-.5.1-1 .4-1.5l.8-1.1a4 4 0 011.4-.7c.5-.2 1-.3 1.7-.3zm3.4 12c0-.3 0-.6-.2-.7-.1-.2-.4-.3-.6-.4l-1-.2a13.9 13.9 0 00-2.2-.2h-1.1c-.4.1-.8.4-1 .6a1.5 1.5 0 00-.2 1.8c.1.2.3.4.6.5.2.2.6.3 1 .4l1.4.1 1.4-.1c.4-.1.8-.2 1-.4l.7-.6c.2-.3.2-.6.2-.8zm-3.4-6.1c.4 0 .7 0 1-.2.3 0 .6-.2.8-.4l.4-.7.2-1c0-.6-.2-1.2-.6-1.6-.4-.4-1-.6-1.8-.6s-1.4.2-1.8.6a2.5 2.5 0 00-.5 2.5 2 2 0 001.2 1.2l1 .2zm12-5.9c.8 0 1.5.2 2.2.4a4.7 4.7 0 012.7 3c.2.7.4 1.5.4 2.4 0 .8-.2 1.6-.4 2.3-.2.7-.6 1.3-1 1.8-.5.5-1 1-1.7 1.2-.7.3-1.4.4-2.2.4-.8 0-1.6-.1-2.2-.4-.7-.3-1.3-.7-1.7-1.2-.5-.5-.8-1-1-1.8a7 7 0 01-.5-2.3c0-.9.2-1.7.4-2.4.3-.7.6-1.3 1-1.8.5-.5 1.1-.9 1.8-1.2.6-.2 1.4-.4 2.2-.4zm0 10c1 0 1.9-.4 2.4-1.1.6-.8.8-1.8.8-3.1s-.2-2.4-.8-3.1c-.5-.8-1.3-1.1-2.4-1.1-.6 0-1 0-1.5.3-.4.1-.7.4-1 .8-.3.3-.5.8-.6 1.3-.2.5-.2 1.1-.2 1.8 0 .6 0 1.2.2 1.8.1.5.3 1 .6 1.3l1 .8c.4.2 1 .3 1.5.3z"/>
|
||||
<path id="h" d="M249 88.7c-1 0-1.6-.2-2-.7-.6-.5-.8-1.2-.8-2.1V79h-1.6l-.2-.4v-.8l1.9-.2.4-3.4c0-.2 0-.2.2-.3l.3-.1h1v3.8h3.2V79h-3.2v6.7c0 .5 0 .8.3 1 .2.3.5.4.9.4h.5a2.3 2.3 0 00.7-.4l.2-.1c.1 0 .2 0 .3.2l.5 1-1.2.7-1.5.2zm9.3-11.5c.8 0 1.5.2 2.2.4a4.7 4.7 0 012.7 3c.3.7.4 1.5.4 2.4 0 .8-.1 1.6-.4 2.3-.2.7-.6 1.3-1 1.8-.5.5-1 1-1.7 1.2-.7.3-1.4.4-2.2.4-.8 0-1.6-.1-2.2-.4-.7-.3-1.2-.7-1.7-1.2s-.8-1-1-1.8a7 7 0 01-.4-2.3c0-.9 0-1.7.3-2.4s.6-1.3 1.1-1.8c.5-.5 1-.9 1.7-1.2.6-.2 1.4-.4 2.2-.4zm0 10c1 0 2-.4 2.4-1.1.6-.8.9-1.8.9-3.1s-.3-2.4-.9-3.1c-.5-.8-1.3-1.1-2.4-1.1-.6 0-1 0-1.5.3-.4.1-.7.4-1 .8-.3.3-.5.8-.6 1.3L255 83c0 .6 0 1.2.2 1.8.1.5.3 1 .6 1.3l1 .8c.4.2 1 .3 1.5.3z"/>
|
||||
<path id="i" d="M347.2 72.8h.5l.3.3 9.1 12a7.5 7.5 0 010-1.2V72.8h1.8v15.7h-1a1 1 0 01-.5 0 1 1 0 01-.3-.3l-9.1-12a14.1 14.1 0 010 1.1v11.2h-1.9V72.8h1.1zm14.6 14.5a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4c.2 0 .4 0 .5.2.2 0 .3.1.5.3a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .3 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.4-1zm10-5v6.2h-2v-6.2l-5.9-9.5h2l.4.1.2.4 3.7 6.1a7.6 7.6 0 01.6 1.4 13 13 0 01.6-1.4l3.6-6.1.3-.4.4-.1h2l-5.9 9.5zm5.2 5a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4c.1 0 .3 0 .5.2.2 0 .3.1.4.3a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .3 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.3-1zm8.4-14.5V79a27.8 27.8 0 01-.3 4h-1.3a66.4 66.4 0 01-.3-4v-6.3h2zm-2.3 14.5a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4c.1 0 .3 0 .5.2l.4.3a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .3 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.3-1zm8.1-15.3a4.1 4.1 0 01.6 2 4 4 0 01-.5 2 6 6 0 01-1.3 1.6l-.6-.3-.2-.2.1-.3a5.1 5.1 0 00.7-1l.2-.6a2.5 2.5 0 000-1.3l-.4-.8v-.3c0-.1 0-.3.2-.4l1.2-.4zm3.3 0a4.1 4.1 0 01.7 2 4 4 0 01-.5 2 6 6 0 01-1.4 1.6l-.6-.3-.1-.2v-.3a5.1 5.1 0 00.7-1l.2-.6a2.5 2.5 0 000-1.3c0-.3-.2-.6-.4-.8v-.3c0-.1 0-.3.2-.4l1.2-.4z"/>
|
||||
<path id="j" d="M13.5 141c-.5-.7-.8-1.6-.8-2.4 0-.7.2-1.4.6-2.1.3-.7.9-1.3 1.6-1.8l.8.5.2.1v.4l-.1.2a5.7 5.7 0 00-.6.8l-.2.6-.1.6.1.8c0 .3.2.6.4 1l.1.2c0 .3-.1.4-.4.5l-1.6.7zm3.6 0c-.5-.7-.7-1.6-.7-2.4 0-.7.2-1.4.5-2.1.4-.7 1-1.3 1.6-1.8l.9.5.1.1V136a5.7 5.7 0 00-.7.8l-.2.6v1.4l.5 1v.2c0 .3 0 .4-.3.5l-1.7.7z"/>
|
||||
<path id="k" d="M60 149.4h6.3v2.4h-9.4V136h3v13.5zm12.4-9c.7 0 1.4 0 2 .3a4.3 4.3 0 012.5 2.6 6 6 0 01.4 2.7l-.1.3-.2.2h-7.3c0 1.2.4 2 1 2.6a3 3 0 002 .8c.5 0 1 0 1.2-.2l.9-.3.6-.4.5-.1h.3l.2.2.8 1-1 1a5.7 5.7 0 01-2.4.8h-1.3a6 6 0 01-2.1-.3c-.7-.3-1.3-.7-1.8-1.2s-.9-1.1-1.2-1.9a7.3 7.3 0 010-4.7c.2-.7.6-1.3 1-1.8a5 5 0 011.7-1.2c.7-.3 1.5-.4 2.3-.4zm0 1.9c-.7 0-1.3.2-1.8.7-.4.4-.7 1-.8 1.9h5v-1l-.5-.8a2 2 0 00-.8-.6l-1-.2zm10.8 9.7c-1 0-1.7-.3-2.2-.8-.6-.6-.8-1.4-.8-2.3v-6.3H79c-.1 0-.3 0-.4-.2l-.1-.4v-1l1.8-.4.6-3c0-.2 0-.3.2-.4l.4-.1h1.4v3.5h3v2h-3v6c0 .4 0 .7.2 1l.8.2h.4a2.3 2.3 0 00.5-.3h.4l.2.2.8 1.3c-.4.3-.9.6-1.4.7a5 5 0 01-1.6.3z"/>
|
||||
<path id="l" d="M183 140.3c.4 0 .9 0 1.3.2.4 0 .8.2 1.2.4h3.2v1l-.1.4c0 .1-.2.2-.5.2l-1 .2a3.5 3.5 0 01.3 1.3 3.4 3.4 0 01-1.3 2.6c-.4.4-.9.6-1.4.8a5.7 5.7 0 01-3 .1c-.3.2-.5.5-.5.7 0 .3 0 .4.3.5l.8.3h1.2a24.2 24.2 0 012.6.3l1.2.4.8.8c.2.4.3.8.3 1.4 0 .5 0 1-.3 1.4l-1 1.3-1.8.9a8.9 8.9 0 01-4.5 0c-.7-.1-1.2-.3-1.6-.6l-1-1-.2-1c0-.6.1-1 .4-1.4.4-.4.8-.7 1.4-.9-.3-.1-.5-.3-.7-.6-.2-.3-.2-.6-.2-1v-.5l.3-.6.5-.5.6-.4a3.3 3.3 0 01-1.8-3 3.4 3.4 0 011.2-2.7c.4-.3 1-.5 1.5-.7a6 6 0 011.8-.3zm3 12c0-.2-.1-.4-.3-.5 0-.2-.3-.3-.5-.3a14.7 14.7 0 00-2.8-.3l-1-.1c-.4.1-.6.3-.8.6-.2.2-.3.5-.3.8 0 .2 0 .3.2.5 0 .2.2.3.4.5l.9.2 1.2.2c.5 0 1 0 1.3-.2.4 0 .7-.1 1-.3l.5-.5.1-.6zm-3-6.4l.8-.1.7-.4.3-.6.2-.7c0-.6-.2-1-.5-1.4-.4-.3-.9-.5-1.5-.5-.7 0-1.2.2-1.5.5-.4.4-.5.8-.5 1.4v.7a1.6 1.6 0 001 1l1 .1zm12.3-5.5c.8 0 1.6 0 2.2.4a5 5 0 013 3c.2.7.3 1.5.3 2.4a7 7 0 01-.4 2.4 4.9 4.9 0 01-2.9 3 6.2 6.2 0 01-4.6 0 5 5 0 01-2.8-3c-.3-.7-.4-1.6-.4-2.4 0-1 0-1.7.4-2.5.2-.7.6-1.3 1-1.8a5 5 0 011.9-1.1c.6-.3 1.4-.4 2.3-.4zm0 9.5c.9 0 1.6-.3 2-1 .5-.6.7-1.5.7-2.7 0-1.2-.2-2.2-.7-2.8-.4-.6-1.1-1-2-1-1 0-1.7.4-2.2 1-.4.6-.6 1.6-.6 2.8 0 1.2.2 2.1.6 2.7.5.7 1.2 1 2.2 1z"/>
|
||||
<path id="m" d="M251.3 152c-1 0-1.7-.3-2.2-.8-.6-.6-.8-1.4-.8-2.3v-6.3H247c-.1 0-.2 0-.3-.2l-.2-.4v-1l1.8-.4.6-3c0-.2 0-.3.2-.4l.4-.1h1.4v3.5h3v2h-3v6c0 .4 0 .7.3 1l.7.2h.4a2.3 2.3 0 00.5-.3h.4l.2.2.8 1.3c-.4.3-.9.6-1.4.7a5 5 0 01-1.6.3zm9.7-11.6c.8 0 1.6 0 2.2.4a5 5 0 013 3c.2.7.3 1.5.3 2.4a7 7 0 01-.4 2.4 4.9 4.9 0 01-2.9 3 6.2 6.2 0 01-4.6 0 5 5 0 01-2.8-3c-.3-.7-.4-1.6-.4-2.4 0-1 0-1.7.4-2.5.2-.7.6-1.3 1-1.8a5 5 0 011.9-1.1c.6-.3 1.4-.4 2.3-.4zm0 9.5c.9 0 1.6-.3 2-1 .5-.6.7-1.5.7-2.7 0-1.2-.2-2.2-.7-2.8-.4-.6-1.1-1-2-1-1 0-1.7.4-2.2 1-.4.6-.6 1.6-.6 2.8 0 1.2.2 2.1.6 2.7.5.7 1.2 1 2.2 1z"/>
|
||||
<path id="n" d="M347.7 136l.5.1.3.3 9.1 11.9a7.5 7.5 0 010-1V136h1.8v15.7h-1a1 1 0 01-.5 0 1 1 0 01-.3-.4l-9.1-11.8a14.1 14.1 0 010 1v11.2h-1.9v-15.7h1.1zm14.6 14.6a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4l.5.1.5.3a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .4 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.4-1zm10-5v6.2h-2v-6.3l-5.9-9.4h2l.4.1.2.4 3.7 6a7.6 7.6 0 01.6 1.5 13 13 0 01.6-1.4l3.6-6.1c0-.2.2-.3.3-.4l.4-.1h2l-5.9 9.4zm5.2 5a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4l.5.1.4.3a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .4 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.3-1zm8.4-14.5v6.2a27.8 27.8 0 01-.3 4h-1.3a66.4 66.4 0 01-.3-4v-6.2h2zm-2.3 14.5a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4l.5.1.4.3a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .4 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.3-1zm8.1-15.4a4.1 4.1 0 01.6 2.1 4 4 0 01-.5 2 6 6 0 01-1.3 1.6l-.6-.4h-.2v-.2l.1-.3a5.1 5.1 0 00.7-1l.2-.5a2.5 2.5 0 000-1.4l-.4-.8v-.2c0-.2 0-.3.2-.4l1.2-.5zm3.3 0a4.1 4.1 0 01.7 2.1 4 4 0 01-.5 2 6 6 0 01-1.4 1.6l-.6-.4h-.1v-.2-.3a5.1 5.1 0 00.7-1l.2-.5a2.5 2.5 0 000-1.4l-.4-.8v-.2c0-.2 0-.3.2-.4l1.2-.5z"/>
|
||||
<path id="o" d="M127 135l.6 1.2a4.3 4.3 0 01-.4 3.3c-.4.7-.9 1.3-1.6 1.9l-.8-.5-.2-.2v-.2l.1-.3a6.5 6.5 0 00.6-.9 2.9 2.9 0 00.3-1.2l-.1-.8c0-.3-.2-.6-.4-.9l-.1-.3c0-.2.1-.4.4-.5l1.6-.6zm9.7 7.7l-.2.3h-.3a29 29 0 00-1.6-.6h-1a2 2 0 00-1.2.3 1 1 0 00-.5.9l.3.6.6.4.9.3a29 29 0 012 .8l.9.5.6.9c.2.3.3.7.3 1.1 0 .6-.1 1-.3 1.5-.2.5-.5.9-.9 1.2a4 4 0 01-1.4.8 6.1 6.1 0 01-3 .2 6.7 6.7 0 01-2.1-.7l-.8-.6.7-1c0-.2.1-.2.3-.3l.4-.1.4.1a10.6 10.6 0 001.3.6l1 .2.8-.1.6-.3.3-.5.1-.5c0-.2 0-.5-.2-.6a2 2 0 00-.6-.5 29.4 29.4 0 01-3-1l-.9-.6-.6-1c-.2-.3-.2-.7-.2-1.2a3.3 3.3 0 011-2.4 4 4 0 011.4-.8c.5-.2 1.1-.2 1.8-.2a4.8 4.8 0 013.7 1.4l-.6 1z"/>
|
||||
<path id="p" d="M13.5 208.7c-.5-.8-.8-1.6-.8-2.5 0-.7.2-1.4.6-2 .3-.7.9-1.3 1.6-2l.8.6.2.2V203.3l-.1.2a5.7 5.7 0 00-.6.9l-.2.5-.1.7.1.8c0 .3.2.6.4.9l.1.3c0 .2-.1.4-.4.5l-1.6.6zm3.6 0c-.5-.8-.7-1.6-.7-2.5 0-.7.2-1.4.5-2 .4-.7 1-1.3 1.6-2l.9.6.1.2v.5a5.7 5.7 0 00-.7.9l-.2.5v1.5l.5.9v.3c0 .2 0 .4-.3.5l-1.7.6z"/>
|
||||
<path id="q" d="M60 217h6.3v2.5h-9.4v-16h3V217zm12.4-9c.7 0 1.4.1 2 .3a4.3 4.3 0 012.5 2.6 6 6 0 01.4 2.7l-.1.3-.2.2h-7.3c0 1.2.4 2 1 2.6a3 3 0 002 .8l1.2-.1.9-.4.6-.3.5-.2h.3l.2.3.8 1-1 .9a5.7 5.7 0 01-2.4.8l-1.3.1a6 6 0 01-2.1-.4c-.7-.2-1.3-.6-1.8-1.1-.5-.5-.9-1.2-1.2-2a7.3 7.3 0 010-4.7c.2-.7.6-1.3 1-1.8a5 5 0 011.7-1.2c.7-.3 1.5-.4 2.3-.4zm0 2c-.7 0-1.3.2-1.8.6-.4.5-.7 1-.8 2h5v-1l-.5-.9a2 2 0 00-.8-.6l-1-.2zm10.8 9.6c-1 0-1.7-.2-2.2-.8-.6-.6-.8-1.3-.8-2.3v-6.3H79l-.4-.1-.1-.5v-1l1.8-.3.6-3.1c0-.2 0-.3.2-.4H82.9v3.5h3v1.9h-3v6.1c0 .4 0 .6.2.8.2.2.5.3.8.3h.4a2.3 2.3 0 00.5-.3h.4l.2.2.8 1.3c-.4.4-.9.6-1.4.8a5 5 0 01-1.6.2z"/>
|
||||
<path id="r" d="M185.5 208l1.3.1 1.2.5h3.2v1l-.1.4-.5.2-1 .1a3.5 3.5 0 01.3 1.3 3.4 3.4 0 01-1.3 2.7l-1.4.7a5.7 5.7 0 01-3 .2c-.3.2-.5.4-.5.7 0 .2 0 .4.3.5l.8.2h1.2a24.2 24.2 0 012.6.3l1.2.5c.3.2.6.4.8.8.2.3.3.8.3 1.3s0 1-.3 1.5l-1 1.2c-.6.4-1.1.7-1.8.9a8.9 8.9 0 01-4.5 0c-.7 0-1.2-.3-1.6-.6l-1-1-.2-1c0-.6.1-1 .4-1.4.4-.3.8-.6 1.4-.8l-.7-.7c-.2-.2-.2-.6-.2-1v-.5l.3-.5.5-.5.6-.4c-.6-.3-1-.8-1.3-1.3-.4-.5-.5-1-.5-1.8a3.4 3.4 0 011.2-2.6c.4-.4 1-.6 1.5-.8a6 6 0 011.8-.2zm3 12c0-.3-.1-.4-.3-.6l-.5-.3a14.7 14.7 0 00-2.8-.3l-1-.1-.8.6c-.2.2-.3.5-.3.8 0 .2 0 .4.2.5 0 .2.2.4.4.5l.9.3h2.5l1-.3.5-.5.1-.6zm-3-6.5l.8-.1c.3 0 .5-.2.7-.4l.3-.6.2-.7c0-.6-.2-1-.5-1.3-.4-.4-.9-.5-1.5-.5-.7 0-1.2.1-1.5.5-.4.3-.5.7-.5 1.3v.7a1.6 1.6 0 001 1l1 .1zm12.3-5.5c.8 0 1.6.1 2.2.4a5 5 0 013 3c.2.7.3 1.5.3 2.4a7 7 0 01-.4 2.4 4.9 4.9 0 01-2.9 3c-.6.3-1.4.4-2.2.4-.9 0-1.7-.1-2.3-.4a5 5 0 01-3-3c-.2-.7-.3-1.5-.3-2.4 0-.9 0-1.7.4-2.4.2-.7.6-1.3 1-1.8a5 5 0 011.9-1.2c.6-.3 1.4-.4 2.3-.4zm0 9.5c.9 0 1.6-.3 2-1 .5-.5.7-1.5.7-2.7 0-1.2-.2-2.1-.7-2.8-.4-.6-1.1-1-2-1-1 0-1.7.4-2.2 1-.4.7-.6 1.6-.6 2.8 0 1.2.2 2.1.6 2.8.5.6 1.2 1 2.2 1z"/>
|
||||
<path id="s" d="M252.8 219.6c-1 0-1.7-.2-2.2-.8-.6-.6-.8-1.3-.8-2.3v-6.3h-1.2l-.3-.1-.2-.5v-1l1.8-.3.6-3.1c0-.2 0-.3.2-.4H252.5v3.5h3v1.9h-3v6.1c0 .4 0 .6.3.8.1.2.4.3.7.3h.4a2.3 2.3 0 00.5-.3h.4l.2.2.8 1.3c-.4.4-.9.6-1.4.8a5 5 0 01-1.6.2zm9.7-11.6c.8 0 1.6.1 2.2.4a5 5 0 013 3c.2.7.3 1.5.3 2.4a7 7 0 01-.4 2.4 4.9 4.9 0 01-2.9 3c-.6.3-1.4.4-2.2.4-.9 0-1.7-.1-2.3-.4a5 5 0 01-3-3c-.2-.7-.3-1.5-.3-2.4 0-.9 0-1.7.4-2.4.2-.7.6-1.3 1-1.8a5 5 0 011.9-1.2c.6-.3 1.4-.4 2.3-.4zm0 9.5c.9 0 1.6-.3 2-1 .5-.5.7-1.5.7-2.7 0-1.2-.2-2.1-.7-2.8-.4-.6-1.1-1-2-1-1 0-1.7.4-2.2 1-.4.7-.6 1.6-.6 2.8 0 1.2.2 2.1.6 2.8.5.6 1.2 1 2.2 1z"/>
|
||||
<path id="t" d="M331.8 203.7h.4l.3.4 9.2 11.8a7.5 7.5 0 01-.1-1v-11.2h1.9v15.8h-1.1a1 1 0 01-.4-.1 1 1 0 01-.4-.3l-9-11.9a14.1 14.1 0 010 1v11.3h-2v-15.8h1.2zm14.6 14.5a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.3h.5l.4.4a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .3 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.3-1zm10-5v6.3h-2.1v-6.3l-5.8-9.5h1.9l.4.1.3.4 3.6 6.1a7.6 7.6 0 01.6 1.4 13 13 0 01.7-1.4l3.6-6.1.2-.3c.1-.2.3-.2.5-.2h1.9l-5.8 9.5zm5.1 5a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.3h.5l.5.4a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .3 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.4-1zm8.5-14.5v6.3a27.8 27.8 0 01-.3 4h-1.3a66.4 66.4 0 01-.3-4v-6.3h1.9zm-2.4 14.5a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.3h.5l.5.4a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .3 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.4-1z"/>
|
||||
<path id="u" d="M127 202.6l.6 1.2a4.3 4.3 0 01-.4 3.4c-.4.6-.9 1.3-1.6 1.8l-.8-.5-.2-.2v-.1l.1-.4a6.5 6.5 0 00.6-.8 2.9 2.9 0 00.3-1.3l-.1-.8c0-.3-.2-.6-.4-.9l-.1-.3c0-.2.1-.4.4-.5l1.6-.6zm9.7 7.8l-.2.2h-.3a29 29 0 00-1.6-.6h-1a2 2 0 00-1.2.3 1 1 0 00-.5.9c0 .2.1.5.3.6.1.2.3.3.6.4l.9.4a29 29 0 012 .7l.9.6c.3.2.5.5.6.8.2.3.3.7.3 1.2l-.3 1.5-.9 1.2a4 4 0 01-1.4.8 6.1 6.1 0 01-3 .1 6.7 6.7 0 01-2.1-.7l-.8-.6.7-1 .3-.3h.8a10.6 10.6 0 001.3.7l1 .1h.8l.6-.4.3-.4.1-.5c0-.3 0-.5-.2-.7a2 2 0 00-.6-.4 29.4 29.4 0 01-3-1l-.9-.7-.6-.9c-.2-.3-.2-.8-.2-1.3a3.3 3.3 0 011-2.4 4 4 0 011.4-.7 5.6 5.6 0 014 .1c.6.2 1.1.6 1.5 1l-.6 1z"/>
|
||||
<path id="v" d="M429.6 202.6l.5 1.2a4.3 4.3 0 01-.3 3.4c-.4.6-1 1.3-1.6 1.8l-.9-.5-.1-.2v-.1-.4a7.8 7.8 0 00.7-.8l.2-.6a2.5 2.5 0 000-1.5l-.5-.9v-.3c0-.2 0-.4.3-.5l1.7-.6zm3.6 0l.6 1.2a4.3 4.3 0 01-.4 3.4c-.3.6-.9 1.3-1.6 1.8l-.8-.5-.2-.2v-.1l.1-.4a7.8 7.8 0 00.6-.8l.2-.6a2.5 2.5 0 000-1.5c0-.3-.2-.6-.4-.9l-.1-.3c0-.2.1-.4.4-.5l1.6-.6z"/>
|
||||
<path id="w" d="M13.5 275.3c-.5-.9-.8-1.7-.8-2.5 0-.7.2-1.4.6-2.1.3-.7.9-1.3 1.6-1.9l.8.6.2.1v.4l-.1.1a5.7 5.7 0 00-.6.9l-.2.6-.1.6.1.8c0 .3.2.6.4 1l.1.2c0 .3-.1.4-.4.5l-1.6.7zm3.6 0c-.5-.9-.7-1.7-.7-2.5 0-.7.2-1.4.5-2.1.4-.7 1-1.3 1.6-1.9l.9.6.1.1v.5a5.7 5.7 0 00-.7.9l-.2.6v1.4l.5 1v.2c0 .3 0 .4-.3.5l-1.7.7z"/>
|
||||
<path id="x" d="M58.4 283.6h6.4v2.4h-9.4v-16h3v13.6zm12.5-9c.7 0 1.4 0 2 .3a4.3 4.3 0 012.5 2.6 6 6 0 01.4 2.7l-.1.3-.2.1-.3.1h-7c0 1.2.4 2 1 2.6a3 3 0 002 .8c.5 0 1 0 1.2-.2l.9-.3.6-.4.5-.1h.3l.2.2.8 1c-.3.4-.6.7-1 .9a5.7 5.7 0 01-2.4.9H71a6 6 0 01-2.1-.3c-.7-.3-1.3-.7-1.8-1.2s-.9-1.1-1.2-1.9a7.3 7.3 0 010-4.8c.2-.6.6-1.2 1-1.7a5 5 0 011.7-1.2c.7-.3 1.5-.5 2.3-.5zm0 1.9c-.7 0-1.3.2-1.8.7-.4.4-.7 1-.8 1.9h5v-1l-.5-.9a2 2 0 00-.8-.5l-1-.2zm10.8 9.7c-1 0-1.7-.3-2.2-.9-.6-.5-.8-1.3-.8-2.2v-6.4H77l-.1-.5V275l1.8-.3.6-3c0-.2 0-.3.2-.4l.4-.1h1.4v3.5h3v2h-3v6c0 .4 0 .7.2.9.2.2.5.3.8.3h.4a2.3 2.3 0 00.5-.3h.4l.2.2.8 1.3c-.4.3-.9.6-1.4.7a5 5 0 01-1.6.3z"/>
|
||||
<path id="y" d="M183 274.5c.4 0 .9 0 1.3.2.4 0 .8.2 1.2.4h3.2v1l-.1.4c0 .1-.2.2-.5.2l-1 .2a3.5 3.5 0 01.3 1.3 3.4 3.4 0 01-1.3 2.6l-1.4.8a5.7 5.7 0 01-3 .1c-.3.2-.5.4-.5.7 0 .2 0 .4.3.5l.8.2 1.2.1a24.2 24.2 0 012.6.3c.5 0 .9.2 1.2.4l.8.8c.2.4.3.8.3 1.3s0 1-.3 1.5l-1 1.3c-.6.3-1.1.6-1.8.8-.7.3-1.4.4-2.3.4-.9 0-1.6-.1-2.2-.3-.7-.1-1.2-.4-1.6-.6l-1-1-.2-1.1c0-.5.1-1 .4-1.3.4-.4.8-.7 1.4-.9l-.7-.6c-.2-.3-.2-.6-.2-1v-.5l.3-.6.5-.5.6-.4a3.3 3.3 0 01-1.8-3 3.4 3.4 0 011.2-2.7 6 6 0 013.2-1zm3 12c0-.2-.1-.4-.3-.5 0-.2-.3-.3-.5-.4a14.7 14.7 0 00-2.8-.3h-1c-.4.1-.6.3-.8.5-.2.3-.3.5-.3.8 0 .2 0 .4.2.6 0 .2.2.3.4.4.2.2.5.3.9.3l1.2.1h1.3l1-.4.5-.5.1-.6zm-3-6.4c.3 0 .6 0 .8-.2.3 0 .5-.2.7-.3l.3-.6.2-.8c0-.5-.2-1-.5-1.3-.4-.3-.9-.5-1.5-.5-.7 0-1.2.2-1.5.5-.4.3-.5.8-.5 1.3v.8a1.6 1.6 0 001 1h1zm12.3-5.6c.8 0 1.6.2 2.2.4a5 5 0 013 3c.2.7.3 1.5.3 2.4a7 7 0 01-.4 2.5 4.9 4.9 0 01-2.9 3 6.2 6.2 0 01-4.6 0 5 5 0 01-2.8-3c-.3-.8-.4-1.6-.4-2.5 0-.9 0-1.7.4-2.4.2-.7.6-1.3 1-1.8a5 5 0 011.9-1.2c.6-.2 1.4-.4 2.3-.4zm0 9.6c.9 0 1.6-.3 2-1 .5-.6.7-1.5.7-2.7 0-1.3-.2-2.2-.7-2.8-.4-.7-1.1-1-2-1-1 0-1.7.3-2.2 1-.4.6-.6 1.5-.6 2.8 0 1.2.2 2 .6 2.7.5.7 1.2 1 2.2 1z"/>
|
||||
<path id="z" d="M251.3 286.2c-1 0-1.7-.3-2.2-.9-.6-.5-.8-1.3-.8-2.2v-6.4h-1.5l-.2-.5V275l1.8-.3.6-3c0-.2 0-.3.2-.4l.4-.1h1.4v3.5h3v2h-3v6c0 .4 0 .7.3.9.1.2.4.3.7.3h.4a2.3 2.3 0 00.5-.3h.4l.2.2.8 1.3c-.4.3-.9.6-1.4.7a5 5 0 01-1.6.3zm9.7-11.7c.8 0 1.6.2 2.2.4a5 5 0 013 3c.2.7.3 1.5.3 2.4a7 7 0 01-.4 2.5 4.9 4.9 0 01-2.9 3 6.2 6.2 0 01-4.6 0 5 5 0 01-2.8-3c-.3-.8-.4-1.6-.4-2.5 0-.9 0-1.7.4-2.4.2-.7.6-1.3 1-1.8a5 5 0 011.9-1.2c.6-.2 1.4-.4 2.3-.4zm0 9.6c.9 0 1.6-.3 2-1 .5-.6.7-1.5.7-2.7 0-1.3-.2-2.2-.7-2.8-.4-.7-1.1-1-2-1-1 0-1.7.3-2.2 1-.4.6-.6 1.5-.6 2.8 0 1.2.2 2 .6 2.7.5.7 1.2 1 2.2 1z"/>
|
||||
<path id="A" d="M310.4 270.2l.4.1.4.3 9 11.9a7.5 7.5 0 010-1.1v-11.2h2V286H321a1 1 0 01-.4 0 1 1 0 01-.3-.4l-9.2-11.9a14.1 14.1 0 010 1V286h-1.8v-15.8h1.1zm14.6 14.6a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4l.5.1c.2 0 .3.2.5.3a1.3 1.3 0 01.3 1 1.4 1.4 0 01-.3 1 1.3 1.3 0 01-1 .4 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.4-1zm10-5v6.2H333v-6.3l-5.8-9.5h1.9c.2 0 .3 0 .4.2.2 0 .2.2.3.3l3.6 6.2a7.6 7.6 0 01.7 1.4 13 13 0 01.6-1.4l3.6-6.2.3-.3.4-.2h2l-5.9 9.5zm5.2 5a1.4 1.4 0 01.4-1 1.3 1.3 0 011-.4l.5.1.4.3a1.3 1.3 0 01.4 1 1.4 1.4 0 01-.4 1 1.3 1.3 0 01-1 .4 1.4 1.4 0 01-1-.4 1.4 1.4 0 01-.3-1z"/>
|
||||
<path id="B" d="M126.5 269.1l.6 1.3a4.3 4.3 0 01-.4 3.3c-.4.7-.9 1.3-1.6 1.9l-.8-.6-.2-.1v-.2l.1-.3a6.5 6.5 0 00.6-.9 2.9 2.9 0 00.3-1.2l-.1-.8c0-.3-.2-.6-.4-1l-.1-.2c0-.3.1-.4.4-.5l1.6-.7zm9.7 7.8l-.2.3h-.3a29 29 0 00-1.6-.6h-1a2 2 0 00-1.2.3 1 1 0 00-.5.9l.3.6.6.4.9.3a29 29 0 012 .8l.9.5c.3.3.5.5.6.9.2.3.3.7.3 1.1 0 .6-.1 1-.3 1.5-.2.5-.5.9-.9 1.2a4 4 0 01-1.4.8 6.1 6.1 0 01-3 .2 6.7 6.7 0 01-2.1-.8l-.8-.5.7-1c0-.2.1-.3.3-.3l.4-.1.4.1a10.6 10.6 0 001.3.6l1 .2.8-.1.6-.3.3-.5.1-.5c0-.3 0-.5-.2-.6a2 2 0 00-.6-.5 29.4 29.4 0 01-3-1l-.9-.7-.6-.8c-.2-.4-.2-.8-.2-1.3a3.3 3.3 0 011-2.4 4 4 0 011.4-.8 5.6 5.6 0 014 .1c.6.3 1.1.6 1.5 1l-.6 1z"/>
|
||||
<path id="C" d="M429.6 269.1l.5 1.3a4.3 4.3 0 01-.3 3.3c-.4.7-1 1.3-1.6 1.9l-.9-.6-.1-.1v-.2-.3a7.8 7.8 0 00.7-.9l.2-.6a2.5 2.5 0 000-1.4l-.5-1v-.2c0-.3 0-.4.3-.5l1.7-.7zm3.6 0l.6 1.3a4.3 4.3 0 01-.4 3.3c-.3.7-.9 1.3-1.6 1.9l-.8-.6-.2-.1v-.2l.1-.3a7.8 7.8 0 00.6-.9l.2-.6a2.5 2.5 0 000-1.4c0-.3-.2-.6-.4-1l-.1-.2c0-.3.1-.4.4-.5l1.6-.7z"/>
|
||||
<path id="D" d="M387.8 270v6.4a19.2 19.2 0 01-.3 4h-1.9a41.8 41.8 0 01-.3-4V270h2.5zm-3 14.5a1.7 1.7 0 01.5-1.2 1.7 1.7 0 011.2-.5 1.6 1.6 0 011.2.5 1.7 1.7 0 01.3 1.9c0 .2-.2.3-.3.5l-.5.3-.7.2a1.7 1.7 0 01-1.2-.5l-.3-.5-.2-.7z"/>
|
||||
<path id="E" d="M13.5 341.8c-.5-.8-.8-1.6-.8-2.5 0-.7.2-1.4.6-2 .3-.7.9-1.4 1.6-2l.8.6.2.2V336.4l-.1.2a5.7 5.7 0 00-.6.8l-.2.6-.1.7.1.8c0 .3.2.5.4.9l.1.3c0 .2-.1.4-.4.5l-1.6.6zm3.6 0c-.5-.8-.7-1.6-.7-2.5 0-.7.2-1.4.5-2 .4-.7 1-1.4 1.6-2l.9.6.1.2v.5a5.7 5.7 0 00-.7.8l-.2.6v1.5l.5.9v.3c0 .2 0 .4-.3.5l-1.7.6z"/>
|
||||
<path id="F" d="M60 350.1h6.3v2.4h-9.4v-15.9h3v13.5zm12.4-9c.7 0 1.4.1 2 .3a4.3 4.3 0 012.5 2.6 6 6 0 01.4 2.7l-.1.3-.2.2h-7.3c0 1.2.4 2 1 2.6a3 3 0 002 .8l1.2-.1.9-.4.6-.3.5-.2h.3l.2.3.8 1-1 .8a5.7 5.7 0 01-2.4 1h-1.3a6 6 0 01-2.1-.4c-.7-.2-1.3-.6-1.8-1.1-.5-.5-.9-1.2-1.2-2a7.3 7.3 0 010-4.7c.2-.7.6-1.3 1-1.8a5 5 0 011.7-1.2c.7-.3 1.5-.4 2.3-.4zm0 2c-.7 0-1.3.2-1.8.6-.4.4-.7 1-.8 1.9h5v-1l-.5-.8a2 2 0 00-.8-.6l-1-.2zm10.8 9.6c-1 0-1.7-.3-2.2-.8-.6-.6-.8-1.3-.8-2.3v-6.3H79l-.4-.1-.1-.5v-1l1.8-.4.6-3c0-.2 0-.3.2-.4H82.9v3.5h3v1.9h-3v6.1c0 .4 0 .6.2.8.2.2.5.3.8.3h.4a2.3 2.3 0 00.5-.3h.4l.2.2.8 1.3c-.4.4-.9.6-1.4.8a5 5 0 01-1.6.2z"/>
|
||||
<path id="G" d="M184 341l1.3.2 1.2.4h3.2v1l-.1.5-.5.2-1 .1a3.5 3.5 0 01.3 1.3 3.4 3.4 0 01-1.3 2.7l-1.4.7a5.7 5.7 0 01-3 .1c-.3.3-.5.5-.5.8 0 .2 0 .4.3.5l.8.2h1.2a24.2 24.2 0 012.6.3l1.2.5c.3.2.6.4.8.8.2.3.3.8.3 1.3s0 1-.3 1.4c-.3.5-.6 1-1 1.3-.6.4-1.1.7-1.8.9a8.9 8.9 0 01-4.5 0c-.7-.1-1.2-.3-1.6-.6l-1-1-.2-1c0-.6.1-1 .4-1.4.4-.4.8-.6 1.4-.9-.3-.1-.5-.3-.7-.6-.2-.2-.2-.6-.2-1v-.5l.3-.5.5-.5.6-.5a3.3 3.3 0 01-1.8-3 3.4 3.4 0 011.2-2.7c.4-.3 1-.5 1.5-.7a6 6 0 011.8-.2zm3 12c0-.2-.1-.3-.3-.5l-.5-.3a14.7 14.7 0 00-2.8-.3l-1-.1-.8.6c-.2.2-.3.5-.3.8 0 .2 0 .4.2.5 0 .2.2.4.4.5l.9.3h2.5l1-.4c.2 0 .4-.3.5-.4l.1-.6zm-3-6.4l.8-.1.7-.4.3-.6.2-.7c0-.6-.2-1-.5-1.3-.4-.4-.9-.5-1.5-.5-.7 0-1.2.1-1.5.5-.4.3-.5.7-.5 1.3v.7a1.6 1.6 0 001 1l1 .1zm12.3-5.5c.8 0 1.6.1 2.2.4a5 5 0 013 3c.2.7.3 1.5.3 2.4a7 7 0 01-.4 2.4 4.9 4.9 0 01-2.9 3c-.6.3-1.4.4-2.2.4-.9 0-1.7-.1-2.3-.4a5 5 0 01-3-3c-.2-.7-.3-1.5-.3-2.4 0-1 0-1.7.4-2.4.2-.7.6-1.4 1-1.9a5 5 0 011.9-1.1c.6-.3 1.4-.4 2.3-.4zm0 9.5c.9 0 1.6-.3 2-1 .5-.6.7-1.5.7-2.7 0-1.2-.2-2.1-.7-2.8-.4-.6-1.1-1-2-1-1 0-1.7.4-2.2 1-.4.7-.6 1.6-.6 2.8 0 1.2.2 2.1.6 2.8.5.6 1.2 1 2.2 1z"/>
|
||||
<path id="H" d="M249.8 352.7c-1 0-1.7-.3-2.2-.8-.6-.6-.8-1.3-.8-2.3v-6.3h-1.2l-.3-.1-.2-.5v-1l1.8-.4.6-3c0-.2 0-.3.2-.4H249.5v3.5h3v1.9h-3v6.1c0 .4 0 .6.3.8.1.2.4.3.7.3h.4a2.3 2.3 0 00.5-.3h.4l.2.2.8 1.3c-.4.4-.9.6-1.4.8a5 5 0 01-1.6.2zm9.7-11.6c.8 0 1.6.1 2.2.4a5 5 0 013 3c.2.7.3 1.5.3 2.4a7 7 0 01-.4 2.4 4.9 4.9 0 01-2.9 3c-.6.3-1.4.4-2.2.4-.9 0-1.7-.1-2.3-.4a5 5 0 01-3-3c-.2-.7-.3-1.5-.3-2.4 0-1 0-1.7.4-2.4.2-.7.6-1.4 1-1.9a5 5 0 011.9-1.1c.6-.3 1.4-.4 2.3-.4zm0 9.5c.9 0 1.6-.3 2-1 .5-.6.7-1.5.7-2.7 0-1.2-.2-2.1-.7-2.8-.4-.6-1.1-1-2-1-1 0-1.7.4-2.2 1-.4.7-.6 1.6-.6 2.8 0 1.2.2 2.1.6 2.8.5.6 1.2 1 2.2 1z"/>
|
||||
<path id="I" d="M312.3 336.6h.4l.2.1.2.2.2.2 8.4 10.6a11 11 0 010-1.4v-9.7h2.5v16h-1.5c-.2 0-.4 0-.6-.2-.2 0-.3-.2-.4-.4l-8.4-10.6a15.3 15.3 0 01.1 1.4v9.7h-2.6v-15.9h1.5zm14.3 14.4a1.7 1.7 0 01.5-1.1 1.7 1.7 0 011.2-.5 1.6 1.6 0 011.2.5 1.7 1.7 0 01.3 1.8c0 .2-.2.4-.3.5l-.6.4-.6.1a1.7 1.7 0 01-1.2-.5c-.1-.1-.3-.3-.3-.5l-.2-.7zm11-4.6v6.1h-3v-6.1l-5.7-9.8h2.6l.6.2.4.5 2.9 5.3a13.3 13.3 0 01.8 1.7 12 12 0 01.7-1.7l2.9-5.3c0-.2.2-.3.4-.5l.6-.2h2.6l-5.8 9.8zm4.7 4.6a1.7 1.7 0 01.5-1.1 1.7 1.7 0 011.2-.5 1.6 1.6 0 011.1.5 1.7 1.7 0 01.4 1.8c0 .2-.2.4-.4.5l-.5.4-.6.1a1.7 1.7 0 01-1.2-.5l-.4-.5-.1-.7z"/>
|
||||
<path id="J" d="M128 335.7l.6 1.2a4.3 4.3 0 01-.4 3.4c-.4.6-.9 1.2-1.6 1.8l-.8-.5-.2-.2v-.2l.1-.3a6.5 6.5 0 00.6-.9 2.9 2.9 0 00.3-1.2l-.1-.8c0-.3-.2-.6-.4-.9l-.1-.3c0-.2.1-.4.4-.5l1.6-.6zm9.7 7.8l-.2.2h-.3a29 29 0 00-1.6-.6h-1a2 2 0 00-1.2.3 1 1 0 00-.5.9l.3.6c.1.2.3.3.6.4l.9.4a29 29 0 012 .7l.9.6c.3.2.5.5.6.8.2.3.3.7.3 1.2l-.3 1.5-.9 1.2a4 4 0 01-1.4.7 6.1 6.1 0 01-3 .2 6.7 6.7 0 01-2.1-.7l-.8-.6.7-1 .3-.3h.8a10.6 10.6 0 001.3.7l1 .1.8-.1.6-.3.3-.4.1-.5c0-.3 0-.5-.2-.7a2 2 0 00-.6-.4 29.4 29.4 0 01-3-1l-.9-.7-.6-.9c-.2-.3-.2-.8-.2-1.3a3.3 3.3 0 011-2.4 4 4 0 011.4-.7 5.6 5.6 0 014 .1c.6.2 1.1.6 1.5 1l-.6 1z"/>
|
||||
<path id="K" d="M429.6 335.7l.5 1.2a4.3 4.3 0 01-.3 3.4l-1.6 1.8-.9-.5-.1-.2v-.2-.3a7.8 7.8 0 00.7-.9l.2-.5a2.5 2.5 0 000-1.5l-.5-.9v-.3c0-.2 0-.4.3-.5l1.7-.6zm3.6 0l.6 1.2a4.3 4.3 0 01-.4 3.4c-.3.6-.9 1.2-1.6 1.8l-.8-.5-.2-.2v-.2l.1-.3a7.8 7.8 0 00.6-.9l.2-.5a2.5 2.5 0 000-1.5c0-.3-.2-.6-.4-.9l-.1-.3c0-.2.1-.4.4-.5l1.6-.6z"/>
|
||||
<path id="L" d="M387.8 336.6v6.3a19.2 19.2 0 01-.3 4h-1.9a41.8 41.8 0 01-.3-4v-6.3h2.5zm-3 14.4a1.7 1.7 0 01.5-1.1 1.7 1.7 0 011.2-.5 1.6 1.6 0 011.2.5 1.7 1.7 0 01.3 1.8c0 .2-.2.4-.3.5l-.5.4-.7.1a1.7 1.7 0 01-1.2-.5l-.3-.5-.2-.7z"/>
|
||||
<path id="M" d="M16.4 11.3V15H14V4h3.9c.7 0 1.4.2 2 .3l1.3.8c.4.3.6.7.8 1.1l.3 1.4c0 .6-.1 1-.3 1.5a3 3 0 01-.8 1.2c-.4.3-.8.6-1.4.8-.5.2-1.2.2-2 .2h-1.3zm0-1.9h1.4c.6 0 1-.1 1.4-.4.3-.4.4-.8.4-1.4l-.1-.6a1.4 1.4 0 00-1-1H16.5v3.4zM26 11v4h-2.5V4H27c.8 0 1.5.2 2 .3l1.4.7c.4.3.6.6.8 1l.2 1.3-.1 1a3 3 0 01-1.1 1.6l-1 .5.5.3.4.5 2.3 3.8h-2.3c-.4 0-.7-.2-.9-.5l-1.8-3.2-.3-.3a1 1 0 00-.5 0H26zm0-1.8h1l1-.1.5-.4c.2-.1.3-.3.3-.5l.1-.7c0-.5-.1-.9-.4-1.1-.3-.3-.8-.4-1.5-.4h-1v3.2zm14.5-5.1v2H36v2.5h3.4v1.8H36v2.7h4.5V15h-7V4h7zM49 4v2h-4.5v2.7h3.7v2h-3.7V15h-2.6V4h7zM53 15h-2.5V4H53v11zm4.8-5.6L54.4 4h2.9l.2.3L59.7 8v-.2l.2-.1 1.9-3.3c0-.2.3-.3.5-.3h2.4l-3.4 5.2 3.5 5.7h-2.6l-.4-.1a1 1 0 01-.2-.3l-2.2-3.8-.1.3-2 3.5-.3.3-.4.1h-2.4l3.6-5.6z"/>
|
||||
<path id="N" d="M20 136.3l-.2.3h-.4-.3a67.9 67.9 0 00-1-.5l-.8-.2c-.5 0-.8.1-1 .4a1 1 0 00-.4.8c0 .2 0 .4.2.5.1.2.3.3.6.4l.7.3a19.6 19.6 0 011.8.7l.8.5a2.6 2.6 0 01.8 2c0 .5-.1 1-.3 1.4a3.3 3.3 0 01-2 2l-1.6.2a5.3 5.3 0 01-2.1-.4 6 6 0 01-1-.4 4 4 0 01-.7-.6l.8-1.2s0-.2.2-.2l.3-.1.5.1a32.8 32.8 0 001.1.7l1 .1c.4 0 .7 0 1-.3.3-.2.4-.5.4-1 0-.2 0-.4-.2-.6l-.6-.4a23.2 23.2 0 01-2.6-.9l-.7-.5-.6-1a3.5 3.5 0 010-2.4l.8-1c.3-.3.7-.6 1.2-.8.4-.2 1-.2 1.6-.2a6 6 0 011.9.3 5 5 0 011.4.8l-.6 1.2zm6.6 6.7c.3 0 .6 0 .9-.2.3 0 .5-.2.7-.5l.4-.7.1-1V134h2.6v6.4a5 5 0 01-.4 1.9 4.1 4.1 0 01-2.4 2.4c-.5.2-1.2.3-2 .3-.7 0-1.3 0-1.9-.3-.6-.2-1-.6-1.5-1-.4-.4-.7-.8-.9-1.4-.2-.6-.3-1.2-.3-1.9v-6.4h2.5v6.4c0 .4 0 .8.2 1 0 .4.2.6.4.8.2.3.4.4.7.5l.9.2zm13.4-9v2h-4.5v2.8h3.7v2h-3.7v4.2h-2.6v-11h7zm8.3 0v2h-4.5v2.8h3.8v2h-3.8v4.2h-2.5v-11h7zm4.1 11H50v-11h2.5v11zm4.7-5.6l-3.4-5.3h2.9l.2.3L59 138l.1-.2.1-.1 2-3.3c0-.2.2-.3.4-.3h2.5l-3.5 5.2 3.5 5.7h-2.5l-.4-.1a1 1 0 01-.2-.3l-2.2-3.8-.2.3-2 3.5-.3.3-.3.1h-2.4l3.5-5.6z"/>
|
||||
<path id="O" d="M20 201.3l-.2.3h-.4-.3a67.9 67.9 0 00-1-.5l-.8-.2c-.5 0-.8.1-1 .4a1 1 0 00-.4.8c0 .2 0 .4.2.5.1.2.3.3.6.4l.7.3a19.6 19.6 0 011.8.7l.8.5a2.6 2.6 0 01.8 2c0 .5-.1 1-.3 1.4a3.3 3.3 0 01-2 2l-1.6.2a5.3 5.3 0 01-2.1-.4 6 6 0 01-1-.4 4 4 0 01-.7-.6l.8-1.2s0-.2.2-.2l.3-.1.5.1a32.8 32.8 0 001.1.7l1 .1c.4 0 .7 0 1-.3.3-.2.4-.5.4-1 0-.2 0-.4-.2-.6l-.6-.4a23.2 23.2 0 01-2.6-.9l-.7-.5-.6-1a3.5 3.5 0 010-2.4l.8-1c.3-.3.7-.6 1.2-.8.4-.2 1-.2 1.6-.2a6 6 0 011.9.3 5 5 0 011.4.8l-.6 1.2zm6.6 6.7c.3 0 .6 0 .9-.2.3 0 .5-.2.7-.5l.4-.7.1-1V199h2.6v6.4a5 5 0 01-.4 1.9 4.1 4.1 0 01-2.4 2.4c-.5.2-1.2.3-2 .3-.7 0-1.3 0-1.9-.3-.6-.2-1-.6-1.5-1-.4-.4-.7-.8-.9-1.4-.2-.6-.3-1.2-.3-1.9v-6.4h2.5v6.4c0 .4 0 .8.2 1 0 .4.2.6.4.8.2.3.4.4.7.5l.9.2zm13.4-9v2h-4.5v2.8h3.7v2h-3.7v4.2h-2.6v-11h7zm8.3 0v2h-4.5v2.8h3.8v2h-3.8v4.2h-2.5v-11h7zm4.1 11H50v-11h2.5v11zm4.7-5.6l-3.4-5.3h2.9l.2.3L59 203l.1-.2.1-.1 2-3.3c0-.2.2-.3.4-.3h2.5l-3.5 5.2 3.5 5.7h-2.5l-.4-.1a1 1 0 01-.2-.3l-2.2-3.8-.2.3-2 3.5-.3.3-.3.1h-2.4l3.5-5.6z"/>
|
||||
<path id="P" d="M8 264v2H3.4v2.6h3.4v1.8H3.4v2.7H8v1.9H1v-11h7zm4 5.4L8.8 264h2.9l.2.3L14 268v-.2l.2-.1 1.9-3.3c0-.2.3-.3.5-.3H19l-3.4 5.2L19 275h-2.6l-.4-.1a1 1 0 01-.2-.3l-2.2-3.8-.1.3-2 3.5-.3.3-.4.1H8.6l3.5-5.6zm15.2 2.8h.2l.1.1 1 1c-.4.7-1 1-1.6 1.4-.7.3-1.5.4-2.4.4-.8 0-1.5-.1-2.2-.4a4.8 4.8 0 01-2.7-3 6.5 6.5 0 010-4.4 5.2 5.2 0 013-3 6.3 6.3 0 014.5 0 4.8 4.8 0 011.6 1.1l-.9 1.2-.2.1-.3.1H27l-.2-.2a45 45 0 00-1.2-.5 3.5 3.5 0 00-2 .2l-1 .7-.6 1-.2 1.5c0 .6 0 1 .2 1.5l.6 1.1a2.6 2.6 0 002 1l.7-.1a2.6 2.6 0 001.5-.7h.2l.2-.1zm9.5-8.1v2h-4.5v2.5h3.5v1.8h-3.5v2.7h4.5v1.9h-7v-11h7zm4 7.2v3.7h-2.5v-11H42c.8 0 1.4.2 2 .3.6.2 1 .5 1.4.8l.8 1.1.2 1.4c0 .6 0 1-.3 1.5a3 3 0 01-.8 1.2l-1.3.8c-.6.2-1.2.2-2 .2h-1.3zm0-1.9H42c.7 0 1.1-.1 1.4-.4.3-.4.5-.8.5-1.4l-.1-.6a1.4 1.4 0 00-1-1h-2.1v3.4zm15-5.3v2h-3.1v8.9H50v-9H47v-2h8.7zm3.8 10.9h-2.6v-11h2.6v11zm12.8-5.5c0 .8-.1 1.6-.4 2.2a5.3 5.3 0 01-5.3 3.4c-.8 0-1.6-.1-2.3-.4a5.3 5.3 0 01-3.4-5.2c0-.8.2-1.5.5-2.2a5.2 5.2 0 013-3c.6-.2 1.4-.3 2.2-.3a6 6 0 012.4.4 5.4 5.4 0 013.3 5.1zm-2.6 0c0-.5 0-1-.2-1.4a3 3 0 00-.6-1.1c-.3-.3-.6-.6-1-.7a3.4 3.4 0 00-2.6 0c-.4.1-.7.4-1 .7a3 3 0 00-.5 1l-.3 1.5c0 .6.1 1 .3 1.5 0 .4.3.8.6 1.1.2.3.5.5 1 .7l1.2.2c.5 0 1 0 1.3-.2l1-.7c.3-.3.5-.7.6-1.1l.2-1.5zm5.1-5.4h.5l.2.2.2.2 5.2 6.5a13.8 13.8 0 010-1.1V264H83V275h-1.9a1 1 0 01-.4-.4l-5.1-6.5a23.3 23.3 0 010 1v5.9h-2.2v-11h1.3z"/>
|
||||
<path id="Q" d="M31.8 334.5c0 .8-.1 1.6-.4 2.2a5.1 5.1 0 01-3 2.9c-.6.3-1.4.4-2.3.4H22v-11h4.2c.9 0 1.7.2 2.4.5s1.3.6 1.8 1.1c.5.5.8 1 1.1 1.8.3.6.4 1.3.4 2.1zm-2.6 0c0-.5 0-1-.2-1.4-.1-.5-.3-.8-.6-1.1-.3-.3-.6-.6-1-.7-.3-.2-.8-.3-1.3-.3h-1.7v7h1.7c.5 0 1 0 1.3-.2l1-.7c.3-.3.5-.7.6-1.1.2-.4.2-1 .2-1.5zm14.6 0c0 .8-.1 1.6-.4 2.2a5.3 5.3 0 01-5.3 3.4c-.8 0-1.6-.1-2.3-.4a5.3 5.3 0 01-3.3-5.2c0-.8.1-1.5.4-2.2a5.2 5.2 0 013-3c.6-.2 1.4-.3 2.2-.3a6 6 0 012.4.4 5.4 5.4 0 013.3 5.1zm-2.6 0c0-.5 0-1-.2-1.4a3 3 0 00-.6-1.1c-.3-.3-.6-.6-1-.7a3.4 3.4 0 00-2.6 0l-1 .7a3 3 0 00-.5 1c-.2.5-.2 1-.2 1.5 0 .6 0 1 .2 1.5.1.4.3.8.6 1.1.2.3.6.5 1 .7l1.2.2c.5 0 1 0 1.3-.2l1-.7c.3-.3.5-.7.6-1.1.2-.4.2-1 .2-1.5zm5.2-5.4h.4l.2.2.2.2 5.2 6.5a13.8 13.8 0 010-1.1V329h2.2V340H52.8a1 1 0 01-.4-.4l-5.2-6.5a23.3 23.3 0 010 1v5.9H45v-11h1.4zm17 0v2H59v2.5h3.5v1.8h-3.5v2.7h4.5v1.9h-7v-11h7z"/>
|
||||
<path id="R" d="M8 69v2H3.4v2.6h3.4v1.8H3.4V78H8v2H1V69h7zm4 5.4L8.8 69h2.9l.2.3L14 73v-.2l.2-.1 1.9-3.3c0-.2.3-.3.5-.3H19l-3.4 5.2L19 80h-2.6l-.4-.1a1 1 0 01-.2-.3l-2.2-3.8-.1.3-2 3.5-.3.3-.4.1H8.6l3.5-5.6zm15.2 2.8h.2l.1.1 1 1c-.4.7-1 1-1.6 1.4-.7.3-1.5.4-2.4.4-.8 0-1.5-.1-2.2-.4a4.8 4.8 0 01-2.7-3 6.5 6.5 0 010-4.4 5.2 5.2 0 013-3 6.3 6.3 0 014.5 0 4.8 4.8 0 011.6 1.1l-.9 1.2-.2.1-.3.1H27l-.2-.2a45 45 0 00-1.2-.5 3.5 3.5 0 00-2 .2l-1 .7-.6 1-.2 1.5c0 .6 0 1 .2 1.5l.6 1.1a2.6 2.6 0 002 1l.7-.1a2.6 2.6 0 001.5-.7h.2l.2-.1zm9.5-8.1v2h-4.5v2.5h3.5v1.8h-3.5V78h4.5v2h-7V69h7zm4 7.2V80h-2.5V69H42c.8 0 1.4.2 2 .3.6.2 1 .5 1.4.8l.8 1.1.2 1.4c0 .6 0 1-.3 1.5a3 3 0 01-.8 1.2l-1.3.8c-.6.2-1.2.2-2 .2h-1.3zm0-1.9H42c.7 0 1.1-.1 1.4-.4.3-.4.5-.8.5-1.4l-.1-.6a1.4 1.4 0 00-1-1h-2.1v3.4zm15-5.3v2h-3.1V80H50v-9H47v-2h8.7zM59.5 80h-2.6V69h2.6v11zm12.8-5.5c0 .8-.1 1.6-.4 2.2a5.3 5.3 0 01-5.3 3.4c-.8 0-1.6-.1-2.3-.4a5.3 5.3 0 01-3.4-5.2c0-.8.2-1.5.5-2.2a5.2 5.2 0 013-3c.6-.2 1.4-.3 2.2-.3a6 6 0 012.4.4 5.4 5.4 0 013.3 5.1zm-2.6 0c0-.5 0-1-.2-1.4a3 3 0 00-.6-1.1c-.3-.3-.6-.6-1-.7a3.4 3.4 0 00-2.6 0c-.4.1-.7.4-1 .7a3 3 0 00-.5 1l-.3 1.5c0 .6.1 1 .3 1.5 0 .4.3.8.6 1.1.2.3.5.5 1 .7l1.2.2c.5 0 1 0 1.3-.2l1-.7c.3-.3.5-.7.6-1.1l.2-1.5zm5.1-5.4h.5l.2.2.2.2 5.2 6.5a13.8 13.8 0 010-1.1V69H83V80h-1.9a1 1 0 01-.4-.4L75.8 73a23.3 23.3 0 010 1V80h-2.2V69h1.3z"/>
|
||||
</defs>
|
||||
<g fill="none" fill-rule="evenodd">
|
||||
<g stroke-linejoin="round" stroke-width="3.8">
|
||||
<path stroke="#3AC" d="M82.4 46.5v13h-60v12m60-25v13h21.8v12"/>
|
||||
<path fill="#C3E7F1" stroke="#3AC" d="M6 5h152.7v41.7H6z"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M195.8 46.5v25"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M168.5 5h54.6v41.7h-54.6z"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M261.3 46.5v25"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M234 5h54.5v41.7H234z"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M377 46.5v25"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M299.5 5h153.8v41.7H299.5z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M22.4 113v21.8"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M6 71.5h32.7v41.7H6z"/>
|
||||
<path stroke="#3AC" d="M104.2 113v12H76.9v9.8m27.3-21.8v12h31.6v9.8"/>
|
||||
<path fill="#C3E7F1" stroke="#3AC" d="M49.6 71.5h109.1v41.7H49.6z"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M195.8 113v21.8"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M168.5 71.5h54.6v41.7h-54.6z"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M261.3 113v21.8"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M234 71.5h54.5v41.7H234z"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M377 113v21.8"/>
|
||||
<path fill="#F5F5F5" stroke="#B7B7B7" d="M299.5 71.5h153.8v41.7H299.5z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M6 134.8h32.7v41.5H6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M77 176.3v26.2"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M49.6 134.8h54.6v41.5H49.6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M195.8 176.3v26.2"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M168.5 134.8h54.6v41.5h-54.6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M261.3 176.3v26.2"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M234 134.8h54.5v41.5H234z"/>
|
||||
<path stroke="#3AC" d="M377 176.3v14.2h-22v12m22-26.2v14.2h60v12"/>
|
||||
<path fill="#C3E7F1" stroke="#3AC" d="M299.5 134.8h153.8v41.5H299.5z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M135.8 176.3v26.2"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M114 134.8h43.6v41.5H114z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M22.4 244v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M6 202.2h32.7v41.7H6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M77 244v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M49.6 202.2h54.6v41.7H49.6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M195.8 244v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M168.5 202.2h54.6v41.7h-54.6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M261.3 244v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M234 202.2h54.5v41.7H234z"/>
|
||||
<path stroke="#3AC" d="M355 244v12h-21.7v13m21.8-25v12h37v13"/>
|
||||
<path fill="#C3E7F1" stroke="#3AC" d="M299.5 202.2h110.1v41.7H299.5z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M135.8 244v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M114 202.2h43.6v41.7H114z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M437 244v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M420.5 202.2h32.8v41.7h-32.8z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M22.4 310.5v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M6 268.7h32.7v41.8H6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M77 310.5v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M49.6 268.7h54.6v41.8H49.6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M195.8 310.5v21.8-18.6 21.8"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M168.5 268.7h54.6v41.8h-54.6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M234 268.7h54.5v41.8H234z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M333.3 310.5v25"/>
|
||||
<path fill="#C3E7F1" stroke="#3AC" d="M299.5 268.7H366v41.8h-66.5z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M135.8 310.5v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M114 268.7h43.6v41.8H114z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M437 310.5v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M420.5 268.7h32.8v41.8h-32.8z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M392.2 310.5v25"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M375.8 268.7h32.7v41.8h-32.7z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M8 335.5h28.7a2 2 0 012 2V375a2 2 0 01-2 2H8a2 2 0 01-2-2v-37.5c0-1 .9-2 2-2z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M49.6 335.5h54.6V377H49.6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M168.5 335.5h54.6V377h-54.6z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M234 335.5h54.5V377H234z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M299.5 335.5H366V377h-66.5z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M114 335.5h43.6V377H114z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M420.5 335.5h32.8V377h-32.8z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M375.8 335.5h32.7V377h-32.7z"/>
|
||||
<path fill="#B5F3D4" stroke="#3AD787" d="M261.3 310.5v25"/>
|
||||
</g>
|
||||
<g fill-rule="nonzero">
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#a"/>
|
||||
<use fill="#1A1E23" xlink:href="#a"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#b"/>
|
||||
<use fill="#1A1E23" xlink:href="#b"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#c"/>
|
||||
<use fill="#1A1E23" xlink:href="#c"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#d"/>
|
||||
<use fill="#1A1E23" xlink:href="#d"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#e"/>
|
||||
<use fill="#1A1E23" xlink:href="#e"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#f"/>
|
||||
<use fill="#1A1E23" xlink:href="#f"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#g"/>
|
||||
<use fill="#1A1E23" xlink:href="#g"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#h"/>
|
||||
<use fill="#1A1E23" xlink:href="#h"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#i"/>
|
||||
<use fill="#1A1E23" xlink:href="#i"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#j"/>
|
||||
<use fill="#1A1E23" xlink:href="#j"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#k"/>
|
||||
<use fill="#1A1E23" xlink:href="#k"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#l"/>
|
||||
<use fill="#1A1E23" xlink:href="#l"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#m"/>
|
||||
<use fill="#1A1E23" xlink:href="#m"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#n"/>
|
||||
<use fill="#1A1E23" xlink:href="#n"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#o"/>
|
||||
<use fill="#1A1E23" xlink:href="#o"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#p"/>
|
||||
<use fill="#1A1E23" xlink:href="#p"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#q"/>
|
||||
<use fill="#1A1E23" xlink:href="#q"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#r"/>
|
||||
<use fill="#1A1E23" xlink:href="#r"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#s"/>
|
||||
<use fill="#1A1E23" xlink:href="#s"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#t"/>
|
||||
<use fill="#1A1E23" xlink:href="#t"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#u"/>
|
||||
<use fill="#1A1E23" xlink:href="#u"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#v"/>
|
||||
<use fill="#1A1E23" xlink:href="#v"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#w"/>
|
||||
<use fill="#1A1E23" xlink:href="#w"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#x"/>
|
||||
<use fill="#1A1E23" xlink:href="#x"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#y"/>
|
||||
<use fill="#1A1E23" xlink:href="#y"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#z"/>
|
||||
<use fill="#1A1E23" xlink:href="#z"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#A"/>
|
||||
<use fill="#1A1E23" xlink:href="#A"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#B"/>
|
||||
<use fill="#1A1E23" xlink:href="#B"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#C"/>
|
||||
<use fill="#1A1E23" xlink:href="#C"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#D"/>
|
||||
<use fill="#1A1E23" xlink:href="#D"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#E"/>
|
||||
<use fill="#1A1E23" xlink:href="#E"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#F"/>
|
||||
<use fill="#1A1E23" xlink:href="#F"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#G"/>
|
||||
<use fill="#1A1E23" xlink:href="#G"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#H"/>
|
||||
<use fill="#1A1E23" xlink:href="#H"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#I"/>
|
||||
<use fill="#1A1E23" xlink:href="#I"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#J"/>
|
||||
<use fill="#1A1E23" xlink:href="#J"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#K"/>
|
||||
<use fill="#1A1E23" xlink:href="#K"/>
|
||||
</g>
|
||||
<g transform="translate(6 11)">
|
||||
<use fill="#3D4251" xlink:href="#L"/>
|
||||
<use fill="#1A1E23" xlink:href="#L"/>
|
||||
</g>
|
||||
</g>
|
||||
<rect width="101" height="20" x="483" y="16" fill="#3AC" fill-rule="nonzero" stroke="#3AC" stroke-width="2.2" rx="10"/>
|
||||
<rect width="101" height="20" x="483" y="146" fill="#3AC" fill-rule="nonzero" stroke="#3AC" stroke-width="2.2" rx="10"/>
|
||||
<rect width="101" height="20" x="483" y="211" fill="#3AC" fill-rule="nonzero" stroke="#3AC" stroke-width="2.2" rx="10"/>
|
||||
<rect width="101" height="20" x="483" y="276" fill="#3AC" fill-rule="nonzero" stroke="#3AC" stroke-width="2.2" rx="10"/>
|
||||
<rect width="101" height="20" x="483" y="341" fill="#3AD787" fill-rule="nonzero" stroke="#3AD787" stroke-width="2.2" rx="10"/>
|
||||
<rect width="101" height="20" x="483" y="81" fill="#3AC" fill-rule="nonzero" stroke="#3AC" stroke-width="2.2" rx="10"/>
|
||||
<g fill-rule="nonzero">
|
||||
<g transform="translate(493 16)">
|
||||
<use fill="#000" xlink:href="#M"/>
|
||||
<use fill="#FFF" xlink:href="#M"/>
|
||||
</g>
|
||||
<g transform="translate(493 16)">
|
||||
<use fill="#000" xlink:href="#N"/>
|
||||
<use fill="#FFF" xlink:href="#N"/>
|
||||
</g>
|
||||
<g transform="translate(493 16)">
|
||||
<use fill="#000" xlink:href="#O"/>
|
||||
<use fill="#FFF" xlink:href="#O"/>
|
||||
</g>
|
||||
<g transform="translate(493 16)">
|
||||
<use fill="#000" xlink:href="#P"/>
|
||||
<use fill="#FFF" xlink:href="#P"/>
|
||||
</g>
|
||||
<g transform="translate(493 16)">
|
||||
<use fill="#000" xlink:href="#Q"/>
|
||||
<use fill="#FFF" xlink:href="#Q"/>
|
||||
</g>
|
||||
<g transform="translate(493 16)">
|
||||
<use fill="#000" xlink:href="#R"/>
|
||||
<use fill="#FFF" xlink:href="#R"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
|
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 45 KiB |
|
@ -1,40 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="612" height="280" viewBox="-10 -10 622 360">
|
||||
<style>
|
||||
.svg__trainloop__text { fill: #1a1e23; font: 18px Arial, sans-serif }
|
||||
.svg__trainloop__text-small { fill: #1a1e23; font: 16px Arial, sans-serif }
|
||||
</style>
|
||||
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M121 264h31.8"/>
|
||||
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M158.8 264l-8 4 2-4-2-4z"/>
|
||||
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M121 229h31.8"/>
|
||||
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M158.8 229l-8 4 2-4-2-4z"/>
|
||||
<rect width="120" height="71" x="1" y="211" fill="#f5f5f5" stroke="#666" stroke-width="2" rx="10.7" ry="10.7"/>
|
||||
<text class="svg__trainloop__text" dy="1em" transform="translate(11.5 236.5)" width="93" height="18">Training data</text>
|
||||
<path fill="none" stroke="#d6b656" stroke-width="2" stroke-miterlimit="10" d="M221 279v22h330v-30.8"/>
|
||||
<path fill="#d6b656" stroke="#d6b656" stroke-width="2" stroke-miterlimit="10" d="M551 264.2l4 8-4-2-4 2z"/>
|
||||
<path fill="#fff2cc" stroke="#d6b656" stroke-width="2" d="M161 249h120v30H161z"/>
|
||||
<text class="svg__trainloop__text-small" dy="1em" transform="translate(202.5 254.5)" width="35" height="18">label</text>
|
||||
<path fill="none" stroke="#9673a6" stroke-width="2" stroke-miterlimit="10" d="M281 229h36.8"/>
|
||||
<path fill="#9673a6" stroke="#9673a6" stroke-width="2" stroke-miterlimit="10" d="M323.8 229l-8 4 2-4-2-4z"/>
|
||||
<path fill="#e1d5e7" stroke="#9673a6" stroke-width="2" d="M161 214h120v30H161z"/>
|
||||
<text class="svg__trainloop__text-small" dy="1em" transform="translate(206.5 219.5)" width="27" height="18">text</text>
|
||||
<path fill="none" stroke="#9673a6" stroke-width="2" stroke-miterlimit="10" d="M446 229h36.8"/>
|
||||
<path fill="#9673a6" stroke="#9673a6" stroke-width="2" stroke-miterlimit="10" d="M488.8 229l-8 4 2-4-2-4z"/>
|
||||
<path fill="none" stroke="#f33" stroke-width="2" stroke-miterlimit="10" d="M416 194l30.6-48"/>
|
||||
<path fill="#f33" stroke="#f33" stroke-width="2" stroke-miterlimit="10" d="M449.8 141l-1 8.8-2.2-4-4.5-.3z"/>
|
||||
<path fill="#e1d5e7" stroke="#9673a6" stroke-width="2" d="M326 194h120v69H326z"/>
|
||||
<text class="svg__trainloop__text" dy="1em" transform="translate(371.5 218.5)" width="27" height="18">Doc</text>
|
||||
<path fill="none" stroke="#f33" stroke-width="2" stroke-miterlimit="10" d="M521 195l-35.2-49.3"/>
|
||||
<path fill="#f33" stroke="#f33" stroke-width="2" stroke-miterlimit="10" d="M482.3 140.8l8 4.2-4.5.7-2 4z"/>
|
||||
<path fill="#fff2cc" stroke="#d6b656" stroke-width="2" d="M491 195h120v67H491z"/>
|
||||
<text class="svg__trainloop__text" dy="1em" transform="translate(513.5 218.5)" width="73" height="18">Example</text>
|
||||
<path fill="none" stroke="#f33" stroke-width="2" stroke-miterlimit="10" d="M466 59V21h-40.8"/>
|
||||
<path fill="#f33" stroke="#f33" stroke-width="2" stroke-miterlimit="10" d="M419.2 21l8-4-2 4 2 4z"/>
|
||||
<path fill="#f99" stroke="#f33" stroke-width="2" stroke-miterlimit="10" d="M436 59h60l30 40-30 40h-60l-30-40z"/>
|
||||
<text class="svg__trainloop__text" dy="0.85em" transform="translate(442.5 90.5)" width="45" height="16">update</text>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M342 1h60l30 40-30 40h-60l-30-40z"/>
|
||||
<text class="svg__trainloop__text" dy="0.8em" transform="translate(360.5 32.5)" width="21" height="16">nlp</text>
|
||||
<path fill="none" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M341 99h56.8"/>
|
||||
<path fill="#82b366" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M403.8 99l-8 4 2-4-2-4z"/>
|
||||
<path fill="#d5e8d4" stroke="#82b366" stroke-width="2" stroke-miterlimit="10" d="M251 59h60l30 40-30 40h-60l-30-40z"/>
|
||||
<text class="svg__trainloop__text" dy="0.85em" transform="translate(245.5 90.5)" width="61" height="16">optimizer</text>
|
||||
</svg>
|
Before Width: | Height: | Size: 3.9 KiB |
|
@ -1,77 +1,118 @@
|
|||
<svg class="o-svg" xmlns="http://www.w3.org/2000/svg" viewBox="-10 -10 582 365" width="572" height="355">
|
||||
<style>
|
||||
.svg__vocab__text { fill: #1a1e23; font: 18px Arial, sans-serif }
|
||||
.svg__vocab__text-large { fill: #fff; font: bold 18px Arial, sans-serif; text-transform: uppercase }
|
||||
.svg__vocab__text-box { fill: #fff; font: bold 12px Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace }
|
||||
.svg__vocab__text-code { fill: #1a1e23; font: bold 12px Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace }
|
||||
</style>
|
||||
<rect width="570" height="88" x="1" y="135" fill="#d5e8d4" stroke="#82b366" stroke-width="2" rx="13.2" ry="13.2"/>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M444 164h100v40H444z"/>
|
||||
<text class="svg__vocab__text" dx="-0.5em" dy="1em" transform="translate(477.5 174.5)" width="31" height="17">31979...</text>
|
||||
<rect width="52" height="20" x="468" y="152" fill="#666" rx="3" ry="3"/>
|
||||
<text class="svg__vocab__text-box" dy="0.9em" width="44" height="12" transform="translate(471.5 155.5)">Lexeme</text>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M76 164h100v40H76z"/>
|
||||
<text class="svg__vocab__text" dx="-0.5em" dy="1em" width="23" height="17" transform="translate(113.5 174.5)">46904...</text>
|
||||
<rect width="52" height="20" x="100" y="152" fill="#666" rx="3" ry="3"/>
|
||||
<text class="svg__vocab__text-box" dy="0.9em" width="44" height="12" transform="translate(103.5 155.5)">Lexeme</text>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M263 164h100v40H263z"/>
|
||||
<text class="svg__vocab__text" dx="-0.7em" dy="1em" width="23" height="17" transform="translate(300.5 174.5)">37020...</text>
|
||||
<rect width="52" height="20" x="287" y="152" fill="#666" rx="3" ry="3"/>
|
||||
<text class="svg__vocab__text-box" dy="0.9em" width="44" height="12" transform="translate(290.5 155.5)">Lexeme</text>
|
||||
<rect width="570" height="88" x="1" y="246" fill="#f5f5f5" stroke="#666" stroke-width="2" rx="13.2" ry="13.2"/>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M444 275h100v40H444z"/>
|
||||
<text class="svg__vocab__text" dy="1em" width="55" height="17" transform="translate(465.5 285.5)">"coffee"</text>
|
||||
<rect width="52" height="20" x="468" y="263" fill="#666" rx="3" ry="3"/>
|
||||
<text class="svg__vocab__text-box" dx="-0.5em" dy="0.9em" width="28" height="12" transform="translate(479.5 266.5)">31979…</text>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M76 275h100v40H76z"/>
|
||||
<text class="svg__vocab__text" dy="1em" width="17" height="17" transform="translate(116.5 285.5)">"I"</text>
|
||||
<rect width="52" height="20" x="100" y="263" fill="#666" rx="3" ry="3"/>
|
||||
<text class="svg__vocab__text-box" dx="-0.7em" dy="0.9em" width="22" height="12" transform="translate(114.5 266.5)">46904…</text>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M263 275h100v40H263z"/>
|
||||
<text class="svg__vocab__text" dy="1em" width="41" height="17" transform="translate(291.5 285.5)">"love"</text>
|
||||
<rect width="52" height="20" x="287" y="263" fill="#666" rx="3" ry="3"/>
|
||||
<text class="svg__vocab__text-box" dx="-0.7em" dy="0.9em" width="22" height="12" transform="translate(301.5 266.5)">37020…</text>
|
||||
<rect width="570" height="110" x="1" y="1" fill="#e1d5e7" stroke="#9673a6" stroke-width="2" rx="16.5" ry="16.5"/>
|
||||
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M263 60h-78.8"/>
|
||||
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M178.2 60l8-4-2 4 2 4z"/>
|
||||
<rect fill="#E1D5E7" width="50" height="12" transform="translate(202.5 53.5)"/>
|
||||
<text class="svg__vocab__text-code" dx="0.5em" dy="1em" width="50" height="12" transform="translate(202.5 53.5)">nsubj</text>
|
||||
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M363 60h72.8"/>
|
||||
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M441.8 60l-8 4 2-4-2-4z"/>
|
||||
<rect fill="#E1D5E7" width="43" height="12" transform="translate(375.5 54.5)"/>
|
||||
<text class="svg__vocab__text-code" dx="0.5em" dy="1em" width="43" height="12" transform="translate(375.5 54.5)">dobj</text>
|
||||
<rect width="50" height="88" x="1" y="246" fill="#666" stroke="#666" stroke-width="2" rx="7.5" ry="7.5"/>
|
||||
<text class="svg__vocab__text-large" dx="-0.25em" dy="1em" width="53" height="36" transform="rotate(-90 162 155)">String</text>
|
||||
<text class="svg__vocab__text-large" dy="2em" width="53" height="36" transform="rotate(-90 162 155)">Store</text>
|
||||
<rect width="50" height="88" x="1" y="135" fill="#82b366" stroke="#82b366" stroke-width="2" rx="7.5" ry="7.5"/>
|
||||
<text class="svg__vocab__text-large" dx="-0.25em" dy="0.9em" width="47" height="17" transform="rotate(-90 109.5 93)">Vocab</text>
|
||||
<rect width="50" height="110" x="1" y="1" fill="#9673a6" stroke="#9673a6" stroke-width="2" rx="7.5" ry="7.5"/>
|
||||
<text class="svg__vocab__text-large" dx="-0.25em" dy="0.9em" width="31" height="17" transform="rotate(-90 44 27.5)">Doc</text>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M263 27h100v66H263z"/>
|
||||
<text class="svg__vocab__text" dy="1em" width="31" height="33" transform="translate(296.5 42.5)">love</text>
|
||||
<text class="svg__vocab__text-code" dy="2.8em" width="31" height="33" transform="translate(296.5 42.5)">VERB</text>
|
||||
<rect width="50" height="20" x="288" y="16" fill="#666" rx="3" ry="3"/>
|
||||
<text class="svg__vocab__text-box" dy="0.9em" transform="translate(294.5 19.5)">Token</text>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M76 27h100v66H76z"/>
|
||||
<text class="svg__vocab__text" dx="0.8em" dy="1em" width="29" height="33" transform="translate(110.5 42.5)">I</text>
|
||||
<text class="svg__vocab__text-code" dy="2.8em" width="29" height="33" transform="translate(110.5 42.5)">PRON</text>
|
||||
<rect width="50" height="20" x="105" y="17" fill="#666" rx="3" ry="3"/>
|
||||
<text class="svg__vocab__text-box" dy="0.9em" width="36" height="12" transform="translate(111.5 20.5)">Token</text>
|
||||
<path fill="#f5f5f5" stroke="#666" stroke-width="2" d="M444 27h100v66H444z"/>
|
||||
<text class="svg__vocab__text" dy="1em" width="45" height="33" transform="translate(470.5 42.5)">coffee</text>
|
||||
<text class="svg__vocab__text-code" dx="0.6em" dy="2.8em" width="45" height="33" transform="translate(470.5 42.5)">NOUN</text>
|
||||
<rect width="50" height="20" x="469" y="16" fill="#666" rx="3" ry="3"/>
|
||||
<text class="svg__vocab__text-box" dy="0.9em" width="36" height="12" transform="translate(475.5 19.5)">Token</text>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M126 141.8v-38.6"/>
|
||||
<path fill="#666" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M126 149.8l-2.7-8h5.4zM126 95.2l2.7 8h-5.4z"/>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M126 214.2v38.6"/>
|
||||
<path fill="#666" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M126 206.2l2.7 8h-5.4zM126 260.8l-2.7-8h5.4z"/>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M313 103.2v38.6"/>
|
||||
<path fill="#666" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M313 95.2l2.7 8h-5.4zM313 149.8l-2.7-8h5.4z"/>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M313 214.2v38.6"/>
|
||||
<path fill="#666" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M313 206.2l2.7 8h-5.4zM313 260.8l-2.7-8h5.4z"/>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M494 214.2v38.6"/>
|
||||
<path fill="#666" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M494 206.2l2.7 8h-5.4zM494 260.8l-2.7-8h5.4z"/>
|
||||
<path fill="none" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M494 103.2v38.6"/>
|
||||
<path fill="#666" stroke="#666" stroke-width="2" stroke-miterlimit="10" d="M494 95.2l2.7 8h-5.4zM494 149.8l-2.7-8h5.4z"/>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="584" height="348" viewBox="0 0 584 348">
|
||||
<g fill="none" fill-rule="evenodd">
|
||||
<g transform="translate(7 8)">
|
||||
<rect width="554" height="106" x="2.5" y="2.5" fill="#C3E7F1" stroke="#3AC" stroke-width="5" rx="15"/>
|
||||
<path fill="#3AC" stroke="#3AC" stroke-width="5" d="M58.5 108.5V2.5H16C8.5 2.5 2.5 8.5 2.5 16v79c0 7.5 6 13.5 13.5 13.5h42.5z"/>
|
||||
<g transform="translate(82 13)">
|
||||
<rect width="102" height="65" x="1.5" y="9.5" fill="#FFF" stroke="#6A6A6A" stroke-width="3" rx="12"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M53.9 44h-2.8V29.5h2.8z"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M36.7 55.3a13.2 13.2 0 014.2-.1c.5 0 1 .2 1.3.5.4.2.7.5 1 1 .2.4.3 1 .3 1.6 0 .6 0 1.1-.3 1.5a3 3 0 01-.8 1c-.4.4-.8.6-1.2.7l-1.5.2h-.2a7.7 7.7 0 01-.9 0V65h-1.9v-9.7zm2.9 1.4a6.4 6.4 0 00-1 0V60h.2a3.5 3.5 0 00.5 0H40.2l.7-.3.4-.5c.2-.3.2-.6.2-1l-.1-.7-.5-.5-.6-.2h-.7zm5.4-1.4a17.2 17.2 0 012.8-.2H49l1.2.5a2.4 2.4 0 011.2 2.3c0 .4 0 .7-.2 1 0 .3-.1.6-.3.8l-.5.6-.6.4 2.3 4.3H50l-2-3.8H47V65h-2v-9.7zm3 1.5a12 12 0 00-1.1 0v2.9h.7c.6 0 1-.1 1.3-.4.4-.2.5-.6.5-1.2 0-.4 0-.7-.3-1-.3-.2-.7-.3-1.2-.3zm5 3.3c0-.8 0-1.5.2-2.1.2-.7.4-1.2.7-1.6.3-.5.7-.8 1.2-1 .4-.3 1-.4 1.6-.4.7 0 1.2.1 1.7.4.5.2.9.6 1.1 1 .3.5.6 1 .7 1.6l.2 2.1c0 1.6-.3 2.8-1 3.7-.5 1-1.5 1.4-2.7 1.4-.7 0-1.2-.1-1.7-.4-.5-.2-.9-.6-1.2-1-.3-.5-.5-1-.6-1.6A10 10 0 0153 60zm2 0v1.3l.3 1.1.5.7c.2.2.5.3.9.3.3 0 .6 0 .8-.2l.5-.6c.2-.3.3-.7.3-1.1a12.7 12.7 0 00-.2-3.9l-.5-.7c-.2-.2-.5-.3-.9-.3-.7 0-1.1.3-1.3.9A7 7 0 0055 60zm8.5-.9V65h-1.8v-9.8h1.7l3.4 6-.1-1.2v-4.8h1.8V65h-1.7l-3.3-5.8z"/>
|
||||
<g transform="translate(28)">
|
||||
<rect width="48" height="20" fill="#6A6A6A" rx="4"/>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M13.3 4.6V6h-2.9v8H8.7V6H5.8V4.6h7.5zM17 7.2c.5 0 1 .1 1.4.3a3 3 0 011.7 1.7l.2 1.5c0 .5 0 1-.2 1.4a2.9 2.9 0 01-1.7 1.8L17 14c-.5 0-1 0-1.3-.2A3 3 0 0114 12a4 4 0 01-.3-1.4c0-.6 0-1 .3-1.5.1-.4.3-.8.6-1a3 3 0 011-.7l1.4-.3zm0 5.7c.6 0 1-.2 1.3-.6.2-.4.4-1 .4-1.6 0-.8-.2-1.3-.4-1.7-.3-.4-.7-.5-1.3-.5-.5 0-1 .1-1.2.5-.3.4-.4 1-.4 1.7s.1 1.2.4 1.6c.2.4.7.6 1.2.6zm6.2-8.6V10h.5l.2-.2 1.7-2c0-.2.1-.2.2-.3H27.7l-2 2.4-.3.3-.2.2.2.2.2.3 2.2 3.2h-1.7l-.3-.3-1.7-2.5c0-.1 0-.2-.2-.2H23.3v3h-1.6V4.3h1.6zm8.3 3l1.2.1A2.5 2.5 0 0134.3 9a3.6 3.6 0 01.2 1.6l-.1.1-.1.1H30c0 .8.2 1.3.5 1.6.4.3.8.5 1.3.5l.7-.1.5-.2.4-.2.3-.1h.1l.2.1.4.6-.6.5a3.4 3.4 0 01-1.4.5l-.7.1c-.5 0-1 0-1.3-.2-.4-.2-.8-.4-1-.7-.4-.3-.6-.7-.8-1.1a4.3 4.3 0 010-2.9l.6-1a3 3 0 011-.7c.4-.2.9-.3 1.4-.3zm0 1c-.4 0-.8.2-1 .5a2 2 0 00-.6 1.1h3v-.6l-.3-.5-.5-.3-.6-.1zm4.2 5.7V7.3h1c.2 0 .3.1.4.3v.6a3.7 3.7 0 011-.7 2.4 2.4 0 011.1-.3c.4 0 .7 0 1 .2l.7.5c.2.3.4.5.4.8l.2 1V14H40V9.8c0-.5-.1-.8-.3-1-.2-.2-.5-.3-.8-.3-.3 0-.6 0-.8.2-.3 0-.5.3-.7.5V14h-1.6z"/>
|
||||
</g>
|
||||
</g>
|
||||
<g transform="translate(257 16)">
|
||||
<rect width="102" height="65" x="1.5" y="9.5" fill="#FFF" stroke="#6A6A6A" stroke-width="3" rx="12"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M37.4 29.1V44h-2.5V29.1h2.5zm7.1 4.5c.8 0 1.5.1 2.1.4a4.5 4.5 0 012.6 2.7 6.3 6.3 0 010 4.4c-.2.6-.5 1.2-1 1.6-.4.5-1 .8-1.6 1a5.7 5.7 0 01-4.2 0 4.5 4.5 0 01-2.6-2.7c-.2-.6-.3-1.3-.3-2.1 0-.9 0-1.6.3-2.2l1-1.7c.5-.5 1-.8 1.7-1 .6-.3 1.3-.4 2-.4zm0 8.6c.9 0 1.5-.2 2-.8.4-.6.6-1.4.6-2.5s-.2-2-.7-2.6c-.4-.5-1-.8-1.9-.8-.8 0-1.5.3-1.9.8-.4.6-.6 1.5-.6 2.6 0 1 .2 2 .6 2.5.4.6 1 .9 2 .9zM56.7 44h-2.3l-4-10.3h2c.2 0 .3 0 .5.2l.2.3 2 5.5a10.3 10.3 0 01.5 1.8 18.2 18.2 0 01.5-1.8l2-5.5.3-.3.4-.2h2L56.6 44zm9.7-10.4a5 5 0 011.8.3 4 4 0 012.4 2.4 5.5 5.5 0 01.3 2.4l-.1.3-.2.1H64c0 1.1.3 2 .8 2.4.5.5 1.2.7 2 .7l1-.1c.3 0 .6-.2.8-.3l.6-.3.4-.2.3.1.2.2.7.9-1 .8a5.2 5.2 0 01-2.1.8 8 8 0 01-1.1 0c-.7 0-1.4 0-2-.3-.6-.2-1.2-.6-1.6-1a5 5 0 01-1-1.8 6.7 6.7 0 01-.1-4.3c.2-.6.5-1.2 1-1.6.4-.5.9-.9 1.5-1.1.6-.3 1.3-.4 2-.4zm0 1.8c-.6 0-1.2.2-1.6.6a3 3 0 00-.8 1.7h4.7c0-.3 0-.6-.2-1a2 2 0 00-.4-.7l-.6-.5-1-.1z"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M40 62.4l2-7.2h2L41 65H39l-3-9.8h2l2 7.2zm5-7.2h6.5v1.7H47v2.3h4.2v1.7H47v2.4h4.6V65H45v-9.8zm8.4.1a17.2 17.2 0 012.8-.2h1.3l1.2.5A2.4 2.4 0 0160 58c0 .4 0 .7-.2 1 0 .3-.1.6-.3.8l-.5.6-.6.4 2.3 4.3h-2.2l-2-3.8h-1.1V65h-2v-9.7zm3 1.5a12 12 0 00-1.1 0v2.9h.7c.6 0 1-.1 1.3-.4.4-.2.5-.6.5-1.2 0-.4 0-.7-.3-1-.3-.2-.7-.3-1.2-.3zm12 .7l-.1.7a2 2 0 01-.9 1.2l-.8.4c.3 0 .6.2.8.3a2 2 0 011.1 1.1l.2 1a2.6 2.6 0 01-1.2 2.3 4 4 0 01-1.2.5 6 6 0 01-1.5.1h-.6a16.3 16.3 0 01-2.4-.2v-9.6a17.9 17.9 0 014.3-.1c.4 0 .8.2 1.1.3.4.2.6.5.9.8.2.3.3.7.3 1.2zm-3.5 6h.6c.3 0 .5-.2.6-.3l.4-.4.2-.7c0-.3 0-.5-.2-.7L66 61a2 2 0 00-.6-.2h-1.8v2.7h.2a6.5 6.5 0 001 0zm-.7-4.2a8.6 8.6 0 001 0l.8-.5c.3-.3.4-.6.4-1l-.1-.5a1 1 0 00-.4-.4l-.5-.2h-.6a14.9 14.9 0 00-1.2 0v2.6h.6z"/>
|
||||
<g transform="translate(28)">
|
||||
<rect width="48" height="20" fill="#6A6A6A" rx="4"/>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M13.3 4.6V6h-2.9v8H8.7V6H5.8V4.6h7.5zM17 7.2c.5 0 1 .1 1.4.3a3 3 0 011.7 1.7l.2 1.5c0 .5 0 1-.2 1.4a2.9 2.9 0 01-1.7 1.8L17 14c-.5 0-1 0-1.3-.2A3 3 0 0114 12a4 4 0 01-.3-1.4c0-.6 0-1 .3-1.5.1-.4.3-.8.6-1a3 3 0 011-.7l1.4-.3zm0 5.7c.6 0 1-.2 1.3-.6.2-.4.4-1 .4-1.6 0-.8-.2-1.3-.4-1.7-.3-.4-.7-.5-1.3-.5-.5 0-1 .1-1.2.5-.3.4-.4 1-.4 1.7s.1 1.2.4 1.6c.2.4.7.6 1.2.6zm6.2-8.6V10h.5l.2-.2 1.7-2c0-.2.1-.2.2-.3H27.7l-2 2.4-.3.3-.2.2.2.2.2.3 2.2 3.2h-1.7l-.3-.3-1.7-2.5c0-.1 0-.2-.2-.2H23.3v3h-1.6V4.3h1.6zm8.3 3l1.2.1A2.5 2.5 0 0134.3 9a3.6 3.6 0 01.2 1.6l-.1.1-.1.1H30c0 .8.2 1.3.5 1.6.4.3.8.5 1.3.5l.7-.1.5-.2.4-.2.3-.1h.1l.2.1.4.6-.6.5a3.4 3.4 0 01-1.4.5l-.7.1c-.5 0-1 0-1.3-.2-.4-.2-.8-.4-1-.7-.4-.3-.6-.7-.8-1.1a4.3 4.3 0 010-2.9l.6-1a3 3 0 011-.7c.4-.2.9-.3 1.4-.3zm0 1c-.4 0-.8.2-1 .5a2 2 0 00-.6 1.1h3v-.6l-.3-.5-.5-.3-.6-.1zm4.2 5.7V7.3h1c.2 0 .3.1.4.3v.6a3.7 3.7 0 011-.7 2.4 2.4 0 011.1-.3c.4 0 .7 0 1 .2l.7.5c.2.3.4.5.4.8l.2 1V14H40V9.8c0-.5-.1-.8-.3-1-.2-.2-.5-.3-.8-.3-.3 0-.6 0-.8.2-.3 0-.5.3-.7.5V14h-1.6z"/>
|
||||
</g>
|
||||
</g>
|
||||
<g transform="translate(432 16)">
|
||||
<rect width="102" height="65" x="1.5" y="9.5" fill="#FFF" stroke="#6A6A6A" stroke-width="3" rx="12"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M32.9 36l-.2.1-.3.1-.4-.1a11.9 11.9 0 00-1-.5l-.9-.1c-.4 0-.8 0-1.1.2l-.9.7a3 3 0 00-.4 1l-.2 1.5c0 .5 0 1 .2 1.4l.5 1c.2.4.5.6.8.7.3.2.7.3 1 .3a2.6 2.6 0 001.6-.5l.4-.3.5-.2c.2 0 .3.1.4.3l.7.9a4.5 4.5 0 01-1.9 1.3 7.6 7.6 0 01-2.2.4c-.5 0-1.1-.2-1.7-.4-.6-.2-1-.6-1.5-1a5 5 0 01-1-1.7c-.2-.7-.3-1.4-.3-2.3 0-.7 0-1.4.3-2 .2-.7.5-1.2 1-1.7.4-.5.9-.9 1.5-1.1.6-.3 1.3-.4 2.2-.4a5 5 0 012 .4c.5.2 1 .5 1.5 1l-.6 1zm6.7-2.4c.8 0 1.4.1 2 .4a4.5 4.5 0 012.7 2.7c.2.6.3 1.3.3 2.1 0 .9 0 1.6-.3 2.3-.3.6-.6 1.2-1 1.6-.5.5-1 .8-1.6 1-.7.3-1.3.4-2.1.4s-1.5 0-2.1-.3a4.5 4.5 0 01-2.6-2.8 6.2 6.2 0 010-4.4 4.5 4.5 0 012.6-2.7c.6-.2 1.3-.3 2-.3zm0 8.6c.8 0 1.5-.2 1.9-.8.4-.6.6-1.4.6-2.5s-.2-2-.6-2.6c-.4-.5-1-.8-2-.8-.8 0-1.4.3-1.8.8-.4.6-.7 1.5-.7 2.6 0 1 .3 2 .7 2.5.4.6 1 .9 1.9.9zM47 44v-8.4l-.9-.1a1 1 0 01-.4-.2.5.5 0 01-.2-.5v-1H47v-.7c0-.6.1-1.1.3-1.6a3.3 3.3 0 012-2 4.6 4.6 0 012.9 0v1.2l-.4.4h-.5a3 3 0 00-.8.1 1.3 1.3 0 00-1 1v1.6H52v1.8h-2.6V44H47zm7 0v-8.4l-.9-.1a1 1 0 01-.4-.2.5.5 0 01-.2-.5v-1H54v-.7c0-.6.1-1.1.3-1.6a3.3 3.3 0 012-2 4.6 4.6 0 012.9 0v1.2c0 .2-.2.3-.3.4h-.6a3 3 0 00-.8.1 1.3 1.3 0 00-1 1v1.6H59v1.8h-2.6V44H54zm10.8-10.4a5 5 0 011.8.3 4 4 0 012.3 2.4 5.5 5.5 0 01.3 2.4v.3l-.2.1h-6.6c0 1.1.3 2 .8 2.4.5.5 1.2.7 2 .7l1-.1c.3 0 .6-.2.8-.3l.5-.3.5-.2.3.1.2.2.7.9-1 .8a5.2 5.2 0 01-2.1.8 8 8 0 01-1.1 0c-.8 0-1.4 0-2-.3-.6-.2-1.2-.6-1.6-1a5 5 0 01-1.1-1.8 6.7 6.7 0 010-4.3c.2-.6.5-1.2 1-1.6.3-.5.9-.9 1.5-1.1.6-.3 1.3-.4 2-.4zm0 1.8c-.6 0-1.2.2-1.6.6a3 3 0 00-.8 1.7h4.7c0-.3 0-.6-.2-1a2 2 0 00-.4-.7l-.7-.5-1-.1zm10.7-1.8a5 5 0 011.8.3 4 4 0 012.3 2.4 5.5 5.5 0 01.3 2.4v.3l-.2.1H73c0 1.1.3 2 .8 2.4.5.5 1.2.7 2 .7l1-.1c.3 0 .5-.2.8-.3l.5-.3.5-.2.3.1.2.2.7.9-1 .8a5.2 5.2 0 01-2.1.8 8 8 0 01-1.2 0c-.7 0-1.3 0-2-.3-.6-.2-1-.6-1.5-1A5 5 0 0171 41a6.7 6.7 0 010-4.3c.2-.6.5-1.2 1-1.6.3-.5.8-.9 1.5-1.1.6-.3 1.3-.4 2-.4zm0 1.8c-.6 0-1.2.2-1.6.6a3 3 0 00-.8 1.7h4.6v-1a2 2 0 00-.5-.7l-.7-.5-1-.1z"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M38.3 59.2V65h-1.8v-9.8h1.7l3.4 6-.1-1.2v-4.8h1.8V65h-1.7l-3.3-5.8zm6.3.9c0-.8 0-1.5.2-2.1.2-.7.4-1.2.7-1.6.3-.5.7-.8 1.2-1 .4-.3 1-.4 1.6-.4.7 0 1.2.1 1.7.4.5.2.9.6 1.1 1 .3.5.6 1 .7 1.6l.2 2.1c0 1.6-.3 2.8-1 3.7-.5 1-1.5 1.4-2.7 1.4-.7 0-1.2-.1-1.7-.4-.5-.2-.9-.6-1.2-1-.3-.5-.5-1-.6-1.6a10 10 0 01-.2-2.1zm2 0v1.3l.3 1.1.5.7c.2.2.5.3.9.3.3 0 .6 0 .8-.2l.5-.6c.2-.3.3-.7.3-1.1a12.7 12.7 0 00-.2-3.9l-.5-.7c-.2-.2-.5-.3-.9-.3-.7 0-1.1.3-1.3.9a7 7 0 00-.4 2.5zm11.8-4.9h1.8v6.3c0 .6 0 1.2-.3 1.6a2.9 2.9 0 01-1.8 1.8c-.4.2-.8.3-1.3.3-1.2 0-2.1-.3-2.7-1-.6-.5-.9-1.4-.9-2.5v-6.5h2v6.1c0 .8 0 1.3.3 1.7.3.3.7.5 1.3.5.6 0 1-.2 1.2-.6.3-.3.4-.9.4-1.6v-6.1zm5 4l.1 1.2V65h-1.8v-9.8h1.7l3.4 6-.1-1.2v-4.8h1.8V65h-1.7l-3.3-5.8z"/>
|
||||
<g transform="translate(28)">
|
||||
<rect width="48" height="20" fill="#6A6A6A" rx="4"/>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M13.3 4.6V6h-2.9v8H8.7V6H5.8V4.6h7.5zM17 7.2c.5 0 1 .1 1.4.3a3 3 0 011.7 1.7l.2 1.5c0 .5 0 1-.2 1.4a2.9 2.9 0 01-1.7 1.8L17 14c-.5 0-1 0-1.3-.2A3 3 0 0114 12a4 4 0 01-.3-1.4c0-.6 0-1 .3-1.5.1-.4.3-.8.6-1a3 3 0 011-.7l1.4-.3zm0 5.7c.6 0 1-.2 1.3-.6.2-.4.4-1 .4-1.6 0-.8-.2-1.3-.4-1.7-.3-.4-.7-.5-1.3-.5-.5 0-1 .1-1.2.5-.3.4-.4 1-.4 1.7s.1 1.2.4 1.6c.2.4.7.6 1.2.6zm6.2-8.6V10h.5l.2-.2 1.7-2c0-.2.1-.2.2-.3H27.7l-2 2.4-.3.3-.2.2.2.2.2.3 2.2 3.2h-1.7l-.3-.3-1.7-2.5c0-.1 0-.2-.2-.2H23.3v3h-1.6V4.3h1.6zm8.3 3l1.2.1A2.5 2.5 0 0134.3 9a3.6 3.6 0 01.2 1.6l-.1.1-.1.1H30c0 .8.2 1.3.5 1.6.4.3.8.5 1.3.5l.7-.1.5-.2.4-.2.3-.1h.1l.2.1.4.6-.6.5a3.4 3.4 0 01-1.4.5l-.7.1c-.5 0-1 0-1.3-.2-.4-.2-.8-.4-1-.7-.4-.3-.6-.7-.8-1.1a4.3 4.3 0 010-2.9l.6-1a3 3 0 011-.7c.4-.2.9-.3 1.4-.3zm0 1c-.4 0-.8.2-1 .5a2 2 0 00-.6 1.1h3v-.6l-.3-.5-.5-.3-.6-.1zm4.2 5.7V7.3h1c.2 0 .3.1.4.3v.6a3.7 3.7 0 011-.7 2.4 2.4 0 011.1-.3c.4 0 .7 0 1 .2l.7.5c.2.3.4.5.4.8l.2 1V14H40V9.8c0-.5-.1-.8-.3-1-.2-.2-.5-.3-.8-.3-.3 0-.6 0-.8.2-.3 0-.5.3-.7.5V14h-1.6z"/>
|
||||
</g>
|
||||
</g>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M205.6 54h2.1l.2.8.9-.7c.3-.2.7-.3 1.2-.3.3 0 .6 0 .8.2.3 0 .5.2.7.3l.4.8.2 1.2V60h-1.6v-3.4c0-.5 0-.8-.3-1-.2-.3-.5-.4-.8-.4-.3 0-.6.1-.8.3-.3.1-.4.4-.5.6V60h-1.6v-4.7h-.9V54zm12 4.3l-.4-.3-.7-.3a31.3 31.3 0 00-1.9-.6c-.3-.1-.5-.3-.7-.6-.2-.2-.3-.5-.3-.8 0-.3 0-.6.2-.8l.5-.6c.3-.2.5-.3.8-.3l1-.2a5.6 5.6 0 011.9.3l.5.3c.2 0 .3.2.4.3l-.6 1a5.4 5.4 0 00-1.5-.6 2.7 2.7 0 00-1.3.1c-.2.1-.4.2-.4.4s.1.3.3.4l.8.2a130.2 130.2 0 001.9.6l.7.5c.2.3.3.6.3 1 0 .5-.2 1-.7 1.3-.4.4-1 .6-2 .6-.6 0-1.2-.1-1.7-.3-.5-.2-1-.5-1.2-.8l.7-1a3.7 3.7 0 001 .5l.7.2.7.1.6-.1c.2-.1.3-.2.3-.5zm6.3-4.3h2.2v4.7h.9V60h-1.9l-.3-.9-.8.8c-.4.2-.8.3-1.3.3-.3 0-.5 0-.8-.2-.3 0-.5-.2-.6-.3l-.5-.8a4 4 0 01-.1-1.2v-2.4h-.7V54h2.3v3.4c0 .5 0 .8.2 1 .2.3.4.4.7.4.3 0 .6-.1.8-.3.3-.2.4-.4.5-.6v-2.6h-.6V54zm3-2.4h2.5v2.9l.8-.5 1-.1c.8 0 1.4.2 2 .7.4.5.6 1.2.6 2.2 0 .6 0 1-.2 1.5a2.8 2.8 0 01-1.8 1.6c-.4.2-.9.2-1.4.2a4.9 4.9 0 01-2.5-.6v-6.6h-1v-1.3zm3.9 3.6h-.4a1.8 1.8 0 00-.7.5 1 1 0 00-.3.3v2.5a3.2 3.2 0 001.3.3h.6l.5-.4.3-.6.1-1c0-.5-.1-.9-.4-1.2-.2-.3-.6-.4-1-.4zm4.6-1.2h4.4v5.5c0 1-.2 1.7-.7 2.2-.5.5-1.2.8-2.1.8-.4 0-.8 0-1.2-.2l-.9-.5.7-1.2.5.3a1.8 1.8 0 001.3 0c.2 0 .3 0 .5-.2l.3-.6v-4.8h-2.8V54zm2-1.6c0-.3 0-.5.3-.7l.7-.2c.4 0 .6 0 .8.2.2.2.3.4.3.7 0 .2 0 .4-.3.6-.2.2-.4.2-.8.2-.3 0-.6 0-.7-.2a.8.8 0 01-.3-.6z"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M385.8 52.6h2.5v5.2a16.3 16.3 0 000 1.9h.9V61h-2l-.1-.8-.4.4a2.4 2.4 0 01-1.6.6c-.4 0-.7 0-1-.2-.4-.1-.7-.3-1-.6-.2-.2-.4-.5-.5-1a4 4 0 01-.2-1.3c0-.5 0-1 .2-1.4.1-.4.4-.7.6-1 .3-.3.6-.5 1-.6.4-.2.8-.2 1.2-.2h.8l.5.1v-1h-1v-1.4zm-.4 7.2c.4 0 .6 0 .9-.2.2-.1.3-.3.4-.6v-2.6l-.4-.2H385l-.5.4-.4.6-.1 1c0 .5 0 .9.3 1.2.3.3.7.4 1.1.4zm4.3-1.8l.2-1.3a2.8 2.8 0 011.7-1.7l1.3-.2c.5 0 1 .1 1.4.3.3.1.7.3 1 .6l.6 1 .2 1.3-.2 1.3a2.8 2.8 0 01-1.7 1.7l-1.3.2c-.5 0-1-.1-1.3-.3-.4-.1-.8-.3-1-.6l-.7-1-.2-1.3zm1.6 0l.1.7.3.6a1.4 1.4 0 001.2.5c.5 0 .8-.1 1.1-.4.3-.3.5-.8.5-1.4 0-.6-.2-1-.4-1.3-.3-.4-.7-.5-1.2-.5h-.6l-.5.4c-.2.1-.3.3-.4.6v.8zm5-5.4h2.5v2.9l.8-.5 1-.1c.8 0 1.4.2 2 .7.4.5.6 1.2.6 2.2 0 .6 0 1-.2 1.5a2.8 2.8 0 01-1.8 1.6c-.4.2-.9.2-1.4.2a4.9 4.9 0 01-2.5-.6v-6.6h-1v-1.3zm3.9 3.6h-.4a1.8 1.8 0 00-.7.5 1 1 0 00-.3.3v2.5a3.2 3.2 0 001.3.3h.6l.5-.4.3-.6.1-1c0-.5-.1-.9-.4-1.2-.2-.3-.6-.4-1-.4zm4.6-1.2h4.4v5.5c0 1-.2 1.7-.7 2.2-.5.5-1.2.8-2.1.8-.4 0-.8 0-1.2-.2l-.9-.5.7-1.2.5.3a1.8 1.8 0 001.3 0c.2 0 .3 0 .5-.2l.3-.6v-4.8h-2.8V55zm2-1.6c0-.3 0-.5.3-.7l.7-.2c.4 0 .6 0 .8.2.2.2.3.4.3.7 0 .2 0 .4-.3.6-.2.2-.4.2-.8.2-.3 0-.6 0-.7-.2a.8.8 0 01-.3-.6z"/>
|
||||
<path fill="#3AC" d="M189 57l6.4-3v6z"/>
|
||||
<path stroke="#3AC" stroke-linecap="square" stroke-width="1.4" d="M196 56.9h6.5"/>
|
||||
<path fill="#3AC" d="M428 56.7l-6.4 3v-6z"/>
|
||||
<path stroke="#3AC" stroke-linecap="square" stroke-width="1.4" d="M421 56.9h-6.5"/>
|
||||
<path stroke="#3AC" stroke-linecap="square" stroke-width="1.4" d="M375.5 56.9H367"/>
|
||||
<path stroke="#3AC" stroke-linecap="square" stroke-width="1.4" d="M252.5 56.9H244"/>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M30 66.1c1 0 2 .2 2.9.6a6.7 6.7 0 013.8 3.8c.3.9.5 1.9.5 3V79H22.8v-5.6c0-1 .1-2 .5-3a6.8 6.8 0 013.8-3.7c.9-.4 1.8-.6 2.9-.6zm0 2.8c-.8 0-1.5 0-2.2.3-.6.2-1.1.5-1.6 1a4 4 0 00-1 1.3 5 5 0 00-.3 2v2.8H35v-2.9a5 5 0 00-.4-1.9 4 4 0 00-1-1.4c-.4-.4-1-.7-1.6-.9-.6-.2-1.3-.3-2.1-.3zM30 50c1 0 2 .2 3 .6a6.7 6.7 0 013.9 3.8c.3.9.5 1.9.5 3 0 1-.2 2-.6 3a6.8 6.8 0 01-3.9 3.8 8 8 0 01-3 .5c-1 0-2-.2-2.9-.5a7 7 0 01-3.9-3.8c-.3-1-.5-2-.5-3 0-1.1.2-2.1.6-3a6.8 6.8 0 013.8-3.8 8 8 0 013-.5zm0 2.8c-.8 0-1.5.1-2.2.3-.6.2-1.1.5-1.6 1-.4.3-.7.8-1 1.4a5 5 0 00-.3 1.9c0 .7.1 1.3.3 1.9l1 1.4 1.6 1c.7.2 1.4.3 2.2.3.8 0 1.5-.1 2.1-.4a4 4 0 002.6-2.4 5 5 0 00.4-1.8 5 5 0 00-.4-2 4 4 0 00-1-1.4c-.4-.4-1-.7-1.6-.9-.6-.2-1.3-.3-2.1-.3zm3.8-14.8c0-.2 0-.3.2-.4l1.1-1c.8.5 1.3 1.3 1.7 2.1.4.9.6 1.9.6 3.1 0 1-.2 2-.6 2.9a6.4 6.4 0 01-3.8 3.6 8.8 8.8 0 01-6 0 6.8 6.8 0 01-3.9-4 8 8 0 010-5.8c.4-.8.8-1.5 1.4-2l1.2.9c.1 0 .2 0 .2.2l.1.3c0 .2 0 .3-.2.5a5.8 5.8 0 00-.7 1.5c-.2.4-.2.9-.2 1.5a4.2 4.2 0 001.4 3.3c.4.4 1 .7 1.6 1 .6.2 1.3.3 2 .3.9 0 1.6 0 2.3-.3l1.6-1a4 4 0 001-1.4c.2-.5.3-1 .3-1.7v-1a4.2 4.2 0 00-.6-1.5l-.5-.7-.1-.2v-.2z"/>
|
||||
</g>
|
||||
<g transform="translate(7 128)">
|
||||
<rect width="554" height="89" x="2.5" y="7.5" fill="#D7CCF4" stroke="#8978B5" stroke-width="5" rx="15"/>
|
||||
<path fill="#8978B5" stroke="#8978B5" stroke-width="5" d="M58.5 96.5v-89H16c-7.5 0-13.5 6-13.5 13.5v62c0 7.5 6 13.5 13.5 13.5h42.5z"/>
|
||||
<g transform="translate(82 21)">
|
||||
<rect width="102" height="49" x="1.5" y="8.5" fill="#FFF" stroke="#6A6A6A" stroke-width="3" rx="12"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M30 37.3h-1.7V40h-1.7v-2.7h-4.2V36l4.4-5.9h1.5v5.7H30v1.5zm-3.4-4.6l-2.4 3.1h2.4v-3zM37.9 37a3.1 3.1 0 01-1 2.3c-.2.2-.6.5-1 .6-.4.2-.9.3-1.3.3-.5 0-1 0-1.4-.2a3 3 0 01-1.9-1.9c-.2-.5-.2-1-.2-1.6a6.4 6.4 0 011.7-4.5 6.4 6.4 0 013.5-2l.4 1.4a5.4 5.4 0 00-2.5 1.3l-.8 1c-.3.3-.4.7-.5 1l.8-.5a3.3 3.3 0 012.4 0 2.7 2.7 0 011.6 1.5c.2.4.2.8.2 1.3zm-1.7 0c0-.5-.2-.9-.5-1.1-.3-.3-.7-.4-1.2-.4a2 2 0 00-1.7.8v.7l.1.6a1.6 1.6 0 00.9 1 1.8 1.8 0 001.3 0l.5-.4.4-.5.2-.7zm3.3-3.7c0-.5 0-1 .2-1.3a3 3 0 011.8-1.8l1.3-.2c.6 0 1 0 1.5.2a3 3 0 011.8 2c.2.4.2.9.2 1.5 0 1-.1 2-.4 2.8a4.9 4.9 0 01-2.8 3.2l-2 .5-.3-1.4 1.4-.4c.4-.2.8-.4 1-.7.4-.3.6-.6.8-1 .2-.3.4-.7.4-1l-.7.4-1.2.2c-.3 0-.7 0-1-.2a3 3 0 01-1.8-1.5 3 3 0 01-.2-1.3zm1.7-.1a1.6 1.6 0 00.6 1.2l.5.3h.7l1-.1.6-.5v-.5l-.1-.8a2 2 0 00-.3-.7 1.7 1.7 0 00-1.4-.6c-.4 0-.8.1-1.1.4-.3.3-.5.7-.5 1.3zm6.4 1.9c0-.9.1-1.6.3-2.2.2-.7.4-1.2.7-1.6.4-.5.8-.8 1.2-1l1.5-.3c1.2 0 2 .4 2.7 1.2.7.9 1 2.1 1 3.9 0 .8-.1 1.6-.3 2.2-.2.7-.4 1.2-.7 1.6a3 3 0 01-1.2 1l-1.5.3c-1.2 0-2.1-.4-2.7-1.3-.7-.9-1-2.2-1-3.8zm5.6 0a12.4 12.4 0 000-.8l-3.5 3c.3 1 .8 1.4 1.6 1.4.6 0 1-.3 1.4-.9.3-.6.5-1.5.5-2.7zm-3.8 0a10.8 10.8 0 000 .8l3.5-3.2c-.3-.8-.8-1.2-1.6-1.2-.6 0-1 .3-1.4 1-.3.5-.5 1.4-.5 2.6zm14.2 2.2h-1.7V40h-1.7v-2.7H56V36l4.4-5.9h1.5v5.7h1.7v1.5zm-3.4-4.6l-2.4 3.1h2.4v-3zm6.8 6.4c0-.4 0-.6.3-.8.2-.2.4-.3.8-.3.3 0 .6.1.8.3.2.2.3.4.3.8 0 .3 0 .6-.3.8-.2.2-.5.3-.8.3-.4 0-.6-.1-.8-.3a1 1 0 01-.3-.8zm8.4 0c0-.4 0-.6.3-.8.2-.2.4-.3.8-.3.3 0 .6.1.8.3.2.2.3.4.3.8 0 .3 0 .6-.3.8-.2.2-.5.3-.8.3-.4 0-.6-.1-.8-.3a1 1 0 01-.3-.8zm8.4 0c0-.4 0-.6.3-.8.2-.2.4-.3.8-.3.3 0 .6.1.8.3.2.2.3.4.3.8 0 .3 0 .6-.3.8-.2.2-.5.3-.8.3-.4 0-.6-.1-.8-.3a1 1 0 01-.3-.8z"/>
|
||||
<rect width="54" height="20" x="25" fill="#6A6A6A" rx="4"/>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M32 12.6h3.7V14h-5.5V4.6h1.7v8zm7.6-5.4c.4 0 .8 0 1.1.2A2.5 2.5 0 0142.2 9a3.6 3.6 0 01.2 1.6v.1l-.1.1H38c0 .8.2 1.3.5 1.6.3.3.8.5 1.3.5l.7-.1.5-.2.3-.2.3-.1h.2l.1.1.5.6-.6.5a3.4 3.4 0 01-1.4.5l-.8.1c-.4 0-.8 0-1.2-.2-.4-.2-.8-.4-1-.7-.4-.3-.6-.7-.8-1.1a4.3 4.3 0 010-2.9c.1-.4.3-.7.6-1a3 3 0 011-.7c.4-.2.9-.3 1.4-.3zm0 1.2c-.5 0-.8.1-1.1.4a2 2 0 00-.5 1.1h3v-.6l-.3-.5-.5-.3-.6-.1zm5.7 2.2l-2.1-3.3H45l.1.2 1.4 2.3a1.8 1.8 0 01.2-.5l1.1-1.7.2-.2h1.7l-2.2 3 2.3 3.6H47.9l-.1-.3-1.5-2.3-.1.4-1.3 1.9-.1.2-.3.1h-1.4l2.2-3.4zm8.2-3.4c.5 0 .8 0 1.2.2A2.5 2.5 0 0156.2 9a3.6 3.6 0 01.2 1.6v.1l-.2.1H52c0 .8.3 1.3.6 1.6.3.3.7.5 1.3.5l.6-.1.5-.2.4-.2.3-.1h.2l.1.1.5.6-.6.5a3.4 3.4 0 01-1.5.5l-.7.1c-.4 0-.9 0-1.3-.2l-1-.7-.7-1.1a4.3 4.3 0 010-2.9c.1-.4.3-.7.6-1a3 3 0 011-.7c.4-.2.8-.3 1.3-.3zm0 1.2c-.4 0-.8.1-1 .4a2 2 0 00-.5 1.1h3l-.1-.6c0-.2-.1-.3-.3-.5l-.4-.3-.6-.1zm4.2 5.6V7.3h1c.2 0 .4.1.4.3l.1.5.4-.3a2.4 2.4 0 01.9-.5h.5c.4 0 .8 0 1 .3.3.2.5.5.7.9a2 2 0 01.9-1l.6-.2a2.9 2.9 0 011.6.1l.7.5.4.8.2 1V14h-1.6V9.8c0-.5-.1-.8-.3-1-.2-.2-.4-.3-.8-.3h-.5a1 1 0 00-.6.7V14h-1.7V9.8c0-.5 0-.8-.3-1-.1-.2-.4-.3-.8-.3l-.6.1-.6.5V14h-1.6zm13.7-6.8c.5 0 .8 0 1.2.2A2.5 2.5 0 0174 9a3.6 3.6 0 01.2 1.6v.1l-.2.1H70c0 .8.2 1.3.5 1.6.3.3.7.5 1.3.5l.6-.1.5-.2.4-.2.3-.1h.2v.1l.6.6-.6.5a3.4 3.4 0 01-1.5.5l-.7.1c-.5 0-.9 0-1.3-.2l-1-.7c-.3-.3-.6-.7-.7-1.1a4.3 4.3 0 010-2.9c.1-.4.3-.7.6-1a3 3 0 011-.7c.4-.2.8-.3 1.3-.3zm0 1.2c-.4 0-.8.1-1 .4a2 2 0 00-.5 1.1h3l-.1-.6c0-.2-.2-.3-.3-.5l-.4-.3-.6-.1z"/>
|
||||
</g>
|
||||
<g transform="translate(432 21)">
|
||||
<rect width="102" height="49" x="1.5" y="8.5" fill="#FFF" stroke="#6A6A6A" stroke-width="3" rx="12"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M25.4 38.7c.3 0 .6 0 .8-.2.3 0 .5-.2.7-.3l.4-.5.2-.6-.2-.7-.5-.5-.7-.2-1-.1h-.8v-1l2.4-2.9H23v-1.5H29v1.3l-2.5 2.8h.3c.3 0 .7 0 1 .2a2.4 2.4 0 011.4 1.3l.2 1.2a2.9 2.9 0 01-1.2 2.4 5.2 5.2 0 01-2.7.8 7.2 7.2 0 01-2.5-.5l.5-1.5.4.2a8.4 8.4 0 001 .2h.6zm6.4-.2h2.1v-6.2l-1.8 1.5-.9-1.2 3.2-2.4h1.3v8.3h2V40h-6v-1.5zm7.7-5.2c0-.5 0-1 .2-1.3a3 3 0 011.8-1.8l1.3-.2c.6 0 1 0 1.5.2a3 3 0 011.8 2c.2.4.2.9.2 1.5 0 1-.1 2-.4 2.8a4.9 4.9 0 01-2.8 3.2l-2 .5-.3-1.4 1.4-.4c.4-.2.8-.4 1-.7.4-.3.6-.6.8-1 .2-.3.4-.7.4-1l-.7.4-1.2.2c-.3 0-.7 0-1-.2a3 3 0 01-1.8-1.5 3 3 0 01-.2-1.3zm1.7-.1a1.6 1.6 0 00.6 1.2l.5.3h.7l1-.1.6-.5v-.5l-.1-.8a2 2 0 00-.3-.7 1.7 1.7 0 00-1.4-.6c-.4 0-.8.1-1.1.4-.3.3-.5.7-.5 1.3zm7.3 6.8l4-8.1H48v-1.7h6.4v1.4l-4 8.4h-1.9zm7.8-6.7c0-.5 0-1 .2-1.3a3 3 0 011.8-1.8l1.3-.2c.6 0 1 0 1.5.2a3 3 0 011.8 2c.2.4.2.9.2 1.5 0 1-.1 2-.4 2.8a4.9 4.9 0 01-2.8 3.2l-2 .5-.3-1.4 1.4-.4c.4-.2.8-.4 1-.7.4-.3.6-.6.8-1 .2-.3.4-.7.4-1l-.7.4-1.1.2c-.4 0-.8 0-1.2-.2a3 3 0 01-1.7-1.5 3 3 0 01-.2-1.3zm1.7-.1a1.6 1.6 0 00.6 1.2l.5.3h.7l1-.1.6-.5v-.5l-.1-.8a2 2 0 00-.3-.7 1.7 1.7 0 00-1.4-.6c-.4 0-.8.1-1.1.4-.3.3-.5.7-.5 1.3zm9 5.9c0-.4 0-.6.3-.8.2-.2.4-.3.8-.3.3 0 .6.1.8.3.2.2.3.4.3.8 0 .3 0 .6-.3.8-.2.2-.5.3-.8.3-.4 0-.6-.1-.8-.3a1 1 0 01-.3-.8zm8.4 0c0-.4 0-.6.3-.8.2-.2.4-.3.8-.3.3 0 .6.1.8.3.2.2.3.4.3.8 0 .3 0 .6-.3.8-.2.2-.5.3-.8.3-.4 0-.6-.1-.8-.3a1 1 0 01-.3-.8zm8.4 0c0-.4 0-.6.3-.8.2-.2.4-.3.8-.3.3 0 .6.1.8.3.2.2.3.4.3.8 0 .3 0 .6-.3.8-.2.2-.5.3-.8.3-.4 0-.6-.1-.8-.3a1 1 0 01-.3-.8z"/>
|
||||
<rect width="54" height="20" x="25" fill="#6A6A6A" rx="4"/>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M32 12.6h3.7V14h-5.5V4.6h1.7v8zm7.6-5.4c.4 0 .8 0 1.1.2A2.5 2.5 0 0142.2 9a3.6 3.6 0 01.2 1.6v.1l-.1.1H38c0 .8.2 1.3.5 1.6.3.3.8.5 1.3.5l.7-.1.5-.2.3-.2.3-.1h.2l.1.1.5.6-.6.5a3.4 3.4 0 01-1.4.5l-.8.1c-.4 0-.8 0-1.2-.2-.4-.2-.8-.4-1-.7-.4-.3-.6-.7-.8-1.1a4.3 4.3 0 010-2.9c.1-.4.3-.7.6-1a3 3 0 011-.7c.4-.2.9-.3 1.4-.3zm0 1.2c-.5 0-.8.1-1.1.4a2 2 0 00-.5 1.1h3v-.6l-.3-.5-.5-.3-.6-.1zm5.7 2.2l-2.1-3.3H45l.1.2 1.4 2.3a1.8 1.8 0 01.2-.5l1.1-1.7.2-.2h1.7l-2.2 3 2.3 3.6H47.9l-.1-.3-1.5-2.3-.1.4-1.3 1.9-.1.2-.3.1h-1.4l2.2-3.4zm8.2-3.4c.5 0 .8 0 1.2.2A2.5 2.5 0 0156.2 9a3.6 3.6 0 01.2 1.6v.1l-.2.1H52c0 .8.3 1.3.6 1.6.3.3.7.5 1.3.5l.6-.1.5-.2.4-.2.3-.1h.2l.1.1.5.6-.6.5a3.4 3.4 0 01-1.5.5l-.7.1c-.4 0-.9 0-1.3-.2l-1-.7-.7-1.1a4.3 4.3 0 010-2.9c.1-.4.3-.7.6-1a3 3 0 011-.7c.4-.2.8-.3 1.3-.3zm0 1.2c-.4 0-.8.1-1 .4a2 2 0 00-.5 1.1h3l-.1-.6c0-.2-.1-.3-.3-.5l-.4-.3-.6-.1zm4.2 5.6V7.3h1c.2 0 .4.1.4.3l.1.5.4-.3a2.4 2.4 0 01.9-.5h.5c.4 0 .8 0 1 .3.3.2.5.5.7.9a2 2 0 01.9-1l.6-.2a2.9 2.9 0 011.6.1l.7.5.4.8.2 1V14h-1.6V9.8c0-.5-.1-.8-.3-1-.2-.2-.4-.3-.8-.3h-.5a1 1 0 00-.6.7V14h-1.7V9.8c0-.5 0-.8-.3-1-.1-.2-.4-.3-.8-.3l-.6.1-.6.5V14h-1.6zm13.7-6.8c.5 0 .8 0 1.2.2A2.5 2.5 0 0174 9a3.6 3.6 0 01.2 1.6v.1l-.2.1H70c0 .8.2 1.3.5 1.6.3.3.7.5 1.3.5l.6-.1.5-.2.4-.2.3-.1h.2v.1l.6.6-.6.5a3.4 3.4 0 01-1.5.5l-.7.1c-.5 0-.9 0-1.3-.2l-1-.7c-.3-.3-.6-.7-.7-1.1a4.3 4.3 0 010-2.9c.1-.4.3-.7.6-1a3 3 0 011-.7c.4-.2.8-.3 1.3-.3zm0 1.2c-.4 0-.8.1-1 .4a2 2 0 00-.5 1.1h3l-.1-.6c0-.2-.2-.3-.3-.5l-.4-.3-.6-.1z"/>
|
||||
</g>
|
||||
<g transform="translate(257 21)">
|
||||
<rect width="102" height="49" x="1.5" y="8.5" fill="#FFF" stroke="#6A6A6A" stroke-width="3" rx="12"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M25.4 38.7c.3 0 .6 0 .8-.2.3 0 .5-.2.7-.3l.4-.5.2-.6-.2-.7-.5-.5-.7-.2-1-.1h-.8v-1l2.4-2.9H23v-1.5H29v1.3l-2.5 2.8h.3c.3 0 .7 0 1 .2a2.4 2.4 0 011.4 1.3l.2 1.2a2.9 2.9 0 01-1.2 2.4 5.2 5.2 0 01-2.7.8 7.2 7.2 0 01-2.5-.5l.5-1.5.4.2a8.4 8.4 0 001 .2h.6zm6.3 1.3l4-8.1h-4.5v-1.7h6.4v1.4l-4 8.4h-1.9zm7.5-5c0-.8.1-1.5.3-2.1.2-.7.4-1.2.7-1.6.4-.5.8-.8 1.2-1l1.5-.3c1.2 0 2 .4 2.7 1.2.7.9 1 2.1 1 3.9 0 .8-.1 1.6-.3 2.2-.2.7-.4 1.2-.7 1.6a3 3 0 01-1.2 1l-1.5.3c-1.2 0-2.1-.4-2.7-1.3-.7-.9-1-2.2-1-3.8zm5.6 0a12.4 12.4 0 000-.7l-3.5 3c.3 1 .8 1.4 1.6 1.4.6 0 1-.3 1.4-.9.3-.6.5-1.5.5-2.7zM41 35a10.8 10.8 0 000 .9l3.5-3.2c-.3-.8-.8-1.2-1.6-1.2-.6 0-1 .3-1.4 1-.3.5-.5 1.4-.5 2.6zm13.4 5h-6.2v-1.4a12.3 12.3 0 001-.9 27 27 0 002.7-3.1l.4-.8v-.7-.6l-.4-.4-.5-.4a2 2 0 00-.7 0 2.6 2.6 0 00-1.8.6l-.7-1.3 1.3-.7a4.5 4.5 0 012.8-.1c.4 0 .7.2 1 .5a2.4 2.4 0 011 2 4.2 4.2 0 01-.8 2.3 12.8 12.8 0 01-2 2.3 16 16 0 01-1.1 1.2h4V40zm1.6-5c0-.8.1-1.5.3-2.1.2-.7.4-1.2.7-1.6.4-.5.8-.8 1.2-1l1.5-.3c1.2 0 2 .4 2.7 1.2.7.9 1 2.1 1 3.9 0 .8-.1 1.6-.3 2.2-.2.7-.4 1.2-.7 1.6a3 3 0 01-1.2 1l-1.5.3c-1.2 0-2.1-.4-2.7-1.3-.7-.9-1-2.2-1-3.8zm5.6 0a12.4 12.4 0 000-.7l-3.5 3c.3 1 .8 1.4 1.6 1.4.6 0 1-.3 1.4-.9.3-.6.5-1.5.5-2.7zm-3.8 0a10.8 10.8 0 000 .9l3.5-3.2c-.3-.8-.8-1.2-1.6-1.2-.6 0-1 .3-1.4 1-.3.5-.5 1.4-.5 2.6zm9.2 4c0-.3 0-.5.3-.7.2-.2.4-.3.8-.3.3 0 .6.1.8.3.2.2.3.4.3.8 0 .3 0 .6-.3.8-.2.2-.5.3-.8.3-.4 0-.6-.1-.8-.3a1 1 0 01-.3-.8zm8.4 0c0-.3 0-.5.3-.7.2-.2.4-.3.8-.3.3 0 .6.1.8.3.2.2.3.4.3.8 0 .3 0 .6-.3.8-.2.2-.5.3-.8.3-.4 0-.6-.1-.8-.3a1 1 0 01-.3-.8zm8.4 0c0-.3 0-.5.3-.7.2-.2.4-.3.8-.3.3 0 .6.1.8.3.2.2.3.4.3.8 0 .3 0 .6-.3.8-.2.2-.5.3-.8.3-.4 0-.6-.1-.8-.3a1 1 0 01-.3-.8z"/>
|
||||
<rect width="54" height="20" x="25" fill="#6A6A6A" rx="4"/>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M32 12.6h3.7V14h-5.5V4.6h1.7v8zm7.6-5.4c.4 0 .8 0 1.1.2A2.5 2.5 0 0142.2 9a3.6 3.6 0 01.2 1.6v.1l-.1.1H38c0 .8.2 1.3.5 1.6.3.3.8.5 1.3.5l.7-.1.5-.2.3-.2.3-.1h.2l.1.1.5.6-.6.5a3.4 3.4 0 01-1.4.5l-.8.1c-.4 0-.8 0-1.2-.2-.4-.2-.8-.4-1-.7-.4-.3-.6-.7-.8-1.1a4.3 4.3 0 010-2.9c.1-.4.3-.7.6-1a3 3 0 011-.7c.4-.2.9-.3 1.4-.3zm0 1.2c-.5 0-.8.1-1.1.4a2 2 0 00-.5 1.1h3v-.6l-.3-.5-.5-.3-.6-.1zm5.7 2.2l-2.1-3.3H45l.1.2 1.4 2.3a1.8 1.8 0 01.2-.5l1.1-1.7.2-.2h1.7l-2.2 3 2.3 3.6H47.9l-.1-.3-1.5-2.3-.1.4-1.3 1.9-.1.2-.3.1h-1.4l2.2-3.4zm8.2-3.4c.5 0 .8 0 1.2.2A2.5 2.5 0 0156.2 9a3.6 3.6 0 01.2 1.6v.1l-.2.1H52c0 .8.3 1.3.6 1.6.3.3.7.5 1.3.5l.6-.1.5-.2.4-.2.3-.1h.2l.1.1.5.6-.6.5a3.4 3.4 0 01-1.5.5l-.7.1c-.4 0-.9 0-1.3-.2l-1-.7-.7-1.1a4.3 4.3 0 010-2.9c.1-.4.3-.7.6-1a3 3 0 011-.7c.4-.2.8-.3 1.3-.3zm0 1.2c-.4 0-.8.1-1 .4a2 2 0 00-.5 1.1h3l-.1-.6c0-.2-.1-.3-.3-.5l-.4-.3-.6-.1zm4.2 5.6V7.3h1c.2 0 .4.1.4.3l.1.5.4-.3a2.4 2.4 0 01.9-.5h.5c.4 0 .8 0 1 .3.3.2.5.5.7.9a2 2 0 01.9-1l.6-.2a2.9 2.9 0 011.6.1l.7.5.4.8.2 1V14h-1.6V9.8c0-.5-.1-.8-.3-1-.2-.2-.4-.3-.8-.3h-.5a1 1 0 00-.6.7V14h-1.7V9.8c0-.5 0-.8-.3-1-.1-.2-.4-.3-.8-.3l-.6.1-.6.5V14h-1.6zm13.7-6.8c.5 0 .8 0 1.2.2A2.5 2.5 0 0174 9a3.6 3.6 0 01.2 1.6v.1l-.2.1H70c0 .8.2 1.3.5 1.6.3.3.7.5 1.3.5l.6-.1.5-.2.4-.2.3-.1h.2v.1l.6.6-.6.5a3.4 3.4 0 01-1.5.5l-.7.1c-.5 0-.9 0-1.3-.2l-1-.7c-.3-.3-.6-.7-.7-1.1a4.3 4.3 0 010-2.9c.1-.4.3-.7.6-1a3 3 0 011-.7c.4-.2.8-.3 1.3-.3zm0 1.2c-.4 0-.8.1-1 .4a2 2 0 00-.5 1.1h3l-.1-.6c0-.2-.2-.3-.3-.5l-.4-.3-.6-.1z"/>
|
||||
</g>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M22.4 88.2v-2.1a1 1 0 01.6-1l8.8-3.3a12 12 0 012-.7l-2-.5-8.8-3.4a1 1 0 01-.4-.3.9.9 0 01-.2-.6v-2.2L36.8 80v2.4l-14.4 5.8zm7.2-29.5c1 0 2 .2 3 .6a6.7 6.7 0 013.9 3.8c.3.9.5 1.9.5 3 0 1-.2 2-.6 3a6.8 6.8 0 01-3.9 3.8 8 8 0 01-3 .5c-1 0-2-.2-2.9-.5a7 7 0 01-3.8-3.8c-.4-1-.6-2-.6-3 0-1.1.2-2.1.6-3a6.8 6.8 0 013.8-3.8 8 8 0 013-.6zm0 2.8c-.8 0-1.5.1-2.1.3-.7.2-1.2.5-1.6 1-.5.3-.8.8-1 1.4a5 5 0 00-.4 1.9c0 .7.1 1.3.3 1.9l1 1.4 1.7 1c.6.2 1.3.3 2 .3.9 0 1.6-.1 2.2-.4a4 4 0 002.6-2.4 5 5 0 00.4-1.8 5 5 0 00-.4-2 4 4 0 00-1-1.4c-.4-.4-1-.7-1.6-.9-.6-.2-1.3-.3-2.1-.3zm3.8-14.8c0-.2 0-.3.2-.4l1.1-1c.8.5 1.3 1.3 1.7 2.1.4.9.6 1.9.6 3.1 0 1-.2 2-.6 2.9a6.4 6.4 0 01-3.8 3.6 8.8 8.8 0 01-6 0 6.8 6.8 0 01-3.9-4 8 8 0 010-5.8c.4-.8.8-1.5 1.4-2l1.2.9c.1 0 .2 0 .2.2l.1.3-.1.5a5.8 5.8 0 00-.8 1.5l-.2 1.5a4.2 4.2 0 001.4 3.3c.4.4 1 .7 1.6 1 .6.2 1.3.3 2 .3 1 0 1.6 0 2.3-.3l1.6-1a4 4 0 001-1.4c.2-.5.3-1 .3-1.7v-1a4.2 4.2 0 00-.6-1.5l-.5-.7-.1-.2v-.2zm3.4-16.2v2a1 1 0 01-.6 1l-3 1v6l3 1.1a1 1 0 01.6 1v2L22.4 39v-2.7l14.4-5.7zM31.4 40v-4.6L26.6 37a22 22 0 01-1.7.6 33.3 33.3 0 011.7.5l4.8 1.8zM36.8 29H22.4v-5c0-1 0-1.7.2-2.4s.5-1.2.8-1.6a3 3 0 011.2-1 4 4 0 012.6-.1c.3 0 .6.2.9.4l.7.8.6 1.2c.4-2 1.5-3 3.2-3 .6 0 1.2.1 1.7.3.5.3 1 .6 1.3 1 .4.5.7 1 .9 1.7.2.7.3 1.4.3 2.3V29zm-6.3-2.7h4.2v-2.7c0-.5 0-1-.2-1.3 0-.3-.2-.6-.4-.8l-.7-.4a2.6 2.6 0 00-1.7 0l-.7.5-.4.8-.1 1.2v2.7zm-1.9 0v-2.1c0-1-.1-1.6-.5-2.1-.3-.5-.8-.7-1.6-.7-.7 0-1.3.2-1.6.6-.3.5-.5 1.1-.5 2v2.3h4.2z"/>
|
||||
</g>
|
||||
<g transform="translate(7 238)">
|
||||
<rect width="559" height="94" y="5" fill="#AAE2DC" stroke="#00BEAB" stroke-linejoin="round" stroke-width="3.8" rx="15"/>
|
||||
<path fill="#00BEAB" stroke="#00BEAB" stroke-linejoin="round" stroke-width="3.8" d="M16 5h45v94H16A16 16 0 010 83V21A16 16 0 0116 5z"/>
|
||||
<g transform="translate(82 18)">
|
||||
<rect width="102" height="49" x="1.5" y="8.5" fill="#FFF" stroke="#6A6A6A" stroke-width="3" rx="12"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M47.4 29.5v3L47 34c0 .3 0 .5-.2.7l-.6.2c-.3 0-.4 0-.6-.2l-.3-.7-.2-1.6v-2.9h2.2zm3.7 0v3l-.3 1.5-.2.7-.6.2c-.3 0-.5 0-.6-.2l-.3-.7-.2-1.6v-2.9h2.2zM56.9 44H54V29.5H57V44zM62 29.5v3l-.2 1.5c0 .3-.2.5-.3.7l-.6.2c-.2 0-.4 0-.6-.2l-.2-.7-.3-1.6v-2.9h2.2zm3.7 0v3l-.3 1.5c0 .3 0 .5-.2.7l-.6.2c-.3 0-.4 0-.6-.2L64 34l-.3-1.6v-2.9h2.2z"/>
|
||||
<rect width="54" height="20" x="25" fill="#6A6A6A" rx="4"/>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M34.9 10.5H36v.9l-.1.2-.3.1H35V14h-1.4v-2.3h-4l-.3-.1-.1-.2-.2-.9 4.5-6h1.5v6zm-1.4-3.1a8.3 8.3 0 010-1l-2.9 4h2.9v-3zm7.2.7c.3 0 .7 0 1 .2a2.5 2.5 0 011.5 1.4c.2.4.2.8.2 1.2a3.2 3.2 0 01-.9 2.3l-1 .7-1.4.2c-.5 0-1 0-1.3-.2a2.9 2.9 0 01-1.7-1.7 3.9 3.9 0 010-2.8c.2-.4.5-1 .9-1.4l2.2-3 .4-.3.5-.1h1.4L39.7 8a46.6 46.6 0 01-.3.4 2.8 2.8 0 011.3-.3zm-2.3 3l.1.7c0 .2.2.4.3.5l.6.4h1.4a1.6 1.6 0 00.9-1l.1-.6-.1-.8c0-.2-.2-.4-.3-.5l-.6-.4a2 2 0 00-1.4 0 1.6 1.6 0 00-.9 1v.6zm8.9-.9l-1-.1A2.4 2.4 0 0145 8.7a3 3 0 01-.2-1.2 3 3 0 011.9-2.8l1.3-.2c.5 0 1 0 1.3.2l1 .7c.2.2.5.6.6 1l.2 1.2a4.4 4.4 0 01-.6 2.3 9 9 0 01-.4.7l-2.2 3-.3.3-.5.1h-1.5l3-3.6a9.1 9.1 0 00.3-.6c-.2.2-.4.3-.7.3a3 3 0 01-.8.1zm2.2-2.8l-.1-.7a1.4 1.4 0 00-.9-.8 1.8 1.8 0 00-1.3 0 1.4 1.4 0 00-.8.9l-.1.6c0 .5.1 1 .4 1.2.3.3.7.4 1.2.4h.6l.6-.4c0-.2.2-.3.3-.5v-.7zm9.1 1.9c0 .8 0 1.5-.3 2.1-.1.6-.4 1.1-.7 1.5a3 3 0 01-1 1L55 14c-.5 0-.9 0-1.3-.3a3 3 0 01-1.1-.9c-.3-.4-.6-.9-.7-1.5a7.8 7.8 0 010-4.3c.1-.5.4-1 .7-1.4A3 3 0 0155 4.5c.5 0 1 0 1.4.3.4.2.8.5 1.1.9.3.4.6.9.7 1.5.2.6.3 1.3.3 2.1zm-1.7 0a8 8 0 00-.1-1.7c-.1-.4-.2-.8-.4-1l-.6-.6a1.5 1.5 0 00-1.3 0c-.2 0-.4.3-.6.5l-.4 1.1-.1 1.7.1 1.7c.1.4.2.8.4 1l.6.6a1.5 1.5 0 001.3 0c.2 0 .4-.3.6-.5l.4-1.1.1-1.7zm8.1 1.2h1.2v1.1l-.3.1H65V14h-1.4v-2.3h-4l-.2-.1-.2-.2-.1-.9 4.4-6H65v6zm-1.4-3.1a8.3 8.3 0 01.1-1l-3 4h3v-3zM67 13a1 1 0 01.3-.7 1 1 0 01.7-.3 1 1 0 01.7.3 1 1 0 01.2 1.1c0 .1 0 .2-.2.3l-.3.2a1 1 0 01-1-.2c-.2 0-.2-.2-.3-.3v-.4zm3 0a1 1 0 01.2-.7 1 1 0 01.8-.3 1 1 0 01.6.3 1 1 0 01.3 1.1l-.3.3c0 .1-.1.2-.3.2a1 1 0 01-1-.2l-.3-.3v-.4zm2.9 0a1 1 0 01.3-.7 1 1 0 01.7-.3 1 1 0 01.7.3 1 1 0 01.2 1.1l-.2.3-.3.2a1 1 0 01-1-.2c-.2 0-.3-.2-.3-.3l-.1-.4z"/>
|
||||
</g>
|
||||
<g transform="translate(432 18)">
|
||||
<rect width="102" height="49" x="1.5" y="8.5" fill="#FFF" stroke="#6A6A6A" stroke-width="3" rx="12"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M22.4 29.5v3L22 34l-.2.7-.6.2c-.3 0-.5 0-.6-.2l-.3-.7-.2-1.6v-2.9h2.2zm3.6 0v3l-.2 1.5-.2.7-.6.2c-.3 0-.5 0-.6-.2-.2-.2-.2-.4-.3-.7l-.2-1.6v-2.9H26zM36 36l-.2.2-.3.1-.4-.1a11.9 11.9 0 00-1-.5l-.9-.1c-.4 0-.8 0-1.1.2l-.9.7a3 3 0 00-.4 1l-.2 1.5c0 .5 0 1 .2 1.4l.5 1c.2.4.5.6.8.7.3.2.7.3 1 .3a2.6 2.6 0 001.6-.5l.4-.3.5-.2c.2 0 .3.1.4.3l.7.9a4.5 4.5 0 01-1.9 1.3 7.6 7.6 0 01-2.2.4c-.5 0-1.1-.2-1.7-.4-.6-.2-1-.6-1.5-1a5 5 0 01-1-1.7c-.2-.7-.3-1.4-.3-2.3 0-.7 0-1.4.3-2 .2-.7.5-1.2 1-1.7.4-.5.9-.9 1.5-1.1.6-.3 1.3-.4 2.2-.4a5 5 0 012 .4c.5.2 1 .5 1.5 1l-.6 1zm6.7-2.3c.8 0 1.4.1 2 .4a4.5 4.5 0 012.7 2.7c.2.6.3 1.3.3 2.1 0 .9 0 1.6-.3 2.3-.3.6-.6 1.2-1 1.6-.5.5-1 .8-1.6 1-.7.3-1.3.4-2.1.4s-1.5 0-2.1-.3a4.5 4.5 0 01-2.6-2.8 6.2 6.2 0 010-4.4 4.5 4.5 0 012.6-2.7c.6-.2 1.3-.3 2-.3zm0 8.6c.8 0 1.5-.2 1.9-.8.4-.6.6-1.4.6-2.5s-.2-2-.6-2.6c-.4-.5-1-.8-2-.8-.8 0-1.4.3-1.8.8-.4.6-.7 1.5-.7 2.6 0 1 .3 2 .7 2.5.4.6 1 .9 1.9.9zM50 44v-8.4l-.9-.1a1 1 0 01-.4-.2.5.5 0 01-.2-.5v-1H50v-.7c0-.6.1-1.1.3-1.6a3.3 3.3 0 012-2 4.6 4.6 0 012.9 0v1.2l-.4.4h-.5a3 3 0 00-.8.1 1.3 1.3 0 00-1 1v1.6H55v1.8h-2.6V44H50zm7 0v-8.4l-.9-.1a1 1 0 01-.4-.2.5.5 0 01-.2-.5v-1H57v-.7c0-.6.1-1.1.3-1.6a3.3 3.3 0 012-2 4.6 4.6 0 012.9 0v1.2c0 .2-.2.3-.3.4h-.6a3 3 0 00-.8.1 1.3 1.3 0 00-1 1v1.6H62v1.8h-2.6V44H57zm10.8-10.4a5 5 0 011.8.3 4 4 0 012.3 2.4 5.5 5.5 0 01.3 2.4v.3l-.2.1h-6.6c0 1.1.3 2 .8 2.4.5.5 1.2.7 2 .7l1-.1c.3 0 .6-.2.8-.3l.5-.3.5-.2.3.1.2.2.7.9-1 .8a5.2 5.2 0 01-2.1.8 8 8 0 01-1.1 0c-.8 0-1.4 0-2-.3-.6-.2-1.2-.6-1.6-1a5 5 0 01-1.1-1.8 6.7 6.7 0 010-4.3c.2-.6.5-1.2 1-1.6.3-.5.9-.9 1.5-1.1.6-.3 1.3-.4 2-.4zm0 1.8c-.6 0-1.2.2-1.6.6a3 3 0 00-.8 1.7h4.7c0-.3 0-.6-.2-1a2 2 0 00-.4-.7l-.7-.5-1-.1zm10.7-1.8a5 5 0 011.8.3 4 4 0 012.3 2.4 5.5 5.5 0 01.3 2.4v.3l-.2.1H76c0 1.1.3 2 .8 2.4.5.5 1.2.7 2 .7l1-.1c.3 0 .5-.2.8-.3l.5-.3.5-.2.3.1.2.2.7.9-1 .8a5.2 5.2 0 01-2.1.8 8 8 0 01-1.2 0c-.7 0-1.3 0-2-.3-.6-.2-1-.6-1.5-1A5 5 0 0174 41a6.7 6.7 0 010-4.3c.2-.6.5-1.2 
1-1.6.3-.5.8-.9 1.5-1.1.6-.3 1.3-.4 2-.4zm0 1.8c-.6 0-1.2.2-1.6.6a3 3 0 00-.8 1.7h4.6v-1a2 2 0 00-.5-.7l-.7-.5-1-.1zm8.6-5.9v3L87 34c0 .3-.1.5-.3.7l-.6.2c-.2 0-.4 0-.5-.2-.2-.2-.3-.4-.3-.7l-.2-1.6v-2.9H87zm3.7 0v3l-.2 1.5c0 .3-.2.5-.3.7l-.6.2c-.2 0-.4 0-.6-.2L89 34l-.3-1.6v-2.9h2.2z"/>
|
||||
<rect width="54" height="20" x="25" fill="#6A6A6A" rx="4"/>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M32.8 4.5c.5 0 .8 0 1.2.2l.9.5a2.2 2.2 0 01.8 1.7l-.1.8-.3.6-.5.4-.6.3c.5.2 1 .5 1.2.8.3.4.4.9.4 1.4 0 .5 0 .9-.2 1.3l-.7.9a3 3 0 01-1 .5 4.2 4.2 0 01-2.5 0 2.7 2.7 0 01-1.6-1.2l-.5-1 .8-.4h.6l.2.2.3.5.3.4a1.6 1.6 0 001.1.4c.3 0 .5 0 .7-.2.2 0 .4-.2.5-.3a1.4 1.4 0 00.4-1v-.7a1 1 0 00-.3-.4c-.2-.2-.4-.3-.7-.3l-1.1-.1V8.6h1l.6-.4.3-.4v-.6c0-.4 0-.8-.3-1-.2-.2-.5-.4-1-.4A1.5 1.5 0 0031.3 7c0 .2-.2.3-.3.4h-.3-1.1c0-.5.2-1 .4-1.3a2.8 2.8 0 011.7-1.3l1.1-.2zm5.3 8.3H40V7.3v-.7l-1.4 1.2h-.1l-.2.1a.5.5 0 01-.4-.2l-.5-.7 2.8-2.4h1.4v8.2h1.7V14H38v-1.2zm9.2-2.6l-1-.1A2.4 2.4 0 0145 8.7a3 3 0 01-.2-1.2 3 3 0 011.9-2.8l1.3-.2c.5 0 1 0 1.3.2l1 .7c.2.2.5.6.6 1l.2 1.2a4.4 4.4 0 01-.6 2.3 9 9 0 01-.4.7l-2.2 3-.3.3-.5.1h-1.5l3-3.6a9.1 9.1 0 00.3-.6c-.2.2-.4.3-.7.3a3 3 0 01-.8.1zm2.2-2.8l-.1-.7a1.4 1.4 0 00-.9-.8 1.8 1.8 0 00-1.3 0 1.4 1.4 0 00-.8.9l-.1.6c0 .5.1 1 .4 1.2.3.3.7.4 1.2.4h.6l.6-.4c0-.2.2-.3.3-.5v-.7zm9-2.8v.7a1.5 1.5 0 01-.2.8l-3.6 7.4-.3.3-.5.2h-1.2l3.7-7.2.5-.7h-4.5c-.1 0-.2 0-.3-.2V4.6h6.4zm3.9 5.6l-1-.1A2.4 2.4 0 0160 8.7a3 3 0 01-.2-1.2 3 3 0 011.9-2.8l1.3-.2c.5 0 .9 0 1.3.2l1 .7.6 1 .2 1.2a4.4 4.4 0 01-.6 2.3 9 9 0 01-.5.7l-2.1 3-.3.3-.5.1h-1.5l3-3.6a9.1 9.1 0 00.3-.6c-.2.2-.5.3-.7.3a3 3 0 01-.8.1zm2.2-2.8c0-.2 0-.4-.2-.7a1.4 1.4 0 00-.8-.8 1.8 1.8 0 00-1.3 0 1.4 1.4 0 00-.8.9l-.1.6c0 .5.1 1 .4 1.2.3.3.6.4 1.1.4h.7l.5-.4.4-.5v-.7zm2.3 5.7a1 1 0 01.3-.7 1 1 0 01.7-.3 1 1 0 01.7.3 1 1 0 01.2 1.1c0 .1 0 .2-.2.3l-.3.2a1 1 0 01-1-.2c-.2 0-.2-.2-.3-.3v-.4zm3 0a1 1 0 01.2-.7 1 1 0 01.8-.3 1 1 0 01.6.3 1 1 0 01.3 1.1l-.3.3c0 .1-.1.2-.3.2a1 1 0 01-1-.2l-.3-.3v-.4zm2.9 0a1 1 0 01.3-.7 1 1 0 01.7-.3 1 1 0 01.7.3 1 1 0 01.2 1.1l-.2.3-.3.2a1 1 0 01-1-.2c-.2 0-.3-.2-.3-.3l-.1-.4z"/>
|
||||
</g>
|
||||
<g transform="translate(257 18)">
|
||||
<rect width="102" height="49" x="1.5" y="8.5" fill="#FFF" stroke="#6A6A6A" stroke-width="3" rx="12"/>
|
||||
<path fill="#3D4251" fill-rule="nonzero" d="M31.4 29.5v3l-.2 1.5-.2.7-.6.2c-.3 0-.5 0-.6-.2-.2-.2-.2-.4-.3-.7l-.2-1.6v-2.9h2.1zm3.7 0v3L35 34c0 .3-.1.5-.3.7l-.6.2c-.2 0-.4 0-.5-.2-.2-.2-.3-.4-.3-.7l-.2-1.6v-2.9H35zm5.3-.4V44h-2.5V29.1h2.5zm7.1 4.5c.8 0 1.5.1 2.1.4a4.5 4.5 0 012.6 2.7 6.3 6.3 0 010 4.4c-.2.6-.5 1.2-1 1.6-.4.5-1 .8-1.6 1a5.7 5.7 0 01-4.2 0 4.5 4.5 0 01-2.6-2.7c-.2-.6-.3-1.3-.3-2.1 0-.9 0-1.6.3-2.2l1-1.7c.5-.5 1-.8 1.7-1 .6-.3 1.3-.4 2-.4zm0 8.6c.9 0 1.5-.2 2-.8.4-.6.6-1.4.6-2.5s-.2-2-.7-2.6c-.4-.5-1-.8-1.9-.8-.8 0-1.5.3-1.9.8-.4.6-.6 1.5-.6 2.6 0 1 .2 2 .6 2.5.4.6 1 .9 2 .9zM59.7 44h-2.3l-4-10.3h2c.2 0 .3 0 .5.2l.2.3 2 5.5a10.3 10.3 0 01.5 1.8 18.2 18.2 0 01.5-1.8l2-5.5.3-.3.4-.2h2L59.6 44zm9.7-10.4a5 5 0 011.8.3 4 4 0 012.4 2.4 5.5 5.5 0 01.3 2.4l-.1.3-.2.1H67c0 1.1.3 2 .8 2.4.5.5 1.2.7 2 .7l1-.1c.3 0 .6-.2.8-.3l.6-.3.4-.2.3.1.2.2.7.9-1 .8a5.2 5.2 0 01-2.1.8 8 8 0 01-1.1 0c-.7 0-1.4 0-2-.3-.6-.2-1.2-.6-1.6-1a5 5 0 01-1-1.8 6.7 6.7 0 01-.1-4.3c.2-.6.5-1.2 1-1.6.4-.5.9-.9 1.5-1.1.6-.3 1.3-.4 2-.4zm0 1.8c-.6 0-1.2.2-1.6.6a3 3 0 00-.8 1.7h4.7c0-.3 0-.6-.2-1a2 2 0 00-.4-.7l-.6-.5-1-.1zm8.6-5.9v3l-.2 1.5c0 .3-.1.5-.3.7l-.6.2c-.2 0-.4 0-.5-.2-.2-.2-.3-.4-.3-.7l-.2-1.6v-2.9H78zm3.7 0v3l-.2 1.5c0 .3-.1.5-.3.7l-.6.2c-.2 0-.4 0-.5-.2-.2-.2-.3-.4-.3-.7l-.2-1.6v-2.9h2.1z"/>
|
||||
<rect width="54" height="20" x="25" fill="#6A6A6A" rx="4"/>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M32.8 4.5c.5 0 .8 0 1.2.2l.9.5a2.2 2.2 0 01.8 1.7l-.1.8-.3.6-.5.4-.6.3c.5.2 1 .5 1.2.8.3.4.4.9.4 1.4 0 .5 0 .9-.2 1.3l-.7.9a3 3 0 01-1 .5 4.2 4.2 0 01-2.5 0 2.7 2.7 0 01-1.6-1.2l-.5-1 .8-.4h.6l.2.2.3.5.3.4a1.6 1.6 0 001.1.4c.3 0 .5 0 .7-.2.2 0 .4-.2.5-.3a1.4 1.4 0 00.4-1v-.7a1 1 0 00-.3-.4c-.2-.2-.4-.3-.7-.3l-1.1-.1V8.6h1l.6-.4.3-.4v-.6c0-.4 0-.8-.3-1-.2-.2-.5-.4-1-.4A1.5 1.5 0 0031.3 7c0 .2-.2.3-.3.4h-.3-1.1c0-.5.2-1 .4-1.3a2.8 2.8 0 011.7-1.3l1.1-.2zm10.6.1v.7a1.5 1.5 0 01-.2.8l-3.5 7.4-.3.3-.6.2h-1.2l3.7-7.2.5-.7h-4.5c-.1 0-.2 0-.3-.2V4.6h6.4zm7.7 4.7c0 .8-.1 1.5-.3 2.1-.2.6-.4 1.1-.7 1.5a3 3 0 01-1.1 1l-1.4.2c-.5 0-1 0-1.4-.3a3 3 0 01-1-.9c-.4-.4-.6-.9-.8-1.5l-.2-2.1c0-.8 0-1.5.2-2.1.2-.6.4-1.1.7-1.5a3 3 0 012.5-1.2c.5 0 1 0 1.4.3.4.2.8.5 1 .9.4.4.6.9.8 1.5.2.6.3 1.3.3 2.1zm-1.7 0a8 8 0 00-.1-1.7l-.4-1-.6-.6a1.5 1.5 0 00-1.4 0c-.2 0-.4.3-.5.5L46 7.6l-.2 1.7c0 .7 0 1.2.2 1.7 0 .4.2.8.4 1 .1.3.3.5.5.6a1.5 1.5 0 001.4 0c.2 0 .4-.3.6-.5l.4-1.1.1-1.7zm5.9-4.8c.4 0 .8 0 1.2.2l1 .5c.2.3.4.5.5.9l.2 1-.1 1a4 4 0 01-.4.9l-.6.8-.8.7-2 2.2a5.1 5.1 0 011.1-.2h2.4c.2 0 .3 0 .4.2l.2.3v1h-6.5v-.5a1 1 0 01.3-.7l2.9-2.9a13.5 13.5 0 001-1.3l.4-.7v-.7-.6a1.2 1.2 0 00-.7-.7h-.6c-.4 0-.7 0-1 .2l-.5.8c0 .2 0 .3-.2.4h-1.4c0-.5.2-1 .4-1.3A2.8 2.8 0 0154 4.7l1.2-.2zm10.8 4.8c0 .8 0 1.5-.2 2.1-.2.6-.4 1.1-.8 1.5a3 3 0 01-1 1l-1.4.2c-.5 0-1 0-1.4-.3a3 3 0 01-1-.9c-.4-.4-.6-.9-.8-1.5a7.8 7.8 0 010-4.3l.7-1.4a3 3 0 012.5-1.2c.5 0 1 0 1.3.3.5.2.8.5 1.1.9.4.4.6.9.8 1.5l.2 2.1zm-1.6 0a8 8 0 00-.2-1.7c0-.4-.2-.8-.4-1-.1-.3-.3-.5-.5-.6A1.5 1.5 0 0062 6c-.2 0-.4.3-.6.5-.1.3-.3.7-.3 1.1l-.2 1.7c0 .7 0 1.2.2 1.7 0 .4.2.8.3 1l.6.6a1.5 1.5 0 001.4 0c.2 0 .4-.3.5-.5l.4-1.1.2-1.7zm2.4 3.8a1 1 0 01.3-.7 1 1 0 01.7-.3 1 1 0 01.7.3 1 1 0 01.2 1.1c0 .1 0 .2-.2.3l-.3.2a1 1 0 01-1-.2c-.2 0-.2-.2-.3-.3v-.4zm3 0a1 1 0 01.2-.7 1 1 0 01.8-.3 1 1 0 01.6.3 1 1 0 01.3 1.1l-.3.3c0 .1-.1.2-.3.2a1 1 0 01-1-.2l-.3-.3v-.4zm2.9 0a1 1 0 01.3-.7 1 1 0 01.7-.3 1 1 0 01.7.3 1 1 0 01.2 
1.1l-.2.3-.3.2a1 1 0 01-1-.2c-.2 0-.3-.2-.3-.3l-.1-.4z"/>
|
||||
</g>
|
||||
<path fill="#FFF" fill-rule="nonzero" d="M14 79.5c.2 0 .3.2.4.3v.3l-.1.5a15.3 15.3 0 00-.7 1.3l-.1 1.1v1l.5.7.5.4.8.2c.3 0 .6-.1.8-.3.3-.2.4-.5.6-.8l.4-1a52.4 52.4 0 011-2.5c.1-.4.4-.8.7-1a3.3 3.3 0 012.6-1.1 5 5 0 011.8.3 4.2 4.2 0 012.5 2.6 6.9 6.9 0 01-1.5 6.9l-1.3-.8-.3-.3V87l.1-.6a16.3 16.3 0 001-1.6l.1-1.3c0-.7-.1-1.3-.5-1.7-.4-.5-.9-.7-1.6-.7-.3 0-.7.1-.9.3l-.6.8-.4 1a35.5 35.5 0 01-.9 2.5 4 4 0 01-.7 1c-.3.4-.7.6-1.1.8a4.3 4.3 0 01-4.6-1c-.3-.4-.7-.9-.9-1.4a6.7 6.7 0 010-4.4c.3-.7.7-1.3 1.2-1.8l1.3.6zm-2.6-13.2h2.2v4.4H26v2.7H13.6v4.3h-2.2V66.3zm8.8-4.4H26v2.6H11.4v-4.4c0-1 .1-1.8.3-2.5.2-.7.5-1.3.9-1.7.4-.5.8-.8 1.3-1 .5-.2 1-.3 1.7-.3a4.4 4.4 0 012.5.8l1 1c.2.4.5.8.6 1.4l.8-1 5.4-3.5v2.4c0 .2 0 .4-.2.6l-.4.4-4.6 3a1 1 0 00-.4.4V61.9zm-1.9 0v-1.7c0-.5 0-1-.2-1.4l-.5-.9-.8-.5c-.3-.2-.6-.2-1-.2a2 2 0 00-1.7.7c-.4.5-.6 1.2-.6 2.2v1.7h4.8zM26 48.5v2.7H11.4v-2.7H26zm-14.5-4.7v-.3l.1-.2.1-.2s.2 0 .3-.2l9.6-7.5a10 10 0 01-1.3 0h-8.8v-2.3H26v1.3l-.1.6-.4.4-9.6 7.5a13.9 13.9 0 011.2 0h8.9v2.4H11.4v-1.5zM24 23.5a6.7 6.7 0 00-.6-2.9h-2.6v1.8c0 .2 0 .3-.2.4l-.3.2h-1.6v-4.8h5.9a8 8 0 011.4 3.8l.1 1.7a7 7 0 01-4.4 6.7 8 8 0 01-3 .6c-1 0-2-.2-3-.6a6.7 6.7 0 01-3.8-3.8c-.4-1-.5-2-.5-3.1a7 7 0 011.9-5.3l1.2.8c.2.1.3.3.3.6l-.1.5a38 38 0 00-.7 1.4 6 6 0 000 4l1 1.5a6.3 6.3 0 003.7 1.3c.9 0 1.6-.2 2.3-.4.6-.3 1.2-.6 1.6-1 .5-.4.8-1 1-1.5a5 5 0 00.4-2zm14.2 51.8l.3.3v.3l-.1.5a15.3 15.3 0 00-.7 1.3l-.1 1.1v1l.5.7.5.4.8.2c.3 0 .6-.1.8-.3.3-.2.4-.5.6-.8l.4-1a52.4 52.4 0 011-2.5c.1-.4.4-.8.7-1a3.3 3.3 0 012.6-1.1 5 5 0 011.8.3 4.2 4.2 0 012.5 2.6 6.9 6.9 0 01-1.5 6.9l-1.3-.8-.3-.3v-.3l.1-.6a16.3 16.3 0 001-1.6l.1-1.3c0-.7-.1-1.3-.5-1.8-.4-.4-.9-.6-1.6-.6-.3 0-.7.1-.9.3l-.6.8-.4 1a35.5 35.5 0 01-.9 2.5 4 4 0 01-.7 1l-1.1.8a4.3 4.3 0 01-4.6-1c-.3-.4-.7-.9-.9-1.5-.2-.5-.3-1.2-.3-2s.1-1.6.4-2.3c.2-.7.6-1.4 1-1.9l1.4.7zm-2.7-13.2h2.2v4.4H50v2.7H37.6v4.3h-2.2V62.1zm7.3-15.5c1 0 2 .1 2.9.5a6.7 6.7 0 013.9 3.8c.4.9.5 1.9.5 3a6.8 6.8 0 01-4.4 6.8 8 8 0 01-3 .5c-1 0-2-.2-2.9-.5a7 7 0 
01-3.9-3.8c-.4-1-.5-2-.5-3 0-1.1.2-2.1.5-3a6.8 6.8 0 014-3.8 8 8 0 012.9-.5zm0 2.7c-.8 0-1.5.1-2.2.3-.6.2-1.1.5-1.6 1-.4.3-.8.8-1 1.4a5 5 0 00-.3 1.9c0 .7 0 1.3.3 1.9.2.5.6 1 1 1.4l1.6 1c.7.2 1.4.3 2.2.3.7 0 1.5-.1 2-.4a4 4 0 002.6-2.4 5 5 0 00.4-1.8 5 5 0 00-.3-2 4 4 0 00-1-1.4c-.4-.4-1-.7-1.6-.9-.6-.2-1.4-.3-2.1-.3zm1.5-7.6H50v2.6H35.4V40c0-1 .1-1.8.3-2.5.2-.7.5-1.3.9-1.7.4-.5.8-.8 1.3-1 .5-.2 1-.3 1.7-.3a4.4 4.4 0 012.5.8l1 1c.2.4.5.8.6 1.4l.8-1 5.4-3.5v2.4c0 .2 0 .4-.2.6l-.4.4-4.6 3a1 1 0 00-.4.4V41.7zm-1.9 0V40c0-.5 0-1-.2-1.4l-.5-.9-.8-.5c-.3-.2-.6-.2-1-.2a2 2 0 00-1.7.7c-.4.5-.6 1.2-.6 2.2v1.8h4.8zm-6.9-19.5h2.2v6.4h4v-5h2v5h4.1v-6.4H50v9.1H35.4v-9z"/>
|
||||
</g>
|
||||
<path fill="#6A6A6A" d="M140.5 146.4L137 142h7z"/>
|
||||
<path fill="#6A6A6A" d="M140.5 105l3.5 4.4h-7z"/>
|
||||
<path stroke="#6A6A6A" stroke-linecap="square" stroke-width="2" d="M140.4 141v-30.7"/>
|
||||
<g>
|
||||
<path fill="#6A6A6A" d="M316.5 146.4L313 142h7z"/>
|
||||
<path fill="#6A6A6A" d="M316.5 105l3.5 4.4h-7z"/>
|
||||
<path stroke="#6A6A6A" stroke-linecap="square" stroke-width="2" d="M316.4 141v-30.7"/>
|
||||
</g>
|
||||
<g>
|
||||
<path fill="#6A6A6A" d="M492.5 146.4L489 142h7z"/>
|
||||
<path fill="#6A6A6A" d="M492.5 105l3.5 4.4h-7z"/>
|
||||
<path stroke="#6A6A6A" stroke-linecap="square" stroke-width="2" d="M492.4 141v-30.7"/>
|
||||
</g>
|
||||
<g>
|
||||
<path fill="#6A6A6A" d="M140 253l-3.5-4.5h7z"/>
|
||||
<path fill="#6A6A6A" d="M140 211.5l3.5 4.4h-7z"/>
|
||||
<path stroke="#6A6A6A" stroke-linecap="square" stroke-width="2" d="M140 247.6v-30.8"/>
|
||||
<g>
|
||||
<path fill="#6A6A6A" d="M316 253l-3.5-4.5h7z"/>
|
||||
<path fill="#6A6A6A" d="M316 211.5l3.5 4.4h-7z"/>
|
||||
<path stroke="#6A6A6A" stroke-linecap="square" stroke-width="2" d="M316 247.6v-30.8"/>
|
||||
</g>
|
||||
<g>
|
||||
<path fill="#6A6A6A" d="M492 253l-3.5-4.5h7z"/>
|
||||
<path fill="#6A6A6A" d="M492 211.5l3.5 4.4h-7z"/>
|
||||
<path stroke="#6A6A6A" stroke-linecap="square" stroke-width="2" d="M492 247.6v-30.8"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
|
Before Width: | Height: | Size: 7.8 KiB After Width: | Height: | Size: 40 KiB |
|
@ -10,8 +10,9 @@ The **shared language data** in the directory root includes rules that can be
|
|||
generalized across languages – for example, rules for basic punctuation, emoji,
|
||||
emoticons and single-letter abbreviations. The **individual language data** in a
|
||||
submodule contains rules that are only relevant to a particular language. It
|
||||
also takes care of putting together all components and creating the `Language`
|
||||
subclass – for example, `English` or `German`.
|
||||
also takes care of putting together all components and creating the
|
||||
[`Language`](/api/language) subclass – for example, `English` or `German`. The
|
||||
language-specific values are defined in [`Language.Defaults`](/api/language#defaults).
|
||||
|
||||
> ```python
|
||||
> from spacy.lang.en import English
|
||||
|
@ -21,14 +22,6 @@ subclass – for example, `English` or `German`.
|
|||
> nlp_de = German() # Includes German data
|
||||
> ```
|
||||
|
||||
<!-- TODO: upgrade graphic
|
||||
|
||||
![Language data architecture](../../images/language_data.svg)
|
||||
|
||||
-->
|
||||
|
||||
<!-- TODO: remove this table in favor of more specific Language.Defaults docs in linguistic features? -->
|
||||
|
||||
| Name | Description |
|
||||
| ---------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| **Stop words**<br />[`stop_words.py`][stop_words.py] | List of most common words of a language that are often useful to filter out, for example "and" or "I". Matching tokens will return `True` for `is_stop`. |
|
||||
|
|
|
@ -13,13 +13,14 @@ passed on to the next component.
|
|||
> component.
|
||||
|
||||
| Name | Component | Creates | Description |
|
||||
| ------------- | ------------------------------------------------------------------ | ----------------------------------------------------------- | ------------------------------------------------ |
|
||||
| -------------- | ------------------------------------------------------------------ | --------------------------------------------------------- | ------------------------------------------------ |
|
||||
| **tokenizer** | [`Tokenizer`](/api/tokenizer) | `Doc` | Segment text into tokens. |
|
||||
| **tagger** | [`Tagger`](/api/tagger) | `Doc[i].tag` | Assign part-of-speech tags. |
|
||||
| **parser** | [`DependencyParser`](/api/dependencyparser) | `Doc[i].head`, `Doc[i].dep`, `Doc.sents`, `Doc.noun_chunks` | Assign dependency labels. |
|
||||
| **ner** | [`EntityRecognizer`](/api/entityrecognizer) | `Doc.ents`, `Doc[i].ent_iob`, `Doc[i].ent_type` | Detect and label named entities. |
|
||||
| **tagger** | [`Tagger`](/api/tagger) | `Token.tag` | Assign part-of-speech tags. |
|
||||
| **parser** | [`DependencyParser`](/api/dependencyparser) | `Token.head`, `Token.dep`, `Doc.sents`, `Doc.noun_chunks` | Assign dependency labels. |
|
||||
| **ner** | [`EntityRecognizer`](/api/entityrecognizer) | `Doc.ents`, `Token.ent_iob`, `Token.ent_type` | Detect and label named entities. |
|
||||
| **lemmatizer** | [`Lemmatizer`](/api/lemmatizer) | `Token.lemma` | Assign base forms. |
|
||||
| **textcat** | [`TextCategorizer`](/api/textcategorizer) | `Doc.cats` | Assign document labels. |
|
||||
| ... | [custom components](/usage/processing-pipelines#custom-components) | `Doc._.xxx`, `Token._.xxx`, `Span._.xxx` | Assign custom attributes, methods or properties. |
|
||||
| **custom** | [custom components](/usage/processing-pipelines#custom-components) | `Doc._.xxx`, `Token._.xxx`, `Span._.xxx` | Assign custom attributes, methods or properties. |
|
||||
|
||||
The processing pipeline always **depends on the statistical model** and its
|
||||
capabilities. For example, a pipeline can only include an entity recognizer
|
||||
|
|
|
@ -228,16 +228,13 @@ available pipeline components and component functions.
|
|||
| `entity_linker` | [`EntityLinker`](/api/entitylinker) | Assign knowledge base IDs to named entities. Should be added after the entity recognizer. |
|
||||
| `entity_ruler` | [`EntityRuler`](/api/entityruler) | Assign named entities based on pattern rules and dictionaries. |
|
||||
| `textcat` | [`TextCategorizer`](/api/textcategorizer) | Assign text categories. |
|
||||
| `lemmatizer` | [`Lemmatizer`](/api/lemmatizer) | Assign base forms to words. |
|
||||
| `morphologizer` | [`Morphologizer`](/api/morphologizer) | Assign morphological features and coarse-grained POS tags. |
|
||||
| `senter` | [`SentenceRecognizer`](/api/sentencerecognizer) | Assign sentence boundaries. |
|
||||
| `sentencizer` | [`Sentencizer`](/api/sentencizer) | Add rule-based sentence segmentation without the dependency parse. |
|
||||
| `tok2vec` | [`Tok2Vec`](/api/tok2vec) | Assign token-to-vector embeddings. |
|
||||
| `transformer` | [`Transformer`](/api/transformer) | Assign the tokens and outputs of a transformer model. |
|
||||
|
||||
<!-- TODO: finish and update with more components -->
|
||||
|
||||
<!-- TODO: explain default config and factories -->
|
||||
|
||||
### Disabling and modifying pipeline components {#disabling}
|
||||
|
||||
If you don't need a particular component of the pipeline – for example, the
|
||||
|
|
|
@ -10,7 +10,6 @@ menu:
|
|||
- ['Serialization', 'serialization']
|
||||
- ['Training', 'training']
|
||||
- ['Language Data', 'language-data']
|
||||
- ['Lightning Tour', 'lightning-tour']
|
||||
- ['Architecture', 'architecture']
|
||||
- ['Community & FAQ', 'community-faq']
|
||||
---
|
||||
|
@ -379,79 +378,6 @@ spaCy will also export the `Vocab` when you save a `Doc` or `nlp` object. This
|
|||
will give you the object and its encoded annotations, plus the "key" to decode
|
||||
it.
|
||||
|
||||
## Knowledge base {#kb}
|
||||
|
||||
To support the entity linking task, spaCy stores external knowledge in a
|
||||
[`KnowledgeBase`](/api/kb). The knowledge base (KB) uses the `Vocab` to store
|
||||
its data efficiently.
|
||||
|
||||
> - **Mention**: A textual occurrence of a named entity, e.g. 'Miss Lovelace'.
|
||||
> - **KB ID**: A unique identifier referring to a particular real-world concept,
|
||||
> e.g. 'Q7259'.
|
||||
> - **Alias**: A plausible synonym or description for a certain KB ID, e.g. 'Ada
|
||||
> Lovelace'.
|
||||
> - **Prior probability**: The probability of a certain mention resolving to a
|
||||
> certain KB ID, prior to knowing anything about the context in which the
|
||||
> mention is used.
|
||||
> - **Entity vector**: A pretrained word vector capturing the entity
|
||||
> description.
|
||||
|
||||
A knowledge base is created by first adding all entities to it. Next, for each
|
||||
potential mention or alias, a list of relevant KB IDs and their prior
|
||||
probabilities is added. The sum of these prior probabilities should never exceed
|
||||
1 for any given alias.
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
from spacy.kb import KnowledgeBase
|
||||
|
||||
# load the model and create an empty KB
|
||||
nlp = spacy.load('en_core_web_sm')
|
||||
kb = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=3)
|
||||
|
||||
# adding entities
|
||||
kb.add_entity(entity="Q1004791", freq=6, entity_vector=[0, 3, 5])
|
||||
kb.add_entity(entity="Q42", freq=342, entity_vector=[1, 9, -3])
|
||||
kb.add_entity(entity="Q5301561", freq=12, entity_vector=[-2, 4, 2])
|
||||
|
||||
# adding aliases
|
||||
kb.add_alias(alias="Douglas", entities=["Q1004791", "Q42", "Q5301561"], probabilities=[0.6, 0.1, 0.2])
|
||||
kb.add_alias(alias="Douglas Adams", entities=["Q42"], probabilities=[0.9])
|
||||
|
||||
print()
|
||||
print("Number of entities in KB:",kb.get_size_entities()) # 3
|
||||
print("Number of aliases in KB:", kb.get_size_aliases()) # 2
|
||||
```
|
||||
|
||||
### Candidate generation
|
||||
|
||||
Given a textual entity, the knowledge base can provide a list of plausible
|
||||
candidates or entity identifiers. The [`EntityLinker`](/api/entitylinker) will
|
||||
take this list of candidates as input, and disambiguate the mention to the most
|
||||
probable identifier, given the document context.
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
from spacy.kb import KnowledgeBase
|
||||
|
||||
nlp = spacy.load('en_core_web_sm')
|
||||
kb = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=3)
|
||||
|
||||
# adding entities
|
||||
kb.add_entity(entity="Q1004791", freq=6, entity_vector=[0, 3, 5])
|
||||
kb.add_entity(entity="Q42", freq=342, entity_vector=[1, 9, -3])
|
||||
kb.add_entity(entity="Q5301561", freq=12, entity_vector=[-2, 4, 2])
|
||||
|
||||
# adding aliases
|
||||
kb.add_alias(alias="Douglas", entities=["Q1004791", "Q42", "Q5301561"], probabilities=[0.6, 0.1, 0.2])
|
||||
|
||||
candidates = kb.get_candidates("Douglas")
|
||||
for c in candidates:
|
||||
print(" ", c.entity_, c.prior_prob, c.entity_vector)
|
||||
```
|
||||
|
||||
## Serialization {#serialization}
|
||||
|
||||
import Serialization101 from 'usage/101/\_serialization.md'
|
||||
|
@ -485,384 +411,6 @@ import LanguageData101 from 'usage/101/\_language-data.md'
|
|||
|
||||
<LanguageData101 />
|
||||
|
||||
## Lightning tour {#lightning-tour}
|
||||
|
||||
The following examples and code snippets give you an overview of spaCy's
|
||||
functionality and its usage.
|
||||
|
||||
### Install models and process text {#lightning-tour-models}
|
||||
|
||||
```bash
|
||||
python -m spacy download en_core_web_sm
|
||||
python -m spacy download de_core_news_sm
|
||||
```
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
doc = nlp("Hello, world. Here are two sentences.")
|
||||
print([t.text for t in doc])
|
||||
|
||||
nlp_de = spacy.load("de_core_news_sm")
|
||||
doc_de = nlp_de("Ich bin ein Berliner.")
|
||||
print([t.text for t in doc_de])
|
||||
|
||||
```
|
||||
|
||||
<Infobox>
|
||||
|
||||
**API:** [`spacy.load()`](/api/top-level#spacy.load) **Usage:**
|
||||
[Models](/usage/models), [spaCy 101](/usage/spacy-101)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Get tokens, noun chunks & sentences {#lightning-tour-tokens-sentences model="parser"}
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
doc = nlp("Peach emoji is where it has always been. Peach is the superior "
|
||||
"emoji. It's outranking eggplant 🍑 ")
|
||||
print(doc[0].text) # 'Peach'
|
||||
print(doc[1].text) # 'emoji'
|
||||
print(doc[-1].text) # '🍑'
|
||||
print(doc[17:19].text) # 'outranking eggplant'
|
||||
|
||||
noun_chunks = list(doc.noun_chunks)
|
||||
print(noun_chunks[0].text) # 'Peach emoji'
|
||||
|
||||
sentences = list(doc.sents)
|
||||
assert len(sentences) == 3
|
||||
print(sentences[1].text) # 'Peach is the superior emoji.'
|
||||
```
|
||||
|
||||
<Infobox>
|
||||
|
||||
**API:** [`Doc`](/api/doc), [`Token`](/api/token) **Usage:**
|
||||
[spaCy 101](/usage/spacy-101)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Get part-of-speech tags and flags {#lightning-tour-pos-tags model="tagger"}
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
|
||||
apple = doc[0]
|
||||
print("Coarse-grained POS tag", apple.pos_, apple.pos)
|
||||
print("Fine-grained POS tag", apple.tag_, apple.tag)
|
||||
print("Word shape", apple.shape_, apple.shape)
|
||||
print("Alphabetic characters?", apple.is_alpha)
|
||||
print("Punctuation mark?", apple.is_punct)
|
||||
|
||||
billion = doc[10]
|
||||
print("Digit?", billion.is_digit)
|
||||
print("Like a number?", billion.like_num)
|
||||
print("Like an email address?", billion.like_email)
|
||||
```
|
||||
|
||||
<Infobox>
|
||||
|
||||
**API:** [`Token`](/api/token) **Usage:**
|
||||
[Part-of-speech tagging](/usage/linguistic-features#pos-tagging)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Use hash values for any string {#lightning-tour-hashes}
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
doc = nlp("I love coffee")
|
||||
|
||||
coffee_hash = nlp.vocab.strings["coffee"] # 3197928453018144401
|
||||
coffee_text = nlp.vocab.strings[coffee_hash] # 'coffee'
|
||||
print(coffee_hash, coffee_text)
|
||||
print(doc[2].orth, coffee_hash) # 3197928453018144401
|
||||
print(doc[2].text, coffee_text) # 'coffee'
|
||||
|
||||
beer_hash = doc.vocab.strings.add("beer") # 3073001599257881079
|
||||
beer_text = doc.vocab.strings[beer_hash] # 'beer'
|
||||
print(beer_hash, beer_text)
|
||||
|
||||
unicorn_hash = doc.vocab.strings.add("🦄") # 18234233413267120783
|
||||
unicorn_text = doc.vocab.strings[unicorn_hash] # '🦄'
|
||||
print(unicorn_hash, unicorn_text)
|
||||
```
|
||||
|
||||
<Infobox>
|
||||
|
||||
**API:** [`StringStore`](/api/stringstore) **Usage:**
|
||||
[Vocab, hashes and lexemes 101](/usage/spacy-101#vocab)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Recognize and update named entities {#lightning-tour-entities model="ner"}
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
from spacy.tokens import Span
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
doc = nlp("San Francisco considers banning sidewalk delivery robots")
|
||||
for ent in doc.ents:
|
||||
print(ent.text, ent.start_char, ent.end_char, ent.label_)
|
||||
|
||||
doc = nlp("FB is hiring a new VP of global policy")
|
||||
doc.ents = [Span(doc, 0, 1, label="ORG")]
|
||||
for ent in doc.ents:
|
||||
print(ent.text, ent.start_char, ent.end_char, ent.label_)
|
||||
```
|
||||
|
||||
<Infobox>
|
||||
|
||||
**Usage:** [Named entity recognition](/usage/linguistic-features#named-entities)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Train and update neural network models {#lightning-tour-training}
|
||||
|
||||
```python
|
||||
import random
|
||||
import spacy
|
||||
from spacy.gold import Example
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
train_data = [("Uber blew through $1 million", {"entities": [(0, 4, "ORG")]})]
|
||||
|
||||
with nlp.select_pipes(enable="ner"):
|
||||
optimizer = nlp.begin_training()
|
||||
for i in range(10):
|
||||
random.shuffle(train_data)
|
||||
for text, annotations in train_data:
|
||||
doc = nlp.make_doc(text)
|
||||
example = Example.from_dict(doc, annotations)
|
||||
nlp.update([example], sgd=optimizer)
|
||||
nlp.to_disk("/model")
|
||||
```
|
||||
|
||||
<Infobox>
|
||||
|
||||
**API:** [`Language.update`](/api/language#update) **Usage:**
|
||||
[Training spaCy's statistical models](/usage/training)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Visualize a dependency parse and named entities in your browser {#lightning-tour-displacy model="parser, ner" new="2"}
|
||||
|
||||
> #### Output
|
||||
>
|
||||
> ![displaCy visualization](../images/displacy-small.svg)
|
||||
|
||||
```python
|
||||
from spacy import displacy
|
||||
|
||||
doc_dep = nlp("This is a sentence.")
|
||||
displacy.serve(doc_dep, style="dep")
|
||||
|
||||
doc_ent = nlp("When Sebastian Thrun started working on self-driving cars at Google "
|
||||
"in 2007, few people outside of the company took him seriously.")
|
||||
displacy.serve(doc_ent, style="ent")
|
||||
```
|
||||
|
||||
<Infobox>
|
||||
|
||||
**API:** [`displacy`](/api/top-level#displacy) **Usage:**
|
||||
[Visualizers](/usage/visualizers)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Get word vectors and similarity {#lightning-tour-word-vectors model="vectors"}
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
|
||||
nlp = spacy.load("en_core_web_md")
|
||||
doc = nlp("Apple and banana are similar. Pasta and hippo aren't.")
|
||||
|
||||
apple = doc[0]
|
||||
banana = doc[2]
|
||||
pasta = doc[6]
|
||||
hippo = doc[8]
|
||||
|
||||
print("apple <-> banana", apple.similarity(banana))
|
||||
print("pasta <-> hippo", pasta.similarity(hippo))
|
||||
print(apple.has_vector, banana.has_vector, pasta.has_vector, hippo.has_vector)
|
||||
```
|
||||
|
||||
For the best results, you should run this example using the
|
||||
[`en_vectors_web_lg`](/models/en-starters#en_vectors_web_lg) model (currently
|
||||
not available in the live demo).
|
||||
|
||||
<Infobox>
|
||||
|
||||
**Usage:** [Word vectors and similarity](/usage/vectors-embeddings)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Simple and efficient serialization {#lightning-tour-serialization}
|
||||
|
||||
```python
|
||||
import spacy
|
||||
from spacy.tokens import Doc
|
||||
from spacy.vocab import Vocab
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
customer_feedback = open("customer_feedback_627.txt").read()
|
||||
doc = nlp(customer_feedback)
|
||||
doc.to_disk("/tmp/customer_feedback_627.bin")
|
||||
|
||||
new_doc = Doc(Vocab()).from_disk("/tmp/customer_feedback_627.bin")
|
||||
```
|
||||
|
||||
<Infobox>
|
||||
|
||||
**API:** [`Language`](/api/language), [`Doc`](/api/doc) **Usage:**
|
||||
[Saving and loading models](/usage/saving-loading#models)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Match text with token rules {#lightning-tour-rule-matcher}
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
from spacy.matcher import Matcher
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
matcher = Matcher(nlp.vocab)
|
||||
|
||||
def set_sentiment(matcher, doc, i, matches):
|
||||
doc.sentiment += 0.1
|
||||
|
||||
patterns1 = [[{"ORTH": "Google"}, {"ORTH": "I"}, {"ORTH": "/"}, {"ORTH": "O"}]]
|
||||
patterns2 = [[{"ORTH": emoji, "OP": "+"}] for emoji in ["😀", "😂", "🤣", "😍"]]
|
||||
matcher.add("GoogleIO", patterns1) # Match "Google I/O"
|
||||
matcher.add("HAPPY", patterns2, on_match=set_sentiment) # Match one or more happy emoji
|
||||
|
||||
doc = nlp("A text about Google I/O 😀😀")
|
||||
matches = matcher(doc)
|
||||
|
||||
for match_id, start, end in matches:
|
||||
string_id = nlp.vocab.strings[match_id]
|
||||
span = doc[start:end]
|
||||
print(string_id, span.text)
|
||||
print("Sentiment", doc.sentiment)
|
||||
```
|
||||
|
||||
<Infobox>
|
||||
|
||||
**API:** [`Matcher`](/api/matcher) **Usage:**
|
||||
[Rule-based matching](/usage/rule-based-matching)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Minibatched stream processing {#lightning-tour-minibatched}
|
||||
|
||||
```python
|
||||
texts = ["One document.", "...", "Lots of documents"]
|
||||
# .pipe streams input, and produces streaming output
|
||||
iter_texts = (texts[i % 3] for i in range(100000000))
|
||||
for i, doc in enumerate(nlp.pipe(iter_texts, batch_size=50)):
|
||||
assert doc.is_parsed
|
||||
if i == 100:
|
||||
break
|
||||
```
|
||||
|
||||
### Get syntactic dependencies {#lightning-tour-dependencies model="parser"}
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
doc = nlp("When Sebastian Thrun started working on self-driving cars at Google "
|
||||
"in 2007, few people outside of the company took him seriously.")
|
||||
|
||||
dep_labels = []
|
||||
for token in doc:
|
||||
while token.head != token:
|
||||
dep_labels.append(token.dep_)
|
||||
token = token.head
|
||||
print(dep_labels)
|
||||
```
|
||||
|
||||
<Infobox>
|
||||
|
||||
**API:** [`Token`](/api/token) **Usage:**
|
||||
[Using the dependency parse](/usage/linguistic-features#dependency-parse)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Export to numpy arrays {#lightning-tour-numpy-arrays}
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
from spacy.attrs import ORTH, LIKE_URL
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
doc = nlp("Check out https://spacy.io")
|
||||
for token in doc:
|
||||
print(token.text, token.orth, token.like_url)
|
||||
|
||||
attr_ids = [ORTH, LIKE_URL]
|
||||
doc_array = doc.to_array(attr_ids)
|
||||
print(doc_array.shape)
|
||||
print(len(doc), len(attr_ids))
|
||||
|
||||
assert doc[0].orth == doc_array[0, 0]
|
||||
assert doc[1].orth == doc_array[1, 0]
|
||||
assert doc[0].like_url == doc_array[0, 1]
|
||||
|
||||
assert list(doc_array[:, 1]) == [t.like_url for t in doc]
|
||||
print(list(doc_array[:, 1]))
|
||||
```
|
||||
|
||||
### Calculate inline markup on original string {#lightning-tour-inline}
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
import spacy
|
||||
|
||||
def put_spans_around_tokens(doc):
|
||||
"""Here, we're building a custom "syntax highlighter" for
|
||||
part-of-speech tags and dependencies. We put each token in a
|
||||
span element, with the appropriate classes computed. All whitespace is
|
||||
preserved, outside of the spans. (Of course, HTML will only display
|
||||
multiple whitespace if enabled – but the point is, no information is lost
|
||||
and you can calculate what you need, e.g. <br />, <p> etc.)
|
||||
"""
|
||||
output = []
|
||||
for token in doc:
|
||||
if token.is_space:
|
||||
output.append(token.text)
|
||||
else:
|
||||
classes = f"pos-{token.pos_} dep-{token.dep_}"
|
||||
output.append(f'<span class="{classes}">{token.text}</span>{token.whitespace_}')
|
||||
string = "".join(output)
|
||||
string = string.replace("\\n", "")
|
||||
string = string.replace("\\t", " ")
|
||||
return f"<pre>{string}</pre>"
|
||||
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
doc = nlp("This is a test.\\n\\nHello world.")
|
||||
html = put_spans_around_tokens(doc)
|
||||
print(html)
|
||||
```
|
||||
|
||||
## Architecture {#architecture}
|
||||
|
||||
import Architecture101 from 'usage/101/\_architecture.md'
|
||||
|
|
|
@ -114,8 +114,9 @@ Some of the main advantages and features of spaCy's training config are:
|
|||
passed into them. You can also register your own functions to define
|
||||
[custom architectures](#custom-models), reference them in your config and
|
||||
tweak their parameters.
|
||||
- **Interpolation.** If you have hyperparameters used by multiple components,
|
||||
define them once and reference them as variables.
|
||||
- **Interpolation.** If you have hyperparameters or other settings used by
|
||||
multiple components, define them once and reference them as
|
||||
[variables](#config-interpolation).
|
||||
- **Reproducibility with no hidden defaults.** The config file is the "single
|
||||
source of truth" and includes all settings. <!-- TODO: explain this better -->
|
||||
- **Automated checks and validation.** When you load a config, spaCy checks if
|
||||
|
@ -307,7 +308,66 @@ compound = 1.001
|
|||
|
||||
### Using variable interpolation {#config-interpolation}
|
||||
|
||||
<!-- TODO: describe and come up with good example showing both values and sections -->
|
||||
Another very useful feature of the config system is that it supports variable
|
||||
interpolation for both **values and sections**. This means that you only need to
|
||||
define a setting once and can reference it across your config using the
|
||||
`${section:value}` or `${section.block}` syntax. In this example, the value of
|
||||
`seed` is reused within the `[training]` block, and the whole block of
|
||||
`[training.optimizer]` is reused in `[pretraining]` and will become
|
||||
`pretraining.optimizer`.
|
||||
|
||||
> #### Note on syntax
|
||||
>
|
||||
> There are two different ways to format your variables, depending on whether
|
||||
> you want to reference a single value or a block. Values are specified after a
|
||||
> `:`, while blocks are specified with a `.`:
|
||||
>
|
||||
> 1. `${section:value}`, `${section.subsection:value}`
|
||||
> 2. `${section.block}`, `${section.subsection.block}`
|
||||
|
||||
```ini
|
||||
### config.cfg (excerpt) {highlight="5,18"}
|
||||
[system]
|
||||
seed = 0
|
||||
|
||||
[training]
|
||||
seed = ${system:seed}
|
||||
|
||||
[training.optimizer]
|
||||
@optimizers = "Adam.v1"
|
||||
beta1 = 0.9
|
||||
beta2 = 0.999
|
||||
L2_is_weight_decay = true
|
||||
L2 = 0.01
|
||||
grad_clip = 1.0
|
||||
use_averages = false
|
||||
eps = 1e-8
|
||||
|
||||
[pretraining]
|
||||
optimizer = ${training.optimizer}
|
||||
```
|
||||
|
||||
You can also use variables inside strings. In that case, it works just like
|
||||
f-strings in Python. If the value of a variable is not a string, it's converted
|
||||
to a string.
|
||||
|
||||
```ini
|
||||
[paths]
|
||||
version = 5
|
||||
root = "/Users/you/data"
|
||||
train = "${paths:root}/train_${paths:version}.spacy"
|
||||
# Result: /Users/you/data/train_5.spacy
|
||||
```
|
||||
|
||||
<Infobox title="Tip: Override variables on the CLI" emoji="💡">
|
||||
|
||||
If you need to change certain values between training runs, you can define them
|
||||
once, reference them as variables and then [override](#config-overrides) them on
|
||||
the CLI. For example, `--paths.root /other/root` will change the value of `root`
|
||||
in the block `[paths]` and the change will be reflected across all other values
|
||||
that reference this variable.
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Model architectures {#model-architectures}
|
||||
|
||||
|
|
|
@ -373,7 +373,7 @@ body [id]:target
|
|||
margin-right: -1.5em
|
||||
margin-left: -1.5em
|
||||
padding-right: 1.5em
|
||||
padding-left: 1.1em
|
||||
padding-left: 1.25em
|
||||
|
||||
&:empty:before
|
||||
// Fix issue where empty lines would disappear
|
||||
|
|
Loading…
Reference in New Issue
Block a user