Merge branch 'master' into spacy.io

Ines Montani 2019-03-22 15:17:51 +01:00
commit 680eafab94
9 changed files with 276 additions and 18 deletions

View File

@@ -4,7 +4,7 @@ preshed>=2.0.1,<2.1.0
thinc>=7.0.2,<7.1.0
blis>=0.2.2,<0.3.0
murmurhash>=0.28.0,<1.1.0
wasabi>=0.1.3,<1.1.0
wasabi>=0.2.0,<1.1.0
srsly>=0.0.5,<1.1.0
# Third party dependencies
numpy>=1.15.0

View File

@@ -232,7 +232,7 @@ def setup_package():
"plac<1.0.0,>=0.9.6",
"requests>=2.13.0,<3.0.0",
"jsonschema>=2.6.0,<3.0.0",
"wasabi>=0.0.12,<1.1.0",
"wasabi>=0.2.0,<1.1.0",
"srsly>=0.0.5,<1.1.0",
'pathlib==1.0.1; python_version < "3.4"',
],

View File

@@ -4,7 +4,7 @@
# fmt: off
__title__ = "spacy"
__version__ = "2.1.1"
__version__ = "2.1.2"
__summary__ = "Industrial-strength Natural Language Processing (NLP) with Python and Cython"
__uri__ = "https://spacy.io"
__author__ = "Explosion AI"

View File

@@ -11,6 +11,7 @@ from __future__ import unicode_literals
import os
import sys
import itertools
import ast
from thinc.neural.util import copy_array
@@ -150,3 +151,26 @@ def import_file(name, loc):
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
return module
def unescape_unicode(string):
"""Python2.7's re module chokes when compiling patterns that have ranges
between escaped unicode codepoints if the two codepoints are unrecognised
in the unicode database. For instance:
re.compile('[\\uAA77-\\uAA79]').findall("hello")
Ends up matching every character (on Python 2). This problem doesn't occur
if we're dealing with unicode literals.
"""
if string is None:
return string
# We only want to unescape the unicode, so we first must protect the other
# backslashes.
string = string.replace("\\", "\\\\")
# Now we remove that protection for the unicode.
string = string.replace("\\\\u", "\\u")
string = string.replace("\\\\U", "\\U")
# Now we unescape by evaluating the string as a unicode literal with
# ast.literal_eval. This can't execute code -- it only evaluates the
# literal representation.
return ast.literal_eval("u'''" + string + "'''")
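A minimal usage sketch of the new helper (not part of the diff; the escaped pattern is a hypothetical value, and the import path assumes the function lives in spacy.compat, as the tokenizer import further below suggests):

from spacy.compat import unescape_unicode

escaped = "[\\uAA77-\\uAA79]"         # backslash escapes, e.g. as read from serialized tokenizer data
literal = unescape_unicode(escaped)   # the escapes become real codepoints U+AA77 and U+AA79
assert literal == u"[\uAA77-\uAA79]"  # now safe to pass to re.compile on Python 2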

View File

@@ -1,13 +1,97 @@
# coding: utf8
from __future__ import unicode_literals
from ...symbols import LEMMA, PRON_LEMMA
from ...symbols import LEMMA, PRON_LEMMA, AUX
_subordinating_conjunctions = [
"that",
"if",
"as",
"because",
"of",
"for",
"before",
"in",
"while",
"after",
"since",
"like",
"with",
"so",
"to",
"by",
"on",
"about",
"than",
"whether",
"although",
"from",
"though",
"until",
"unless",
"once",
"without",
"at",
"into",
"cause",
"over",
"upon",
"till",
"whereas",
"beyond",
"whilst",
"except",
"despite",
"wether",
"then",
"but",
"becuse",
"whie",
"below",
"against",
"it",
"w/out",
"toward",
"albeit",
"save",
"besides",
"becouse",
"coz",
"til",
"ask",
"i'd",
"out",
"near",
"seince",
"towards",
"tho",
"sice",
"will",
]
_relative_pronouns = ["this", "that", "those", "these"]
MORPH_RULES = {
"DT": {word: {"POS": "PRON"} for word in _relative_pronouns},
"IN": {word: {"POS": "SCONJ"} for word in _subordinating_conjunctions},
"NN": {
"something": {"POS": "PRON"},
"anyone": {"POS": "PRON"},
"anything": {"POS": "PRON"},
"nothing": {"POS": "PRON"},
"someone": {"POS": "PRON"},
"everything": {"POS": "PRON"},
"everyone": {"POS": "PRON"},
"everybody": {"POS": "PRON"},
"nobody": {"POS": "PRON"},
"somebody": {"POS": "PRON"},
"anybody": {"POS": "PRON"},
"any1": {"POS": "PRON"},
},
"PRP": {
"I": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "One",
"Number": "Sing",
@@ -15,14 +99,16 @@ MORPH_RULES = {
},
"me": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "One",
"Number": "Sing",
"Case": "Acc",
},
"you": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two"},
"you": {LEMMA: PRON_LEMMA, "POS": "PRON", "PronType": "Prs", "Person": "Two"},
"he": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -31,6 +117,7 @@ MORPH_RULES = {
},
"him": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -39,6 +126,7 @@ MORPH_RULES = {
},
"she": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -47,6 +135,7 @@ MORPH_RULES = {
},
"her": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -55,6 +144,7 @@ MORPH_RULES = {
},
"it": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -62,6 +152,7 @@ MORPH_RULES = {
},
"we": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "One",
"Number": "Plur",
@@ -69,6 +160,7 @@ MORPH_RULES = {
},
"us": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "One",
"Number": "Plur",
@@ -76,6 +168,7 @@ MORPH_RULES = {
},
"they": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Plur",
@@ -83,6 +176,7 @@ MORPH_RULES = {
},
"them": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Plur",
@@ -90,6 +184,7 @@ MORPH_RULES = {
},
"mine": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "One",
"Number": "Sing",
@@ -98,6 +193,7 @@ MORPH_RULES = {
},
"his": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -107,6 +203,7 @@ MORPH_RULES = {
},
"hers": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -116,6 +213,7 @@ MORPH_RULES = {
},
"its": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -125,6 +223,7 @@ MORPH_RULES = {
},
"ours": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "One",
"Number": "Plur",
@@ -133,6 +232,7 @@ MORPH_RULES = {
},
"yours": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Two",
"Number": "Plur",
@@ -141,6 +241,7 @@ MORPH_RULES = {
},
"theirs": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Plur",
@@ -149,6 +250,7 @@ MORPH_RULES = {
},
"myself": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "One",
"Number": "Sing",
@@ -157,6 +259,7 @@ MORPH_RULES = {
},
"yourself": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Two",
"Case": "Acc",
@@ -164,6 +267,7 @@ MORPH_RULES = {
},
"himself": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -173,6 +277,7 @@ MORPH_RULES = {
},
"herself": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -182,6 +287,7 @@ MORPH_RULES = {
},
"itself": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -191,6 +297,7 @@ MORPH_RULES = {
},
"themself": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Sing",
@@ -199,6 +306,7 @@ MORPH_RULES = {
},
"ourselves": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "One",
"Number": "Plur",
@@ -207,6 +315,7 @@ MORPH_RULES = {
},
"yourselves": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Two",
"Case": "Acc",
@@ -214,6 +323,7 @@ MORPH_RULES = {
},
"themselves": {
LEMMA: PRON_LEMMA,
"POS": "PRON",
"PronType": "Prs",
"Person": "Three",
"Number": "Plur",
@@ -269,9 +379,17 @@ MORPH_RULES = {
"Poss": "Yes",
},
},
"RB": {word: {"POS": "PART"} for word in ["not", "n't", "nt", "n’t"]},
"VB": {
word: {"POS": "AUX"}
for word in ["be", "have", "do", "get", "of", "am", "are", "'ve"]
},
"VBN": {"been": {LEMMA: "be", "POS": "AUX"}},
"VBG": {"being": {LEMMA: "be", "POS": "AUX"}},
"VBZ": {
"am": {
LEMMA: "be",
"POS": "AUX",
"VerbForm": "Fin",
"Person": "One",
"Tense": "Pres",
@@ -279,6 +397,7 @@ MORPH_RULES = {
},
"are": {
LEMMA: "be",
"POS": "AUX",
"VerbForm": "Fin",
"Person": "Two",
"Tense": "Pres",
@@ -286,6 +405,7 @@ MORPH_RULES = {
},
"is": {
LEMMA: "be",
"POS": "AUX",
"VerbForm": "Fin",
"Person": "Three",
"Tense": "Pres",
@@ -293,6 +413,7 @@ MORPH_RULES = {
},
"'re": {
LEMMA: "be",
"POS": "AUX",
"VerbForm": "Fin",
"Person": "Two",
"Tense": "Pres",
@@ -300,26 +421,65 @@ MORPH_RULES = {
},
"'s": {
LEMMA: "be",
"POS": "AUX",
"VerbForm": "Fin",
"Person": "Three",
"Tense": "Pres",
"Mood": "Ind",
},
"has": {LEMMA: "have", "POS": "AUX"},
"does": {LEMMA: "do", "POS": "AUX"},
},
"VBP": {
"are": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"},
"'re": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"},
"are": {
LEMMA: "be",
"POS": "AUX",
"VerbForm": "Fin",
"Tense": "Pres",
"Mood": "Ind",
},
"'re": {
LEMMA: "be",
"POS": "AUX",
"VerbForm": "Fin",
"Tense": "Pres",
"Mood": "Ind",
},
"am": {
LEMMA: "be",
"POS": "AUX",
"VerbForm": "Fin",
"Person": "One",
"Tense": "Pres",
"Mood": "Ind",
},
"do": {"POS": "AUX"},
"have": {"POS": "AUX"},
"'m": {"POS": "AUX", LEMMA: "be"},
"'ve": {"POS": "AUX"},
"'re": {"POS": "AUX", LEMMA: "be"},
"'s": {"POS": "AUX"},
"is": {"POS": "AUX"},
"'d": {"POS": "AUX"},
},
"VBD": {
"was": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
"were": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"},
"was": {
LEMMA: "be",
"POS": "AUX",
"VerbForm": "Fin",
"Tense": "Past",
"Number": "Sing",
},
"were": {
LEMMA: "be",
"POS": "AUX",
"VerbForm": "Fin",
"Tense": "Past",
"Number": "Plur",
},
"did": {LEMMA: "do", "POS": "AUX"},
"had": {LEMMA: "have", "POS": "AUX"},
"'d": {LEMMA: "have", "POS": "AUX"},
},
}
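A quick sanity check of the overrides above (a sketch against the dict defined in this file, not part of the diff): the rules map a fine-grained tag plus a word form to the attributes the tagger will set, so the new AUX and SCONJ assignments can be read off directly.

assert MORPH_RULES["VBZ"]["has"] == {LEMMA: "have", "POS": "AUX"}
assert MORPH_RULES["IN"]["because"] == {"POS": "SCONJ"}
assert MORPH_RULES["RB"]["n't"] == {"POS": "PART"}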

View File

@@ -2,7 +2,7 @@
from __future__ import unicode_literals
from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON
from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON, AUX
TAG_MAP = {
@@ -20,15 +20,15 @@ TAG_MAP = {
"CC": {POS: CCONJ, "ConjType": "coor"},
"CD": {POS: NUM, "NumType": "card"},
"DT": {POS: DET},
"EX": {POS: ADV, "AdvType": "ex"},
"EX": {POS: PRON, "AdvType": "ex"},
"FW": {POS: X, "Foreign": "yes"},
"HYPH": {POS: PUNCT, "PunctType": "dash"},
"IN": {POS: ADP},
"JJ": {POS: ADJ, "Degree": "pos"},
"JJR": {POS: ADJ, "Degree": "comp"},
"JJS": {POS: ADJ, "Degree": "sup"},
"LS": {POS: PUNCT, "NumType": "ord"},
"MD": {POS: VERB, "VerbType": "mod"},
"LS": {POS: X, "NumType": "ord"},
"MD": {POS: AUX, "VerbType": "mod"},
"NIL": {POS: ""},
"NN": {POS: NOUN, "Number": "sing"},
"NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
@@ -37,11 +37,11 @@ TAG_MAP = {
"PDT": {POS: DET, "AdjType": "pdt", "PronType": "prn"},
"POS": {POS: PART, "Poss": "yes"},
"PRP": {POS: PRON, "PronType": "prs"},
"PRP$": {POS: DET, "PronType": "prs", "Poss": "yes"},
"PRP$": {POS: PRON, "PronType": "prs", "Poss": "yes"},
"RB": {POS: ADV, "Degree": "pos"},
"RBR": {POS: ADV, "Degree": "comp"},
"RBS": {POS: ADV, "Degree": "sup"},
"RP": {POS: PART},
"RP": {POS: ADP},
"SP": {POS: SPACE},
"SYM": {POS: SYM},
"TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"},
@@ -58,9 +58,9 @@ TAG_MAP = {
"Number": "sing",
"Person": 3,
},
"WDT": {POS: DET, "PronType": "int|rel"},
"WDT": {POS: PRON, "PronType": "int|rel"},
"WP": {POS: PRON, "PronType": "int|rel"},
"WP$": {POS: DET, "Poss": "yes", "PronType": "int|rel"},
"WP$": {POS: PRON, "Poss": "yes", "PronType": "int|rel"},
"WRB": {POS: ADV, "PronType": "int|rel"},
"ADD": {POS: X},
"NFP": {POS: PUNCT},

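The practical effect of the MD and WDT changes can be sketched as lookups against the full TAG_MAP defined in this file (illustrative assertions, not part of the diff):

assert TAG_MAP["MD"] == {POS: AUX, "VerbType": "mod"}        # modals now map to UD AUX rather than VERB
assert TAG_MAP["WDT"] == {POS: PRON, "PronType": "int|rel"}  # wh-determiners now map to PRON rather than DET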
View File

@@ -0,0 +1,70 @@
import pytest
import re
from ... import compat
prefix_search = (
b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])"
b"|^\xe2\x80\xa6|^\xe2\x80\xa6\xe2\x80\xa6|^,|^:|^;|^\\!|^\\?"
b"|^\xc2\xbf|^\xd8\x9f|^\xc2\xa1|^\\(|^\\)|^\\[|^\\]|^\\{|^\\}"
b"|^<|^>|^_|^#|^\\*|^&|^\xe3\x80\x82|^\xef\xbc\x9f|^\xef\xbc\x81|"
b"^\xef\xbc\x8c|^\xe3\x80\x81|^\xef\xbc\x9b|^\xef\xbc\x9a|"
b"^\xef\xbd\x9e|^\xc2\xb7|^\xe0\xa5\xa4|^\xd8\x8c|^\xd8\x9b|"
b"^\xd9\xaa|^\\.\\.+|^\xe2\x80\xa6|^\\'|^\"|^\xe2\x80\x9d|"
b"^\xe2\x80\x9c|^`|^\xe2\x80\x98|^\xc2\xb4|^\xe2\x80\x99|"
b"^\xe2\x80\x9a|^,|^\xe2\x80\x9e|^\xc2\xbb|^\xc2\xab|^\xe3\x80\x8c|"
b"^\xe3\x80\x8d|^\xe3\x80\x8e|^\xe3\x80\x8f|^\xef\xbc\x88|"
b"^\xef\xbc\x89|^\xe3\x80\x94|^\xe3\x80\x95|^\xe3\x80\x90|"
b"^\xe3\x80\x91|^\xe3\x80\x8a|^\xe3\x80\x8b|^\xe3\x80\x88|"
b"^\xe3\x80\x89|^\\$|^\xc2\xa3|^\xe2\x82\xac|^\xc2\xa5|^\xe0\xb8\xbf|"
b"^US\\$|^C\\$|^A\\$|^\xe2\x82\xbd|^\xef\xb7\xbc|^\xe2\x82\xb4|"
b"^[\\u00A6\\u00A9\\u00AE\\u00B0\\u0482\\u058D\\u058E\\u060E\\u060F"
b"\\u06DE\\u06E9\\u06FD\\u06FE\\u07F6\\u09FA\\u0B70\\u0BF3-\\u0BF8"
b"\\u0BFA\\u0C7F\\u0D4F\\u0D79\\u0F01-\\u0F03\\u0F13\\u0F15-\\u0F17"
b"\\u0F1A-\\u0F1F\\u0F34\\u0F36\\u0F38\\u0FBE-\\u0FC5\\u0FC7-\\u0FCC"
b"\\u0FCE\\u0FCF\\u0FD5-\\u0FD8\\u109E\\u109F\\u1390-\\u1399\\u1940"
b"\\u19DE-\\u19FF\\u1B61-\\u1B6A\\u1B74-\\u1B7C\\u2100\\u2101\\u2103"
b"-\\u2106\\u2108\\u2109\\u2114\\u2116\\u2117\\u211E-\\u2123\\u2125"
b"\\u2127\\u2129\\u212E\\u213A\\u213B\\u214A\\u214C\\u214D\\u214F"
b"\\u218A\\u218B\\u2195-\\u2199\\u219C-\\u219F\\u21A1\\u21A2\\u21A4"
b"\\u21A5\\u21A7-\\u21AD\\u21AF-\\u21CD\\u21D0\\u21D1\\u21D3\\u21D5"
b"-\\u21F3\\u2300-\\u2307\\u230C-\\u231F\\u2322-\\u2328\\u232B"
b"-\\u237B\\u237D-\\u239A\\u23B4-\\u23DB\\u23E2-\\u2426\\u2440"
b"-\\u244A\\u249C-\\u24E9\\u2500-\\u25B6\\u25B8-\\u25C0\\u25C2"
b"-\\u25F7\\u2600-\\u266E\\u2670-\\u2767\\u2794-\\u27BF\\u2800"
b"-\\u28FF\\u2B00-\\u2B2F\\u2B45\\u2B46\\u2B4D-\\u2B73\\u2B76"
b"-\\u2B95\\u2B98-\\u2BC8\\u2BCA-\\u2BFE\\u2CE5-\\u2CEA\\u2E80"
b"-\\u2E99\\u2E9B-\\u2EF3\\u2F00-\\u2FD5\\u2FF0-\\u2FFB\\u3004"
b"\\u3012\\u3013\\u3020\\u3036\\u3037\\u303E\\u303F\\u3190\\u3191"
b"\\u3196-\\u319F\\u31C0-\\u31E3\\u3200-\\u321E\\u322A-\\u3247\\u3250"
b"\\u3260-\\u327F\\u328A-\\u32B0\\u32C0-\\u32FE\\u3300-\\u33FF\\u4DC0"
b"-\\u4DFF\\uA490-\\uA4C6\\uA828-\\uA82B\\uA836\\uA837\\uA839\\uAA77"
b"-\\uAA79\\uFDFD\\uFFE4\\uFFE8\\uFFED\\uFFEE\\uFFFC\\uFFFD\\U00010137"
b"-\\U0001013F\\U00010179-\\U00010189\\U0001018C-\\U0001018E"
b"\\U00010190-\\U0001019B\\U000101A0\\U000101D0-\\U000101FC\\U00010877"
b"\\U00010878\\U00010AC8\\U0001173F\\U00016B3C-\\U00016B3F\\U00016B45"
b"\\U0001BC9C\\U0001D000-\\U0001D0F5\\U0001D100-\\U0001D126\\U0001D129"
b"-\\U0001D164\\U0001D16A-\\U0001D16C\\U0001D183\\U0001D184\\U0001D18C"
b"-\\U0001D1A9\\U0001D1AE-\\U0001D1E8\\U0001D200-\\U0001D241\\U0001D245"
b"\\U0001D300-\\U0001D356\\U0001D800-\\U0001D9FF\\U0001DA37-\\U0001DA3A"
b"\\U0001DA6D-\\U0001DA74\\U0001DA76-\\U0001DA83\\U0001DA85\\U0001DA86"
b"\\U0001ECAC\\U0001F000-\\U0001F02B\\U0001F030-\\U0001F093\\U0001F0A0"
b"-\\U0001F0AE\\U0001F0B1-\\U0001F0BF\\U0001F0C1-\\U0001F0CF\\U0001F0D1"
b"-\\U0001F0F5\\U0001F110-\\U0001F16B\\U0001F170-\\U0001F1AC\\U0001F1E6"
b"-\\U0001F202\\U0001F210-\\U0001F23B\\U0001F240-\\U0001F248\\U0001F250"
b"\\U0001F251\\U0001F260-\\U0001F265\\U0001F300-\\U0001F3FA\\U0001F400"
b"-\\U0001F6D4\\U0001F6E0-\\U0001F6EC\\U0001F6F0-\\U0001F6F9\\U0001F700"
b"-\\U0001F773\\U0001F780-\\U0001F7D8\\U0001F800-\\U0001F80B\\U0001F810"
b"-\\U0001F847\\U0001F850-\\U0001F859\\U0001F860-\\U0001F887\\U0001F890"
b"-\\U0001F8AD\\U0001F900-\\U0001F90B\\U0001F910-\\U0001F93E\\U0001F940"
b"-\\U0001F970\\U0001F973-\\U0001F976\\U0001F97A\\U0001F97C-\\U0001F9A2"
b"\\U0001F9B0-\\U0001F9B9\\U0001F9C0-\\U0001F9C2\\U0001F9D0-\\U0001F9FF"
b"\\U0001FA60-\\U0001FA6D]"
)
if compat.is_python2:
# If we have this test in Python 3, pytest chokes, as it can't print the
# string above in the xpass message.
def test_issue3356():
pattern = re.compile(compat.unescape_unicode(prefix_search.decode("utf8")))
assert not pattern.search(u"hello")

View File

@@ -14,6 +14,7 @@ import re
from .tokens.doc cimport Doc
from .strings cimport hash_string
from .compat import unescape_unicode
from .errors import Errors, Warnings, deprecation_warning
from . import util
@@ -428,6 +429,9 @@ cdef class Tokenizer:
))
exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
msg = util.from_bytes(bytes_data, deserializers, exclude)
for key in ["prefix_search", "suffix_search", "infix_finditer"]:
if key in data:
data[key] = unescape_unicode(data[key])
if data.get("prefix_search"):
self.prefix_search = re.compile(data["prefix_search"]).search
if data.get("suffix_search"):
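The unescaping hook above sits in the tokenizer's deserialization path, so a plain bytes round trip should exercise it; a hedged sketch, assuming a blank English pipeline (not taken from the diff):

import spacy

nlp = spacy.blank("en")
tokenizer_bytes = nlp.tokenizer.to_bytes()
nlp.tokenizer.from_bytes(tokenizer_bytes)  # prefix/suffix/infix patterns are unescaped before re.compile
assert [t.text for t in nlp("hello world")] == ["hello", "world"]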

View File

@@ -218,7 +218,7 @@ const Landing = ({ data }) => {
<H2>Benchmarks</H2>
<p>
In 2015, independent researchers from Emory University and Yahoo! Labs
showed that spaCy offered the
showed that spaCy offered the{' '}
<strong>fastest syntactic parser in the world</strong> and that its accuracy
was <strong>within 1% of the best</strong> available (
<Link to="https://aclweb.org/anthology/P/P15/P15-1038.pdf">