Merge branch 'master' into spacy.io

This commit is contained in:
Ines Montani 2019-09-05 10:41:56 +02:00
commit 26f92826f0
2 changed files with 27 additions and 1 deletions

View File

@ -13,6 +13,28 @@ from spacy.lemmatizer import Lemmatizer
from spacy.symbols import ORTH, LEMMA, POS, VERB, VerbForm_part
@pytest.mark.xfail
def test_issue1061():
    """Check that a special case added after tokenizing is honoured.

    Regression test for a caching problem: the same tokenizer is run on the
    text once before the special case is registered and once after, and the
    second run must produce the special-case token. Currently marked as an
    expected failure.
    """
    text = 'I like _MATH_ even _MATH_ when _MATH_, except when _MATH_ is _MATH_! but not _MATH_.'
    special = '_MATH_'
    bare = 'MATH'

    # Before any special case exists, the underscores are expected to be
    # split off, leaving the bare word as its own token.
    tokenizer = English.Defaults.create_tokenizer()
    words = [token.text for token in tokenizer(text)]
    assert bare in words
    assert special not in words

    # Register the special case on the tokenizer that has already been used.
    tokenizer.add_special_case(special, [{ORTH: special}])
    words = [token.text for token in tokenizer(text)]
    assert special in words
    assert bare not in words

    # For sanity, check it works when the pipeline is clean, i.e. the special
    # case is added to a fresh tokenizer before it sees any text.
    fresh_tokenizer = English.Defaults.create_tokenizer()
    fresh_tokenizer.add_special_case(special, [{ORTH: special}])
    words = [token.text for token in fresh_tokenizer(text)]
    assert special in words
    assert bare not in words
@pytest.mark.xfail(
reason="g is split of as a unit, as the suffix regular expression can not look back further (variable-width)"
)

View File

@ -8,6 +8,8 @@ import Icon from './icon'
import classes from '../styles/link.module.sass'
import { isString } from './util'
// Matches URLs that point to one of our own sites. Deliberately NOT global:
// a regex with the `g` flag keeps `lastIndex` between `.test()` calls, so
// reusing it would make an internal URL test false right after a match.
// The dots are escaped so they only match a literal "." in the host name.
const internalRegex = /(http(s?)):\/\/(prodi\.gy|spacy\.io|irl\.spacy\.io)/i
const Whitespace = ({ children }) => (
// Ensure that links are always wrapped in spaces
<> {children} </>
@ -68,13 +70,15 @@ const Link = ({
</Wrapper>
)
}
const isInternal = internalRegex.test(dest)
const rel = isInternal ? null : 'noopener nofollow noreferrer'
return (
<Wrapper>
<OutboundLink
href={dest}
className={linkClassNames}
target="_blank"
rel="noopener nofollow noreferrer"
rel={rel}
{...other}
>
{content}