From 0f28418446581dd5df8807f44dcfa72371bdbd98 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Wed, 4 Sep 2019 20:42:24 +0200
Subject: [PATCH 1/2] Add regression test for #1061 back to test suite

---
 spacy/tests/regression/test_issue1001-1500.py | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/spacy/tests/regression/test_issue1001-1500.py b/spacy/tests/regression/test_issue1001-1500.py
index 15da1061b..9074b34b7 100644
--- a/spacy/tests/regression/test_issue1001-1500.py
+++ b/spacy/tests/regression/test_issue1001-1500.py
@@ -13,6 +13,28 @@ from spacy.lemmatizer import Lemmatizer
 from spacy.symbols import ORTH, LEMMA, POS, VERB, VerbForm_part
 
 
+@pytest.mark.xfail
+def test_issue1061():
+    '''Test special-case works after tokenizing. Was caching problem.'''
+    text = 'I like _MATH_ even _MATH_ when _MATH_, except when _MATH_ is _MATH_! but not _MATH_.'
+    tokenizer = English.Defaults.create_tokenizer()
+    doc = tokenizer(text)
+    assert 'MATH' in [w.text for w in doc]
+    assert '_MATH_' not in [w.text for w in doc]
+
+    tokenizer.add_special_case('_MATH_', [{ORTH: '_MATH_'}])
+    doc = tokenizer(text)
+    assert '_MATH_' in [w.text for w in doc]
+    assert 'MATH' not in [w.text for w in doc]
+
+    # For sanity, check it works when pipeline is clean.
+    tokenizer = English.Defaults.create_tokenizer()
+    tokenizer.add_special_case('_MATH_', [{ORTH: '_MATH_'}])
+    doc = tokenizer(text)
+    assert '_MATH_' in [w.text for w in doc]
+    assert 'MATH' not in [w.text for w in doc]
+
+
 @pytest.mark.xfail(
     reason="g is split of as a unit, as the suffix regular expression can not look back further (variable-width)"
 )
From 232a029de68e7e238dfb066a74185ae78c75da49 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Thu, 5 Sep 2019 10:41:46 +0200
Subject: [PATCH 2/2] Send referrer for internal links [ci skip]

---
 website/src/components/link.js | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/website/src/components/link.js b/website/src/components/link.js
index 35b3c23cf..4c4aa9492 100644
--- a/website/src/components/link.js
+++ b/website/src/components/link.js
@@ -8,6 +8,11 @@ import Icon from './icon'
 import classes from '../styles/link.module.sass'
 import { isString } from './util'
 
+// Deliberately not global (`g`): `RegExp.prototype.test` on a global regex
+// advances `lastIndex`, so repeated calls on this shared instance would
+// intermittently miss internal links. Dots are escaped to match literally.
+const internalRegex = /(http(s?)):\/\/(prodi\.gy|spacy\.io|irl\.spacy\.io)/i
+
 const Whitespace = ({ children }) => (
     // Ensure that links are always wrapped in spaces
     <> {children}
@@ -68,13 +73,15 @@ const Link = ({
         )
     }
 
+    const isInternal = internalRegex.test(dest)
+    const rel = isInternal ? null : 'noopener nofollow noreferrer'
     return (
         {content}