mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-09 16:58:17 +03:00
Merge branch 'master' into spacy.io
This commit is contained in:
commit
26f92826f0
|
@ -13,6 +13,28 @@ from spacy.lemmatizer import Lemmatizer
|
||||||
from spacy.symbols import ORTH, LEMMA, POS, VERB, VerbForm_part
|
from spacy.symbols import ORTH, LEMMA, POS, VERB, VerbForm_part
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail
def test_issue1061():
    """Regression test for issue 1061: a special case registered after the
    tokenizer has already been used must still be applied (the original
    failure was a caching problem).
    """
    sentence = 'I like _MATH_ even _MATH_ when _MATH_, except when _MATH_ is _MATH_! but not _MATH_.'

    # Before any special case is added, the expectation is that the
    # placeholder is broken up and a bare 'MATH' token appears.
    used_tokenizer = English.Defaults.create_tokenizer()
    words = [token.text for token in used_tokenizer(sentence)]
    assert 'MATH' in words
    assert '_MATH_' not in words

    # Add the special case to the tokenizer that has already processed the
    # text, then tokenize the very same text again.
    used_tokenizer.add_special_case('_MATH_', [{ORTH: '_MATH_'}])
    words = [token.text for token in used_tokenizer(sentence)]
    assert '_MATH_' in words
    assert 'MATH' not in words

    # For sanity, check it works when pipeline is clean.
    fresh_tokenizer = English.Defaults.create_tokenizer()
    fresh_tokenizer.add_special_case('_MATH_', [{ORTH: '_MATH_'}])
    words = [token.text for token in fresh_tokenizer(sentence)]
    assert '_MATH_' in words
    assert 'MATH' not in words
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.xfail(
|
@pytest.mark.xfail(
|
||||||
reason="g is split of as a unit, as the suffix regular expression can not look back further (variable-width)"
|
reason="g is split of as a unit, as the suffix regular expression can not look back further (variable-width)"
|
||||||
)
|
)
|
||||||
|
|
|
@ -8,6 +8,8 @@ import Icon from './icon'
|
||||||
import classes from '../styles/link.module.sass'
|
import classes from '../styles/link.module.sass'
|
||||||
import { isString } from './util'
|
import { isString } from './util'
|
||||||
|
|
||||||
|
// Matches http(s) URLs for prodi.gy, spacy.io and irl.spacy.io
// (case-insensitive).
// Deliberately NOT global (`g`): a global regex remembers `lastIndex` between
// `.test()` calls, so reusing this shared instance would make a second
// matching URL fail the test. Dots are escaped so that they only match a
// literal "." instead of any character.
const internalRegex = /(http(s?)):\/\/(prodi\.gy|spacy\.io|irl\.spacy\.io)/i
|
||||||
|
|
||||||
const Whitespace = ({ children }) => (
|
const Whitespace = ({ children }) => (
|
||||||
// Ensure that links are always wrapped in spaces
|
// Ensure that links are always wrapped in spaces
|
||||||
<> {children} </>
|
<> {children} </>
|
||||||
|
@ -68,13 +70,15 @@ const Link = ({
|
||||||
</Wrapper>
|
</Wrapper>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
const isInternal = internalRegex.test(dest)
|
||||||
|
const rel = isInternal ? null : 'noopener nofollow noreferrer'
|
||||||
return (
|
return (
|
||||||
<Wrapper>
|
<Wrapper>
|
||||||
<OutboundLink
|
<OutboundLink
|
||||||
href={dest}
|
href={dest}
|
||||||
className={linkClassNames}
|
className={linkClassNames}
|
||||||
target="_blank"
|
target="_blank"
|
||||||
rel="noopener nofollow noreferrer"
|
rel={rel}
|
||||||
{...other}
|
{...other}
|
||||||
>
|
>
|
||||||
{content}
|
{content}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user