mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
35 lines
998 B
Python
35 lines
998 B
Python
from __future__ import unicode_literals
|
|
|
|
import pytest
|
|
|
|
from spacy import en
|
|
from spacy.lexeme import lex_of
|
|
|
|
from spacy import LEX, NORM, SHAPE, LAST3
|
|
|
|
def test_group_by_lex():
    """Group a tokenized sentence by the LEX attribute and check the result.

    The sentence repeats "I", "like" and "the" twice each, while "red"
    occurs once.  The test asserts the sizes of the first four groups and
    that the first two groups (and their names) are "I" and "like".
    """
    sentence = en.tokenize("I like the red one and I like the blue one")
    # group_by yields three parallel results; the hashes are not inspected.
    group_names, _group_hashes, buckets = sentence.group_by(LEX)

    # First group: the two occurrences of "I".
    i_group = buckets[0]
    assert len(i_group) == 2
    assert en.unhash(lex_of(i_group[0])) == "I"
    assert group_names[0] == "I"

    # Second group: the two occurrences of "like".
    like_group = buckets[1]
    assert len(like_group) == 2
    assert en.unhash(lex_of(like_group[0])) == "like"
    assert group_names[1] == "like"

    # Third and fourth groups: only their sizes are checked
    # (presumably "the" twice and "red" once, going by the sentence).
    third_group = buckets[2]
    assert len(third_group) == 2
    fourth_group = buckets[3]
    assert len(fourth_group) == 1
|
|
|
|
|
|
def test_group_by_last3():
    """Group a tokenized sentence by the LAST3 attribute and check the result.

    The test asserts the sizes of the first five groups (1, 3, 2, 2, 1) and
    that the first members of the first two groups are "I" and "the".
    NOTE(review): LAST3 presumably keys on each token's final three
    characters (e.g. "the"/"blithe", "swarthy"/"filthy", "mate"/"ate");
    confirm against the attribute's definition.
    """
    sentence = en.tokenize("I the blithe swarthy mate ate on the filthy deck")
    # Only the grouped tokens are inspected; names and hashes are ignored.
    _group_names, _group_hashes, buckets = sentence.group_by(LAST3)

    # First group: the lone "I".
    i_group = buckets[0]
    assert len(i_group) == 1
    assert en.unhash(lex_of(i_group[0])) == "I"

    # Second group: three tokens, the first of which is "the".
    the_group = buckets[1]
    assert len(the_group) == 3
    assert en.unhash(lex_of(the_group[0])) == "the"

    # Remaining groups: only their sizes are checked.
    third_group = buckets[2]
    assert len(third_group) == 2
    fourth_group = buckets[3]
    assert len(fourth_group) == 2
    fifth_group = buckets[4]
    assert len(fifth_group) == 1
|