Enabling tests/lang/ru/test_lemmatizer.py, fixing a Unicode issue (#3084)

<!--- Provide a general summary of your changes in the title. -->

## Description

See #3079. This PR merges into `develop` instead of `master`.

### Types of change
<!-- What type of change does your PR cover? Is it a bug fix, an enhancement
or new feature, or a change to the documentation? -->

Bug fix.

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.
This commit is contained in:
Kirill Bulygin 2018-12-30 16:10:26 +05:00 committed by Ines Montani
parent 9bc4cc1352
commit b665a32b95
2 changed files with 4 additions and 2 deletions

View File

@@ -1,6 +1,9 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals
from ...symbols import ADJ, DET, NOUN, NUM, PRON, PROPN, PUNCT, VERB, POS from ...symbols import ADJ, DET, NOUN, NUM, PRON, PROPN, PUNCT, VERB, POS
from ...lemmatizer import Lemmatizer from ...lemmatizer import Lemmatizer
from ...compat import unicode_
class RussianLemmatizer(Lemmatizer): class RussianLemmatizer(Lemmatizer):
@@ -81,7 +84,7 @@ class RussianLemmatizer(Lemmatizer):
@staticmethod @staticmethod
def normalize_univ_pos(univ_pos): def normalize_univ_pos(univ_pos):
if isinstance(univ_pos, str): if isinstance(univ_pos, unicode_):
return univ_pos.upper() return univ_pos.upper()
symbols_to_str = { symbols_to_str = {

View File

@@ -39,7 +39,6 @@ def test_ru_lemmatizer_noun_lemmas(ru_lemmatizer, text, lemmas):
assert sorted(ru_lemmatizer.noun(text)) == lemmas assert sorted(ru_lemmatizer.noun(text)) == lemmas
@pytest.mark.models("ru")
@pytest.mark.parametrize( @pytest.mark.parametrize(
"text,pos,morphology,lemma", "text,pos,morphology,lemma",
[ [