Try to fix memory error by moving fr_tokenizer to module scope

Matthew Honnibal 2018-07-24 20:09:06 +02:00
parent afe3fa4449
commit 6303ce3d0e
4 changed files with 21 additions and 6 deletions
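The fix changes the pytest fixture scope: instead of one session-scoped French tokenizer held in memory for the whole test run, each test module that needs it now builds its own copy, which pytest can release once that module's tests finish. A minimal sketch of the pattern, with make_tokenizer() as a hypothetical stand-in for the real util.get_lang_class('fr').Defaults.create_tokenizer() call:

```python
import pytest


def make_tokenizer():
    # Hypothetical stand-in for an expensive, memory-heavy object such as
    # util.get_lang_class('fr').Defaults.create_tokenizer().
    return object()


@pytest.fixture(scope='module')
def fr_tokenizer():
    # Built once per test module and released when that module's tests finish,
    # rather than being kept alive for the entire test session.
    return make_tokenizer()


def test_uses_tokenizer(fr_tokenizer):
    assert fr_tokenizer is not None
```

The trade-off is speed for memory: the tokenizer is constructed once per module that uses it instead of once per session, but it no longer pins its memory for the whole run.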

View File

@@ -92,11 +92,6 @@ def de_tokenizer():
     return util.get_lang_class('de').Defaults.create_tokenizer()
 
 
-@pytest.fixture(scope='session')
-def fr_tokenizer():
-    return util.get_lang_class('fr').Defaults.create_tokenizer()
-
-
 @pytest.fixture(scope='session')
 def hu_tokenizer():
     return util.get_lang_class('hu').Defaults.create_tokenizer()

View File

@@ -3,6 +3,11 @@
 from __future__ import unicode_literals
 
 import pytest
+from .... import util
+
+@pytest.fixture(scope='module')
+def fr_tokenizer():
+    return util.get_lang_class('fr').Defaults.create_tokenizer()
 
 
 @pytest.mark.parametrize('text', ["aujourd'hui", "Aujourd'hui", "prud'hommes",
@@ -62,4 +67,4 @@ def test_tokenizer_handles_title_2(fr_tokenizer):
     tokens = fr_tokenizer(text)
     assert len(tokens) == 7
     assert tokens[0].text == "Qu'"
     assert tokens[0].lemma_ == "que"

View File

@@ -1,5 +1,13 @@
 # coding: utf8
 from __future__ import unicode_literals
+
+import pytest
+
+from .... import util
+
+@pytest.fixture(scope='module')
+def fr_tokenizer():
+    return util.get_lang_class('fr').Defaults.create_tokenizer()
 
 
 def test_tokenizer_handles_long_text(fr_tokenizer):

View File

@@ -3,6 +3,13 @@ from __future__ import unicode_literals
 
 import pytest
+
+from ... import util
+
+
+@pytest.fixture(scope='module')
+def fr_tokenizer():
+    return util.get_lang_class('fr').Defaults.create_tokenizer()
 
 
 @pytest.mark.parametrize('text', ["au-delàs", "pair-programmâmes",
                                   "terra-formées", "σ-compacts"])