diff --git a/spacy/cli/model.py b/spacy/cli/model.py index 486bbea2f..3b9a77b93 100644 --- a/spacy/cli/model.py +++ b/spacy/cli/model.py @@ -6,9 +6,9 @@ import math from ast import literal_eval from pathlib import Path from preshed.counter import PreshCounter -import ftfy from ..vocab import write_binary_vectors +from ..compat import fix_text from .. import util @@ -77,7 +77,7 @@ def read_clusters(clusters_path): for line in f: try: cluster, word, freq = line.split() - word = ftfy.fix_text(word) + word = fix_text(word) except ValueError: continue # If the clusterer has only seen the word a few times, its diff --git a/spacy/compat.py b/spacy/compat.py index d216994cc..8458df7b0 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import six +import ftfy import sys import ujson @@ -38,6 +39,9 @@ elif is_python3: json_dumps = lambda data: ujson.dumps(data, indent=2) +fix_text = lambda text: ftfy.fix_text(text) + + def symlink_to(orig, dest): if is_python2 and is_windows: import subprocess