Revert "Merge pull request #836 from raphael0202/load_vectors (closes #834)"

This reverts commit 7d8c9eee7f, reversing changes made to f6b69babcc.
2025-11-07 03:17:37 +03:00 · 2017-02-16 15:27:12 +01:00 · 2017-02-16 15:27:12 +01:00 · ea05f78660
commit ea05f78660
parent 0836cbe064
2 changed files with 8 additions and 20 deletions
--- a/spacy/tests/regression/test_issue834.py
+++ b/spacy/tests/regression/test_issue834.py
@ -1,15 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from io import StringIO
 # Sample vector rows passed to load_vectors by the test below: each row is a
 # token followed by three space-separated float components. The final row
 # starts with whitespace rather than a visible token (presumably the case
 # this regression test targets -- confirm against the issue report).
 word2vec_str = """, -0.046107 -0.035951 -0.560418
 de -0.648927 -0.400976 -0.527124
 . 0.113685 0.439990 -0.634510
   -1.499184 -0.184280 -0.598371"""
 def test_issue834(en_vocab):
    """Regression test for issue #834: loading vectors from a text stream.

    Wraps the ``word2vec_str`` sample in an in-memory ``StringIO``, passes
    it to ``en_vocab.load_vectors`` and checks that the returned vector
    length is 3, matching the three float components on every sample row.

    ``en_vocab`` is supplied from outside this file (presumably a pytest
    fixture yielding an English ``Vocab`` -- confirm in the test conftest).
    """
    f = StringIO(word2vec_str)
    vector_length = en_vocab.load_vectors(f)
    assert vector_length == 3
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@ -1,17 +1,22 @@
 from __future__ import unicode_literals
 from libc.stdio cimport fopen, fclose, fread, fwrite, FILE
 from libc.string cimport memset
 from libc.stdint cimport int32_t
 from libc.stdint cimport uint64_t
 from libc.math cimport sqrt
 from pathlib import Path
 import bz2
 import io
 import math
 import ujson as json
-import re
+import tempfile
 from .lexeme cimport EMPTY_LEXEME
 from .lexeme cimport Lexeme
 from .strings cimport hash_string
 from .orth cimport word_shape
 from .typedefs cimport attr_t
 from .cfile cimport CFile
 from .lemmatizer import Lemmatizer
@ -24,6 +29,7 @@ from . import symbols
 from cymem.cymem cimport Address
 from .serialize.packer cimport Packer
 from .attrs cimport PROB, LANG
 from . import deprecated
 from . import util
@ -471,12 +477,9 @@ cdef class Vocab:
        cdef attr_t orth
        cdef int32_t vec_len = -1
        cdef double norm = 0.0
        whitespace_pattern = re.compile(r'\s')
        for line_num, line in enumerate(file_):
            pieces = line.split()
-            word_str = " " if whitespace_pattern.match(line) else pieces.pop(0)
+            word_str = " " if line.startswith(" ") else pieces.pop(0)
            if vec_len == -1:
                vec_len = len(pieces)
            elif vec_len != len(pieces):