mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-04 21:50:35 +03:00
This reverts commit 7d8c9eee7f, reversing changes made to f6b69babcc.
This commit is contained in:
parent
0836cbe064
commit
ea05f78660
|
@ -1,15 +0,0 @@
|
||||||
# coding: utf-8
|
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from io import StringIO
|
|
||||||
|
|
||||||
word2vec_str = """, -0.046107 -0.035951 -0.560418
|
|
||||||
de -0.648927 -0.400976 -0.527124
|
|
||||||
. 0.113685 0.439990 -0.634510
|
|
||||||
-1.499184 -0.184280 -0.598371"""
|
|
||||||
|
|
||||||
|
|
||||||
def test_issue834(en_vocab):
    """Check that loading the sample word2vec text reports a vector length of 3.

    The module-level ``word2vec_str`` is wrapped in an in-memory text stream
    and handed to ``en_vocab.load_vectors``; the value it returns is expected
    to equal 3.
    """
    vectors_file = StringIO(word2vec_str)
    assert en_vocab.load_vectors(vectors_file) == 3
|
|
|
@ -1,17 +1,22 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from libc.stdio cimport fopen, fclose, fread, fwrite, FILE
|
||||||
from libc.string cimport memset
|
from libc.string cimport memset
|
||||||
from libc.stdint cimport int32_t
|
from libc.stdint cimport int32_t
|
||||||
|
from libc.stdint cimport uint64_t
|
||||||
from libc.math cimport sqrt
|
from libc.math cimport sqrt
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import bz2
|
import bz2
|
||||||
|
import io
|
||||||
|
import math
|
||||||
import ujson as json
|
import ujson as json
|
||||||
import re
|
import tempfile
|
||||||
|
|
||||||
from .lexeme cimport EMPTY_LEXEME
|
from .lexeme cimport EMPTY_LEXEME
|
||||||
from .lexeme cimport Lexeme
|
from .lexeme cimport Lexeme
|
||||||
from .strings cimport hash_string
|
from .strings cimport hash_string
|
||||||
|
from .orth cimport word_shape
|
||||||
from .typedefs cimport attr_t
|
from .typedefs cimport attr_t
|
||||||
from .cfile cimport CFile
|
from .cfile cimport CFile
|
||||||
from .lemmatizer import Lemmatizer
|
from .lemmatizer import Lemmatizer
|
||||||
|
@ -24,6 +29,7 @@ from . import symbols
|
||||||
from cymem.cymem cimport Address
|
from cymem.cymem cimport Address
|
||||||
from .serialize.packer cimport Packer
|
from .serialize.packer cimport Packer
|
||||||
from .attrs cimport PROB, LANG
|
from .attrs cimport PROB, LANG
|
||||||
|
from . import deprecated
|
||||||
from . import util
|
from . import util
|
||||||
|
|
||||||
|
|
||||||
|
@ -471,12 +477,9 @@ cdef class Vocab:
|
||||||
cdef attr_t orth
|
cdef attr_t orth
|
||||||
cdef int32_t vec_len = -1
|
cdef int32_t vec_len = -1
|
||||||
cdef double norm = 0.0
|
cdef double norm = 0.0
|
||||||
|
|
||||||
whitespace_pattern = re.compile(r'\s')
|
|
||||||
|
|
||||||
for line_num, line in enumerate(file_):
|
for line_num, line in enumerate(file_):
|
||||||
pieces = line.split()
|
pieces = line.split()
|
||||||
word_str = " " if whitespace_pattern.match(line) else pieces.pop(0)
|
word_str = " " if line.startswith(" ") else pieces.pop(0)
|
||||||
if vec_len == -1:
|
if vec_len == -1:
|
||||||
vec_len = len(pieces)
|
vec_len = len(pieces)
|
||||||
elif vec_len != len(pieces):
|
elif vec_len != len(pieces):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user