mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge branch 'master' into develop
This commit is contained in:
		
						commit
						de33b6d566
					
				| 
						 | 
					@ -105,6 +105,10 @@ class Warnings(object):
 | 
				
			||||||
    W025 = ("'{name}' requires '{attr}' to be assigned, but none of the "
 | 
					    W025 = ("'{name}' requires '{attr}' to be assigned, but none of the "
 | 
				
			||||||
            "previous components in the pipeline declare that they assign it.")
 | 
					            "previous components in the pipeline declare that they assign it.")
 | 
				
			||||||
    W026 = ("Unable to set all sentence boundaries from dependency parses.")
 | 
					    W026 = ("Unable to set all sentence boundaries from dependency parses.")
 | 
				
			||||||
 | 
					    W027 = ("Found a large training file of {size} bytes. Note that it may "
 | 
				
			||||||
 | 
					            "be more efficient to split your training data into multiple "
 | 
				
			||||||
 | 
					            "smaller JSON files instead.")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@add_codes
 | 
					@add_codes
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -13,7 +13,7 @@ import srsly
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .syntax import nonproj
 | 
					from .syntax import nonproj
 | 
				
			||||||
from .tokens import Doc, Span
 | 
					from .tokens import Doc, Span
 | 
				
			||||||
from .errors import Errors, AlignmentError
 | 
					from .errors import Errors, AlignmentError, user_warning, Warnings
 | 
				
			||||||
from .compat import path2str, basestring_
 | 
					from .compat import path2str, basestring_
 | 
				
			||||||
from . import util
 | 
					from . import util
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -537,12 +537,16 @@ def _json_iterate(loc):
 | 
				
			||||||
    loc = util.ensure_path(loc)
 | 
					    loc = util.ensure_path(loc)
 | 
				
			||||||
    with loc.open("rb") as file_:
 | 
					    with loc.open("rb") as file_:
 | 
				
			||||||
        py_raw = file_.read()
 | 
					        py_raw = file_.read()
 | 
				
			||||||
 | 
					    cdef long file_length = len(py_raw)
 | 
				
			||||||
 | 
					    if file_length > 2 ** 30:
 | 
				
			||||||
 | 
					        user_warning(Warnings.W027.format(size=file_length))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    raw = <char*>py_raw
 | 
					    raw = <char*>py_raw
 | 
				
			||||||
    cdef int square_depth = 0
 | 
					    cdef int square_depth = 0
 | 
				
			||||||
    cdef int curly_depth = 0
 | 
					    cdef int curly_depth = 0
 | 
				
			||||||
    cdef int inside_string = 0
 | 
					    cdef int inside_string = 0
 | 
				
			||||||
    cdef int escape = 0
 | 
					    cdef int escape = 0
 | 
				
			||||||
    cdef int start = -1
 | 
					    cdef long start = -1
 | 
				
			||||||
    cdef char c
 | 
					    cdef char c
 | 
				
			||||||
    cdef char quote = ord('"')
 | 
					    cdef char quote = ord('"')
 | 
				
			||||||
    cdef char backslash = ord("\\")
 | 
					    cdef char backslash = ord("\\")
 | 
				
			||||||
| 
						 | 
					@ -550,7 +554,7 @@ def _json_iterate(loc):
 | 
				
			||||||
    cdef char close_square = ord("]")
 | 
					    cdef char close_square = ord("]")
 | 
				
			||||||
    cdef char open_curly = ord("{")
 | 
					    cdef char open_curly = ord("{")
 | 
				
			||||||
    cdef char close_curly = ord("}")
 | 
					    cdef char close_curly = ord("}")
 | 
				
			||||||
    for i in range(len(py_raw)):
 | 
					    for i in range(file_length):
 | 
				
			||||||
        c = raw[i]
 | 
					        c = raw[i]
 | 
				
			||||||
        if escape:
 | 
					        if escape:
 | 
				
			||||||
            escape = False
 | 
					            escape = False
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,11 +1,12 @@
 | 
				
			||||||
# coding: utf-8
 | 
					# coding: utf-8
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import pytest
 | 
				
			||||||
from spacy.kb import KnowledgeBase
 | 
					from spacy.kb import KnowledgeBase
 | 
				
			||||||
from spacy.util import ensure_path
 | 
					from spacy.util import ensure_path
 | 
				
			||||||
 | 
					 | 
				
			||||||
from spacy.lang.en import English
 | 
					from spacy.lang.en import English
 | 
				
			||||||
from spacy.tests.util import make_tempdir
 | 
					
 | 
				
			||||||
 | 
					from ..tests.util import make_tempdir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_issue4674():
 | 
					def test_issue4674():
 | 
				
			||||||
| 
						 | 
					@ -15,9 +16,12 @@ def test_issue4674():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    vector1 = [0.9, 1.1, 1.01]
 | 
					    vector1 = [0.9, 1.1, 1.01]
 | 
				
			||||||
    vector2 = [1.8, 2.25, 2.01]
 | 
					    vector2 = [1.8, 2.25, 2.01]
 | 
				
			||||||
    kb.set_entities(
 | 
					    with pytest.warns(UserWarning):
 | 
				
			||||||
        entity_list=["Q1", "Q1"], freq_list=[32, 111], vector_list=[vector1, vector2]
 | 
					        kb.set_entities(
 | 
				
			||||||
    )
 | 
					            entity_list=["Q1", "Q1"],
 | 
				
			||||||
 | 
					            freq_list=[32, 111],
 | 
				
			||||||
 | 
					            vector_list=[vector1, vector2],
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert kb.get_size_entities() == 1
 | 
					    assert kb.get_size_entities() == 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user