* Delete unused _split method

2026-02-01 21:16:05 +03:00 · 2014-09-12 04:03:52 +02:00 · 2014-09-12 04:03:52 +02:00 · 8b20e9ad97
commit 8b20e9ad97
parent a4863686ec
2 changed files with 0 additions and 23 deletions
--- a/spacy/lang.pxd
+++ b/spacy/lang.pxd
@@ -79,5 +79,4 @@ cdef class Language:
    cpdef Lexeme lookup(self, unicode text)

    cdef int _tokenize(self, Tokens tokens, Py_UNICODE* characters, size_t length) except -1
-    cdef list _split(self, unicode string)
    cdef int _split_one(self, unicode word)
--- a/spacy/lang.pyx
+++ b/spacy/lang.pyx
@@ -134,28 +134,6 @@ cdef class Language:
                    node = node.tail
                break

-    cdef list _split(self, unicode string):
-        """Find how to split a contiguous span of non-space characters into substrings.
-
-        This method calls find_split repeatedly. Most languages will want to
-        override _split_one, but it may be useful to override this instead.
-
-        Args:
-            chunk (unicode): The string to be split, e.g. u"Mike's!"
-
-        Returns:
-            substrings (list): The component substrings, e.g. [u"Mike", "'s", "!"].
-        """
-        substrings = []
-        while string:
-            split = self._split_one(string)
-            if split == 0:
-                substrings.append(string)
-                break
-            substrings.append(string[:split])
-            string = string[split:]
-        return substrings
-
    cdef int _split_one(self, unicode word):
        return len(word)