diff --git a/spacy/string_tools.pxd b/spacy/string_tools.pxd index 7f27c19e5..a034bf230 100644 --- a/spacy/string_tools.pxd +++ b/spacy/string_tools.pxd @@ -1,3 +1,7 @@ +cpdef bytes to_bytes(unicode string) + +cpdef unicode from_bytes(bytes string) + cpdef unicode substr(unicode string, int start, int end, size_t length) cdef bint is_whitespace(Py_UNICODE c) diff --git a/spacy/string_tools.pyx b/spacy/string_tools.pyx index 2f199766f..f1854a0b0 100644 --- a/spacy/string_tools.pyx +++ b/spacy/string_tools.pyx @@ -1,6 +1,14 @@ # cython: profile=True +cpdef bytes to_bytes(unicode string): + return string.encode('utf8') + + +cpdef unicode from_bytes(bytes string): + return string.decode('utf8') + + cpdef unicode substr(unicode string, int start, int end, size_t length): if end >= length: end = -1