From f97d6e682678ccd62a92b8c7e2b179c549b1c915 Mon Sep 17 00:00:00 2001 From: richardpaulhudson Date: Fri, 4 Nov 2022 12:36:14 +0100 Subject: [PATCH] Updated example config --- spacy/tokens/doc.pyx | 5 ++++- website/docs/api/architectures.md | 14 +++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 20880e528..cbc1bc3c7 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -1782,8 +1782,11 @@ cdef class Doc: the fact that we are hashing short affixes and searching for small groups of characters. The calling code is responsible for ensuring that lengths being passed in cannot exceed 63 and hence that resulting values with a maximum of four-byte character widths can never exceed 255. - """ + Note that this method performs no data validation itself as it expects the calling code will already have done so, and + that the behaviour of the code may be erratic if the supplied parameters do not conform to expectations. + """ + # Work out lengths cdef int p_lengths_l = strlen( p_lengths) cdef int s_lengths_l = strlen( s_lengths) diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md index 78c1ad401..0c77da372 100644 --- a/website/docs/api/architectures.md +++ b/website/docs/api/architectures.md @@ -177,18 +177,18 @@ updated). 
> ```ini > [model] > @architectures = "spacy.RichMultiHashEmbed.v1" -> width = 64 -> attrs = ["LOWER","SHAPE"] -> rows = [2000,1000] +> width = ${components.tok2vec.model.encode:width} +> attrs = ["LOWER", "SHAPE", "SPACY"] +> rows = [5000, 2500, 50] > include_static_vectors = "False" > case_sensitive = "False" > pref_lengths = [2, 3, 5] -> pref_rows = [2000,2000,2000] +> pref_rows = [10000, 10000, 10000] > suff_lengths = [2, 3, 4, 5] -> suff_rows = [2000,2000,2000,2000] -> suff_search_chars = "aeiouäöüyß" +> suff_rows = [10000, 10000, 10000, 10000] +> suff_search_chars = "aeiouäöüß" > suff_search_lengths = [2, 3] -> suff_search_rows = [2000,2000] +> suff_search_rows = [10000, 10000] > ``` Construct an embedding layer with the features of