diff --git a/website/docs/api/stringstore.jade b/website/docs/api/stringstore.jade index 0665f6060..969c8a6a5 100644 --- a/website/docs/api/stringstore.jade +++ b/website/docs/api/stringstore.jade @@ -113,9 +113,11 @@ p Add a string to the #[code StringStore]. +aside-code("Example"). stringstore = StringStore([u'apple', u'orange']) - stringstore.add(u'banana') + banana_hash = stringstore.add(u'banana') assert len(stringstore) == 3 - assert stringstore[u'banana'] == 2525716904149915114L + assert banana_hash == 2525716904149915114L + assert stringstore[banana_hash] == u'banana' + assert stringstore[u'banana'] == banana_hash +table(["Name", "Type", "Description"]) +row diff --git a/website/docs/usage/entity-recognition.jade b/website/docs/usage/entity-recognition.jade index f9bfd4df9..f33ef70df 100644 --- a/website/docs/usage/entity-recognition.jade +++ b/website/docs/usage/entity-recognition.jade @@ -52,15 +52,15 @@ p assert ent_san == [u'San', u'B', u'GPE'] assert ent_francisco == [u'Francisco', u'I', u'GPE'] -+table(["Text", "ent_iob", "ent_iob_", "ent_type", "ent_type_", "Description"]) ++table(["Text", "ent_iob", "ent_iob_", "ent_type_", "Description"]) - var style = [0, 1, 1, 1, 1, 0] - +annotation-row(["San", 3, "B", 381, "GPE", "beginning of an entity"], style) - +annotation-row(["Francisco", 1, "I", 381, "GPE", "inside an entity"], style) - +annotation-row(["considers", 2, "O", 0, '""', "outside an entity"], style) - +annotation-row(["banning", 2, "O", 0, '""', "outside an entity"], style) - +annotation-row(["sidewalk", 2, "O", 0, '""', "outside an entity"], style) - +annotation-row(["delivery", 2, "O", 0, '""', "outside an entity"], style) - +annotation-row(["robots", 2, "O", 0, '""', "outside an entity"], style) + +annotation-row(["San", 3, "B", "GPE", "beginning of an entity"], style) + +annotation-row(["Francisco", 1, "I", "GPE", "inside an entity"], style) + +annotation-row(["considers", 2, "O", '""', "outside an entity"], style) + 
+annotation-row(["banning", 2, "O", '""', "outside an entity"], style) + +annotation-row(["sidewalk", 2, "O", '""', "outside an entity"], style) + +annotation-row(["delivery", 2, "O", '""', "outside an entity"], style) + +annotation-row(["robots", 2, "O", '""', "outside an entity"], style) +h(2, "setting") Setting entity annotations @@ -148,6 +148,8 @@ include ../api/_annotation/_named-entities +h(2, "updating") Training and updating ++under-construction + p | To provide training examples to the entity recogniser, you'll first need | to create an instance of the #[+api("goldparse") #[code GoldParse]] class. @@ -173,7 +175,6 @@ p nlp.tagger(doc) ner.update(doc, gold) - ner.model.end_training() p | If a character offset in your entity annotations doesn't fall on a token diff --git a/website/docs/usage/lightning-tour.jade b/website/docs/usage/lightning-tour.jade index a87e763a6..f144b4f05 100644 --- a/website/docs/usage/lightning-tour.jade +++ b/website/docs/usage/lightning-tour.jade @@ -53,9 +53,9 @@ p +code. doc = nlp(u'Apple is looking at buying U.K. 
startup for $1 billion') apple = doc[0] - assert [apple.pos_, apple.pos] == [u'PROPN', 94] - assert [apple.tag_, apple.tag] == [u'NNP', 475] - assert [apple.shape_, apple.shape] == [u'Xxxxx', 684] + assert [apple.pos_, apple.pos] == [u'PROPN', 17049293600679659579L] + assert [apple.tag_, apple.tag] == [u'NNP', 15794550382381185553L] + assert [apple.shape_, apple.shape] == [u'Xxxxx', 16072095006890171862L] assert apple.is_alpha == True assert apple.is_punct == False @@ -78,10 +78,16 @@ p assert doc[2].orth == coffee_hash == 3197928453018144401L assert doc[2].text == coffee_text == u'coffee' - doc.vocab.strings.add(u'beer') - beer_hash = doc.vocab.strings[u'beer'] # 3073001599257881079L + beer_hash = doc.vocab.strings.add(u'beer') # 3073001599257881079L beer_text = doc.vocab.strings[beer_hash] # 'beer' + unicorn_hash = doc.vocab.strings.add(u'🦄 ') # 18234233413267120783L + unicorn_text = doc.vocab.strings[unicorn_hash] # '🦄 ' + ++infobox + | #[strong API:] #[+api("stringstore") #[code stringstore]] + | #[strong Usage:] #[+a("/docs/usage/spacy-101#vocab") Vocab, hashes and lexemes 101] + +h(2, "examples-entities") Recognise and update named entities +tag-model("NER") diff --git a/website/docs/usage/v2.jade b/website/docs/usage/v2.jade index c28863d6c..7b9f282a6 100644 --- a/website/docs/usage/v2.jade +++ b/website/docs/usage/v2.jade @@ -57,8 +57,9 @@ p assert doc.vocab.strings[u'coffee'] == 3197928453018144401L assert doc.vocab.strings[3197928453018144401L] == u'coffee' - doc.vocab.strings.add(u'beer') - assert doc.vocab.strings[u'beer'] == 3073001599257881079L + beer_hash = doc.vocab.strings.add(u'beer') + assert doc.vocab.strings[u'beer'] == beer_hash + assert doc.vocab.strings[beer_hash] == u'beer' p | The #[+api("stringstore") #[code StringStore]] now resolves all strings