mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Fix formatting
This commit is contained in:
parent
5683fd65ed
commit
4b196fdf7f
|
@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
"""
|
"""A simple example of extracting relations between phrases and entities using
|
||||||
A simple example of extracting relations between phrases and entities using
|
|
||||||
spaCy's named entity recognizer and the dependency parse. Here, we extract
|
spaCy's named entity recognizer and the dependency parse. Here, we extract
|
||||||
money and currency values (entities labelled as MONEY) and then check the
|
money and currency values (entities labelled as MONEY) and then check the
|
||||||
dependency tree to find the noun phrase they are referring to – for example:
|
dependency tree to find the noun phrase they are referring to – for example:
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
"""
|
"""This example shows how to navigate the parse tree including subtrees
|
||||||
This example shows how to navigate the parse tree including subtrees attached
|
attached to a word.
|
||||||
to a word.
|
|
||||||
|
|
||||||
Based on issue #252:
|
Based on issue #252:
|
||||||
"In the documents and tutorials the main thing I haven't found is
|
"In the documents and tutorials the main thing I haven't found is
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf8
|
||||||
"""Match a large set of multi-word expressions in O(1) time.
|
"""Match a large set of multi-word expressions in O(1) time.
|
||||||
|
|
||||||
The idea is to associate each word in the vocabulary with a tag, noting whether
|
The idea is to associate each word in the vocabulary with a tag, noting whether
|
||||||
they begin, end, or are inside at least one pattern. An additional tag is used
|
they begin, end, or are inside at least one pattern. An additional tag is used
|
||||||
for single-word patterns. Complete patterns are also stored in a hash set.
|
for single-word patterns. Complete patterns are also stored in a hash set.
|
||||||
|
|
||||||
When we process a document, we look up the words in the vocabulary, to
|
When we process a document, we look up the words in the vocabulary, to
|
||||||
associate the words with the tags. We then search for tag-sequences that
|
associate the words with the tags. We then search for tag-sequences that
|
||||||
correspond to valid candidates. Finally, we look up the candidates in the hash
|
correspond to valid candidates. Finally, we look up the candidates in the hash
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
"""
|
#!/usr/bin/env python
|
||||||
Example of multi-processing with Joblib. Here, we're exporting
|
# coding: utf8
|
||||||
|
"""Example of multi-processing with Joblib. Here, we're exporting
|
||||||
part-of-speech-tagged, true-cased, (very roughly) sentence-separated text, with
|
part-of-speech-tagged, true-cased, (very roughly) sentence-separated text, with
|
||||||
each "sentence" on a newline, and spaces between tokens. Data is loaded from
|
each "sentence" on a newline, and spaces between tokens. Data is loaded from
|
||||||
the IMDB movie reviews dataset and will be loaded automatically via Thinc's
|
the IMDB movie reviews dataset and will be loaded automatically via Thinc's
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
"""
|
"""Example of training spaCy's named entity recognizer, starting off with an
|
||||||
Example of training spaCy's named entity recognizer, starting off with an
|
|
||||||
existing model or a blank model.
|
existing model or a blank model.
|
||||||
|
|
||||||
For more details, see the documentation:
|
For more details, see the documentation:
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
"""
|
"""Example of training an additional entity type
|
||||||
Example of training an additional entity type
|
|
||||||
|
|
||||||
This script shows how to add a new entity type to an existing pre-trained NER
|
This script shows how to add a new entity type to an existing pre-trained NER
|
||||||
model. To keep the example short and simple, only four sentences are provided
|
model. To keep the example short and simple, only four sentences are provided
|
||||||
|
|
|
@ -1,10 +1,7 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
"""
|
"""Example of training spaCy dependency parser, starting off with an existing
|
||||||
Example of training spaCy dependency parser, starting off with an existing model
|
model or a blank model. For more details, see the documentation:
|
||||||
or a blank model.
|
|
||||||
|
|
||||||
For more details, see the documentation:
|
|
||||||
* Training: https://alpha.spacy.io/usage/training
|
* Training: https://alpha.spacy.io/usage/training
|
||||||
* Dependency Parse: https://alpha.spacy.io/usage/linguistic-features#dependency-parse
|
* Dependency Parse: https://alpha.spacy.io/usage/linguistic-features#dependency-parse
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,8 @@
|
||||||
"""
|
"""
|
||||||
A simple example for training a part-of-speech tagger with a custom tag map.
|
A simple example for training a part-of-speech tagger with a custom tag map.
|
||||||
To allow us to update the tag map with our custom one, this example starts off
|
To allow us to update the tag map with our custom one, this example starts off
|
||||||
with a blank Language class and modifies its defaults.
|
with a blank Language class and modifies its defaults. For more details, see
|
||||||
|
the documentation:
|
||||||
For more details, see the documentation:
|
|
||||||
* Training: https://alpha.spacy.io/usage/training
|
* Training: https://alpha.spacy.io/usage/training
|
||||||
* POS Tagging: https://alpha.spacy.io/usage/linguistic-features#pos-tagging
|
* POS Tagging: https://alpha.spacy.io/usage/linguistic-features#pos-tagging
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,8 @@
|
||||||
"""Train a multi-label convolutional neural network text classifier on the
|
"""Train a multi-label convolutional neural network text classifier on the
|
||||||
IMDB dataset, using the TextCategorizer component. The dataset will be loaded
|
IMDB dataset, using the TextCategorizer component. The dataset will be loaded
|
||||||
automatically via Thinc's built-in dataset loader. The model is added to
|
automatically via Thinc's built-in dataset loader. The model is added to
|
||||||
spacy.pipeline, and predictions are available via `doc.cats`.
|
spacy.pipeline, and predictions are available via `doc.cats`. For more details,
|
||||||
|
see the documentation:
|
||||||
For more details, see the documentation:
|
|
||||||
* Training: https://alpha.spacy.io/usage/training
|
* Training: https://alpha.spacy.io/usage/training
|
||||||
* Text classification: https://alpha.spacy.io/usage/text-classification
|
* Text classification: https://alpha.spacy.io/usage/text-classification
|
||||||
|
|
||||||
|
|
|
@ -13,8 +13,7 @@ import from spacy.language import Language
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
vectors_loc=("Path to vectors", "positional", None, str))
|
vectors_loc=("Path to vectors", "positional", None, str))
|
||||||
def main(vectors_loc):
|
def main(vectors_loc):
|
||||||
nlp = Language()
|
nlp = Language() # start off with a blank Language class
|
||||||
|
|
||||||
with open(vectors_loc, 'rb') as file_:
|
with open(vectors_loc, 'rb') as file_:
|
||||||
header = file_.readline()
|
header = file_.readline()
|
||||||
nr_row, nr_dim = header.split()
|
nr_row, nr_dim = header.split()
|
||||||
|
@ -24,9 +23,11 @@ def main(vectors_loc):
|
||||||
pieces = line.split()
|
pieces = line.split()
|
||||||
word = pieces[0]
|
word = pieces[0]
|
||||||
vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
|
vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
|
||||||
nlp.vocab.set_vector(word, vector)
|
nlp.vocab.set_vector(word, vector) # add the vectors to the vocab
|
||||||
doc = nlp(u'class colspan')
|
# test the vectors and similarity
|
||||||
print(doc[0].similarity(doc[1]))
|
text = 'class colspan'
|
||||||
|
doc = nlp(text)
|
||||||
|
print(text, doc[0].similarity(doc[1]))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in New Issue
Block a user