mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
* Improving docs
This commit is contained in:
parent
cab7f63fc2
commit
cbda38e2d9
|
@ -1,5 +1,45 @@
|
||||||
Python API
|
Python API
|
||||||
==========
|
==========
|
||||||
|
|
||||||
Cheat Sheet
|
.. py:currentmodule:: spacy.en
|
||||||
-----------
|
|
||||||
|
To and from unicode strings
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
.. autofunction:: tokenize
|
||||||
|
.. autofunction:: lookup
|
||||||
|
.. autofunction:: unhash
|
||||||
|
|
||||||
|
Access (Hashed) String Views
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
.. autofunction:: lex_of
|
||||||
|
.. autofunction:: norm_of
|
||||||
|
.. autofunction:: shape_of
|
||||||
|
.. autofunction:: last3_of
|
||||||
|
|
||||||
|
Access String Properties
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
.. autofunction:: length_of
|
||||||
|
.. autofunction:: first_of
|
||||||
|
|
||||||
|
Check Orthographic Flags
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
.. autofunction:: is_alpha
|
||||||
|
.. autofunction:: is_digit
|
||||||
|
.. autofunction:: is_punct
|
||||||
|
.. autofunction:: is_space
|
||||||
|
.. autofunction:: is_lower
|
||||||
|
.. autofunction:: is_upper
|
||||||
|
.. autofunction:: is_title
|
||||||
|
.. autofunction:: is_ascii
|
||||||
|
|
||||||
|
Access Distributional Information
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
.. autofunction:: prob_of
|
||||||
|
.. autofunction:: cluster_of
|
||||||
|
.. autofunction:: check_tag_flag
|
||||||
|
.. autofunction:: check_dist_flag
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import os.path
|
import os.path
|
||||||
|
import sphinx_rtd_theme
|
||||||
|
|
||||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||||
# If extensions (or modules to document with autodoc) are in another directory,
|
# If extensions (or modules to document with autodoc) are in another directory,
|
||||||
|
@ -105,7 +106,8 @@ pygments_style = 'sphinx'
|
||||||
|
|
||||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||||
# a list of builtin themes.
|
# a list of builtin themes.
|
||||||
html_theme = 'default'
|
|
||||||
|
html_theme = 'sphinx_rtd_theme'
|
||||||
|
|
||||||
# Theme options are theme-specific and customize the look and feel of a theme
|
# Theme options are theme-specific and customize the look and feel of a theme
|
||||||
# further. For a list of options available for each theme, see the
|
# further. For a list of options available for each theme, see the
|
||||||
|
@ -113,7 +115,7 @@ html_theme = 'default'
|
||||||
#html_theme_options = {}
|
#html_theme_options = {}
|
||||||
|
|
||||||
# Add any paths that contain custom themes here, relative to this directory.
|
# Add any paths that contain custom themes here, relative to this directory.
|
||||||
#html_theme_path = []
|
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
|
||||||
|
|
||||||
# The name for this set of Sphinx documents. If None, it defaults to
|
# The name for this set of Sphinx documents. If None, it defaults to
|
||||||
# "<project> v<release> documentation".
|
# "<project> v<release> documentation".
|
||||||
|
@ -166,7 +168,7 @@ html_static_path = ['_static']
|
||||||
#html_split_index = False
|
#html_split_index = False
|
||||||
|
|
||||||
# If true, links to the reST sources are added to the pages.
|
# If true, links to the reST sources are added to the pages.
|
||||||
#html_show_sourcelink = True
|
html_show_sourcelink = False
|
||||||
|
|
||||||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||||
#html_show_sphinx = True
|
#html_show_sphinx = True
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
Installation
|
||||||
|
============
|
||||||
|
|
||||||
|
Installation via pip::
|
||||||
|
|
||||||
|
pip install spacy
|
||||||
|
|
||||||
|
Installation from source via `GitHub <https://github.com/honnibal/spaCy>`_, using virtualenv::
|
||||||
|
|
||||||
|
$ git clone http://github.com/honnibal/spaCy.git
|
||||||
|
$ cd spaCy
|
||||||
|
$ virtualenv .env
|
||||||
|
$ source .env/bin/activate
|
||||||
|
$ pip install -r requirements.txt
|
||||||
|
$ fab make
|
||||||
|
$ fab test
|
|
@ -3,81 +3,28 @@
|
||||||
You can adapt this file completely to your liking, but it should at least
|
You can adapt this file completely to your liking, but it should at least
|
||||||
contain the root `toctree` directive.
|
contain the root `toctree` directive.
|
||||||
|
|
||||||
spaCy API Reference
|
spaCy Natural Language Tokenizer
|
||||||
=================================
|
================================
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 3
|
||||||
|
|
||||||
|
guide/overview
|
||||||
|
guide/install
|
||||||
|
api/languages/index.rst
|
||||||
|
api/modules/index.rst
|
||||||
|
|
||||||
api/python
|
Project Home
|
||||||
api/cython
|
|
||||||
api/extending
|
|
||||||
|
|
||||||
Overview
|
|
||||||
--------
|
|
||||||
|
|
||||||
spaCy is a tokenizer for natural languages, tightly coupled to a global
|
|
||||||
vocabulary store.
|
|
||||||
|
|
||||||
Instead of a list of strings, spaCy returns references to lexical types. All
|
|
||||||
of the string-based features you might need are pre-computed for you:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
>>> from spacy import en
|
|
||||||
>>> example = u"Apples aren't oranges..."
|
|
||||||
>>> apples, are, nt, oranges, ellipses = en.tokenize(example)
|
|
||||||
>>> en.is_punct(ellipses)
|
|
||||||
True
|
|
||||||
>>> en.get_string(en.word_shape(apples))
|
|
||||||
'Xxxx'
|
|
||||||
|
|
||||||
You also get lots of distributional features, calculated from a large
|
|
||||||
sample of text:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
>>> en.prob_of(are) > en.prob_of(oranges)
|
|
||||||
True
|
|
||||||
>>> en.can_noun(are)
|
|
||||||
False
|
|
||||||
>>> en.is_oft_title(apples)
|
|
||||||
False
|
|
||||||
|
|
||||||
Pros and Cons
|
|
||||||
-------------
|
|
||||||
|
|
||||||
Pros:
|
|
||||||
|
|
||||||
- All tokens come with indices into the original string
|
|
||||||
- Full unicode support
|
|
||||||
- Extensible to other languages
|
|
||||||
- Batch operations computed efficiently in Cython
|
|
||||||
- Cython API
|
|
||||||
- numpy interoperability
|
|
||||||
|
|
||||||
Cons:
|
|
||||||
|
|
||||||
- It's new (released September 2014)
|
|
||||||
- Higher memory usage (up to 1gb)
|
|
||||||
- More conceptually complicated
|
|
||||||
- Tokenization rules expressed in code, not as data
|
|
||||||
|
|
||||||
Installation
|
|
||||||
------------
|
------------
|
||||||
|
|
||||||
Installation via pip:
|
http://honnibal.github.io/spaCy/
|
||||||
|
|
||||||
pip install spacy
|
Source (GitHub)
|
||||||
|
----------------
|
||||||
|
|
||||||
From source, using virtualenv:
|
http://github.com/honnibal/spaCy
|
||||||
|
|
||||||
::
|
License
|
||||||
|
-------
|
||||||
|
|
||||||
$ git clone http://github.com/honnibal/spaCy.git
|
TODO
|
||||||
$ cd spaCy
|
|
||||||
$ virtualenv .env
|
|
||||||
$ source .env/bin/activate
|
|
||||||
$ pip install -r requirements.txt
|
|
||||||
$ fab make
|
|
||||||
$ fab test
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user