diff --git a/website/Makefile b/website/Makefile
index 87c9077ec..ef26d410d 100644
--- a/website/Makefile
+++ b/website/Makefile
@@ -1,12 +1,13 @@
-all: dir site
+all: src/code site
 
-dir:
-	mkdir -p site
+src/code:
+	mkdir -p src/code/
+	./create_code_samples tests/ src/code/
 
 site: site/index.html site/blog/ site/docs/ site/license/ site/blog/introducing-spacy/ site/blog/parsing-english-in-python/ site/blog/part-of-speech-POS-tagger-in-python/ site/tutorials/twitter-filter/ site/tutorials/syntax-search/ site/tutorials/mark-adverbs/ site/blog/writing-c-in-cython/ site/blog/how-spacy-works/
 
 site/index.html: src/jade/header.jade src/jade/*.jade
-	./run_jade src/jade/home/index.jade $@
+	jade -P src/jade/home/index.jade --out site/
 
 site/docs/: src/jade/docs/*.jade src/jade/header.jade
 	jade -P src/jade/docs/index.jade --out $@
diff --git a/website/create_code_samples b/website/create_code_samples
new file mode 100755
index 000000000..a87d843c9
--- /dev/null
+++ b/website/create_code_samples
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+"""Extract the bodies of test functions into standalone code samples.
+
+Usage: create_code_samples SRC_DIR DST_DIR
+
+For every top-level function ``test_SAMPLE`` in a file ``test_NAME.py``
+of SRC_DIR, the dedented function body is written to
+``DST_DIR/NAME.SAMPLE``.
+"""
+import sys
+import re
+import os
+import ast
+import textwrap
+
+
+src_dirname = sys.argv[1]
+dst_dirname = sys.argv[2]
+prefix = "test_"
+
+
+for filename in os.listdir(src_dirname):
+    # anchor the suffix so that byte-compiled "test_*.pyc" files are skipped
+    match = re.match(re.escape(prefix) + r"(.+)\.py$", filename)
+    if not match:
+        continue
+
+    name = match.group(1)
+    # close the handle right away instead of leaving it to the GC
+    with open(os.path.join(src_dirname, filename)) as f:
+        source = f.readlines()
+    tree = ast.parse("".join(source))
+
+    for item in tree.body:
+        if isinstance(item, ast.FunctionDef) and item.name.startswith(prefix):
+
+            # only look at the statements of the body: decorators are child
+            # nodes with line numbers as well, but they precede the "def"
+            first_line = item.body[0].lineno
+
+            # end_lineno requires Python 3.8+; older interpreters fall back
+            # to the last line on which a node of the final statement starts
+            last_stmt = item.body[-1]
+            last_line = getattr(last_stmt, "end_lineno", None) or max(
+                getattr(x, "lineno", 0) for x in ast.walk(last_stmt))
+
+            body = source[first_line-1:last_line]
+
+            # make sure we are inside an indented function body
+            assert all([re.match(r"\s", l[0]) for l in body])
+
+            # strip only the indentation common to all lines, so nested
+            # blocks keep their relative indentation; whitespace-only lines
+            # are reduced to a bare newline
+            code = textwrap.dedent("".join(body))
+
+            # make sure the sample ends with a newline
+            if not code.endswith("\n"):
+                code += "\n"
+
+            code_filename = "%s.%s" % (name, item.name[len(prefix):])
+
+            with open(os.path.join(dst_dirname, code_filename), "w") as f:
+                f.write(code)
diff --git a/website/run_jade b/website/run_jade
deleted file mode 100755
index 83ac87ef7..000000000
--- a/website/run_jade
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env node
-'use strict';
-
-var fs = require('fs');
-var jade = require('jade');
-
-// returns all: code and return value (default)
-jade.filters.doctest_all = function (html, _, use_rv) {
-    use_rv = use_rv === undefined ? true : use_rv;
-
-    var lines = html.trim().split(/\n/),
-        block = [],
-        res = '';
-
-    lines.forEach(function (line) {
-        if (line.indexOf('>>> ') === 0) {
-            // we use ### to suppress lines
-            if (line.indexOf("###") === -1) {
-                block.push(line.replace(/^>>> /gm, ''));
-            }
-        } else if (block.length > 0) {
-            res += '
' + block.join('\n') + '
';
-            block = [];
-
-            if (use_rv) {
-                res += '

Which produces:

';
-                res += '
' + line + '
';
-            }
-        }
-    });
-
-    if (block.length > 0) {
-        res += '
' + block.join('\n') + '
';
-    }
-
-    return res;
-};
-
-// returns only code
-jade.filters.doctest = function (html) {
-    return jade.filters.doctest_all(html, null, false);
-};
-
-if (process.argv[0] === "node") {
-    process.argv.shift();
-}
-
-var in_file = process.argv[1];
-var out_file = process.argv[2];
-
-var html = jade.renderFile(in_file, {
-    pretty: true
-});
-
-fs.writeFile(out_file, html, function (err) {
-    if (err) {
-        throw err;
-    }
-});
diff --git a/website/tests/test_home.py b/website/tests/test_home.py
new file mode 100644
index 000000000..7fad47e6b
--- /dev/null
+++ b/website/tests/test_home.py
@@ -0,0 +1,46 @@
+"""Tests whose bodies double as code samples.
+
+The Makefile runs ``create_code_samples`` over this directory, which writes
+the body of every ``test_*`` function to ``src/code/``. Keep the bodies free
+of docstrings and test-only comments, since they are copied as-is.
+"""
+from __future__ import unicode_literals
+import pytest
+
+
+@pytest.fixture(scope="session")
+def nlp():
+    from spacy.en import English
+    return English()
+
+
+@pytest.fixture()
+def doc(nlp):
+    return nlp('Hello, world. Here are two sentences.')
+
+
+@pytest.fixture()
+def token(doc):
+    return doc[0]
+
+
+def test_load_resources_and_process_text():
+    from spacy.en import English
+    nlp = English()
+    doc = nlp('Hello, world. Here are two sentences.')
+
+
+def test_get_tokens_and_sentences(doc):
+    token = doc[0]
+    sentence = next(doc.sents)
+
+    assert token is sentence[0]
+    assert sentence.text == 'Hello, world.'
+
+
+def test_use_integer_ids_for_any_strings(nlp, token):
+    hello_id = nlp.vocab.strings['Hello']
+    hello_str = nlp.vocab.strings[hello_id]
+
+    assert token.orth == hello_id == 3404
+    assert token.orth_ == hello_str == 'Hello'