diff --git a/website/Makefile b/website/Makefile
index 87c9077ec..ef26d410d 100644
--- a/website/Makefile
+++ b/website/Makefile
@@ -1,12 +1,13 @@
-all: dir site
+all: src/code site
 
-dir:
-	mkdir -p site
+src/code:
+	mkdir -p src/code/
+	./create_code_samples tests/ src/code/
 
 site: site/index.html site/blog/ site/docs/ site/license/ site/blog/introducing-spacy/ site/blog/parsing-english-in-python/ site/blog/part-of-speech-POS-tagger-in-python/ site/tutorials/twitter-filter/ site/tutorials/syntax-search/ site/tutorials/mark-adverbs/ site/blog/writing-c-in-cython/ site/blog/how-spacy-works/
 
 site/index.html: src/jade/header.jade src/jade/*.jade
-	./run_jade src/jade/home/index.jade $@
+	jade -P src/jade/home/index.jade --out site/
 
 site/docs/: src/jade/docs/*.jade src/jade/header.jade
 	jade -P src/jade/docs/index.jade --out $@
diff --git a/website/create_code_samples b/website/create_code_samples
new file mode 100755
index 000000000..a87d843c9
--- /dev/null
+++ b/website/create_code_samples
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+"""Write the bodies of ``test_*`` functions out as stand-alone code samples.
+
+Usage: create_code_samples SRC_DIR DST_DIR
+
+The body of every top-level function ``test_<sample>`` in a file named
+``test_<name>.py`` inside SRC_DIR is dedented and written to
+``DST_DIR/<name>.<sample>``.
+"""
+import ast
+import os
+import re
+import sys
+import textwrap
+
+
+PREFIX = "test_"
+
+# Anchored at the end so that by-products lying next to the tests, such as
+# test_home.pyc or test_home.py~, are not parsed as test modules.
+FILENAME_RE = re.compile(re.escape(PREFIX) + r"(.+)\.py$")
+
+
+def last_line(func):
+    """Return the number of the last source line that belongs to *func*.
+
+    ``end_lineno`` only exists on Python 3.8+.  Without it the highest
+    ``lineno`` of any node nested in the function is used, which still
+    covers most statements that continue over several lines.
+    """
+    return max(getattr(node, "end_lineno", None) or node.lineno
+               for node in ast.walk(func) if hasattr(node, "lineno"))
+
+
+def iter_samples(lines):
+    """Yield ``(sample_name, code)`` for each top-level test function.
+
+    *lines* is the module source as a list of lines, as returned by
+    ``readlines()``.  Raises ``ValueError`` if a function body is not
+    indented, e.g. when it shares a line with its ``def``.
+    """
+    tree = ast.parse("".join(lines))
+
+    for item in tree.body:
+        if not isinstance(item, ast.FunctionDef):
+            continue
+        if not item.name.startswith(PREFIX):
+            continue
+
+        # Start at the first statement rather than at a decorator or at the
+        # signature, and keep the continuation lines of the last statement.
+        body = lines[item.body[0].lineno - 1:last_line(item)]
+
+        # make sure we are inside an indented function body
+        if not all(re.match(r"\s", line) for line in body):
+            raise ValueError("body of %s is not indented" % item.name)
+
+        # Remove the indentation common to all lines (not that of the last
+        # line), so a sample that ends inside a nested block is not
+        # truncated.  Whitespace-only lines come out as a bare newline.
+        code = textwrap.dedent("".join(body))
+        if not code.endswith("\n"):
+            code += "\n"
+
+        yield item.name[len(PREFIX):], code
+
+
+def main(argv):
+    """Generate the samples for every test module in ``argv[1]``."""
+    if len(argv) != 3:
+        sys.exit("usage: %s SRC_DIR DST_DIR" % argv[0])
+    src_dirname, dst_dirname = argv[1:]
+
+    for filename in os.listdir(src_dirname):
+        match = FILENAME_RE.match(filename)
+        if not match:
+            continue
+
+        # Read inside ``with`` so the handle is closed before the next file.
+        with open(os.path.join(src_dirname, filename)) as f:
+            lines = f.readlines()
+
+        for sample, code in iter_samples(lines):
+            code_filename = "%s.%s" % (match.group(1), sample)
+            with open(os.path.join(dst_dirname, code_filename), "w") as f:
+                f.write(code)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/website/run_jade b/website/run_jade
deleted file mode 100755
index 83ac87ef7..000000000
--- a/website/run_jade
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env node
-'use strict';
-
-var fs = require('fs');
-var jade = require('jade');
-
-// returns all: code and return value (default)
-jade.filters.doctest_all = function (html, _, use_rv) {
- use_rv = use_rv === undefined ? true : use_rv;
-
- var lines = html.trim().split(/\n/),
- block = [],
- res = '';
-
- lines.forEach(function (line) {
- if (line.indexOf('>>> ') === 0) {
- // we use ### to suppress lines
- if (line.indexOf("###") === -1) {
- block.push(line.replace(/^>>> /gm, ''));
- }
- } else if (block.length > 0) {
- res += '
' + block.join('\n') + '
';
- block = [];
-
- if (use_rv) {
- res += 'Which produces:
'; - res += '' + line + '
';
- }
- }
- });
-
- if (block.length > 0) {
- res += '' + block.join('\n') + '
';
- }
-
- return res;
-};
-
-// returns only code
-jade.filters.doctest = function (html) {
- return jade.filters.doctest_all(html, null, false);
-};
-
-if (process.argv[0] === "node") {
- process.argv.shift();
-}
-
-var in_file = process.argv[1];
-var out_file = process.argv[2];
-
-var html = jade.renderFile(in_file, {
- pretty: true
-});
-
-fs.writeFile(out_file, html, function (err) {
- if (err) {
- throw err;
- }
-});
diff --git a/website/tests/test_home.py b/website/tests/test_home.py
new file mode 100644
index 000000000..7fad47e6b
--- /dev/null
+++ b/website/tests/test_home.py
@@ -0,0 +1,40 @@
+from __future__ import unicode_literals
+import pytest
+
+
+@pytest.fixture(scope="session")  # session scope: built once, shared by the whole run
+def nlp():  # not a test_* function, so create_code_samples does not export it
+ from spacy.en import English  # imported when the fixture is set up, not at module import
+ return English()
+
+
+@pytest.fixture()  # default (function) scope: the text is parsed again for each test
+def doc(nlp):
+ return nlp('Hello, world. Here are two sentences.')  # same text as the first sample below
+
+
+@pytest.fixture()
+def token(doc):
+ return doc[0]  # first item of the parsed text; the tests expect its orth_ to be 'Hello'
+
+
+def test_load_resources_and_process_text():  # no asserts: passes if loading and parsing do not raise. The body is exported by create_code_samples, so comments belong on this line only
+ from spacy.en import English
+ nlp = English()
+ doc = nlp('Hello, world. Here are two sentences.')
+
+
+def test_get_tokens_and_sentences(doc):  # first token and first sentence of `doc`. The body is exported by create_code_samples, so comments belong on this line only
+ token = doc[0]
+ sentence = next(doc.sents)
+
+ assert token is sentence[0]
+ assert sentence.text == 'Hello, world.'
+
+
+def test_use_integer_ids_for_any_strings(nlp, token):  # round-trips 'Hello' through nlp.vocab.strings (str -> id -> str). NOTE(review): 3404 is hard-coded and presumably tied to the shipped vocabulary - confirm. The body is exported by create_code_samples, so comments belong on this line only
+ hello_id = nlp.vocab.strings['Hello']
+ hello_str = nlp.vocab.strings[hello_id]
+
+ assert token.orth == hello_id == 3404
+ assert token.orth_ == hello_str == 'Hello'