# Provenance: adapted from the patch "Add assert_docs_equal util to compare
# two docs" (commit 442237787c7b25b38fb441a444f7c780f847f686, Ines Montani,
# Thu, 12 Jan 2017 21:56:52 +0100), which appends assert_docs_equal to
# spacy/tests/util.py directly after get_cosine.
# NOTE(review): `numpy` is presumably imported at the top of that module; the
# import lies outside the patch hunk, so it is not repeated here.


def get_cosine(vec1, vec2):
    """Get cosine for two given vectors.

    Computed as dot(vec1, vec2) / (|vec1| * |vec2|). There is no guard for
    zero-length vectors; the division is left to numpy.
    """
    return numpy.dot(vec1, vec2) / (numpy.linalg.norm(vec1) * numpy.linalg.norm(vec2))


def _assert_same(label, values1, values2):
    """Fail with a message naming `label` unless the two lists are equal."""
    assert values1 == values2, "%s differ: %r != %r" % (label, values1, values2)


def assert_docs_equal(doc1, doc2):
    """Assert that two docs carry the same tokens and annotations.

    Compares, in this order: token `orth`; `pos` and `tag`; `head.i` and
    `dep`; sentence boundaries (only when both docs report `is_parsed`);
    `ent_type` and `ent_iob`; and finally the entity spans.

    doc1, doc2: Iterables of tokens that also expose `is_parsed`, `sents`
        and `ents`. Items of `sents` need `start`/`end`; items of `ents`
        need `start`/`end`/`label`.
    Raises AssertionError naming the first attribute that differs.
    """
    tokens1 = list(doc1)
    tokens2 = list(doc2)

    def compare(label, getter):
        # Explicit messages keep a failure readable even where pytest's
        # assertion rewriting is not active for this helper module.
        _assert_same(label,
                     [getter(t) for t in tokens1],
                     [getter(t) for t in tokens2])

    # tokens
    compare("orth", lambda t: t.orth)

    # tags
    compare("pos", lambda t: t.pos)
    compare("tag", lambda t: t.tag)

    # parse
    compare("head.i", lambda t: t.head.i)
    compare("dep", lambda t: t.dep)
    if doc1.is_parsed and doc2.is_parsed:
        # Compare plain boundaries instead of the span objects themselves.
        # NOTE(review): span equality across two different docs appears to
        # vary between spaCy versions (it may involve the owning doc), so
        # `span1 == span2` is not a reliable check here -- confirm.
        _assert_same("sents",
                     [(s.start, s.end) for s in doc1.sents],
                     [(s.start, s.end) for s in doc2.sents])

    # entities
    compare("ent_type", lambda t: t.ent_type)
    compare("ent_iob", lambda t: t.ent_iob)
    _assert_same("ents",
                 [(e.start, e.end, e.label) for e in doc1.ents],
                 [(e.start, e.end, e.label) for e in doc2.ents])