diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py
index 4069e018a..86c7fbf72 100644
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -272,3 +272,9 @@ def test_doc_is_nered(en_vocab):
# Test serialization
new_doc = Doc(en_vocab).from_bytes(doc.to_bytes())
assert new_doc.is_nered
+
+
+def test_doc_lang(en_vocab):
+ doc = Doc(en_vocab, words=["Hello", "world"])
+ assert doc.lang_ == "en"  # string form of the language exposed by the doc
+ assert doc.lang == en_vocab.strings["en"]  # ID form: the value en_vocab.strings gives for "en"
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 4d3ed084a..857c7b538 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -597,6 +597,16 @@ cdef class Doc:
if start != self.length:
yield Span(self, start, self.length)
+ @property
+ def lang(self):
+ """RETURNS (uint64): ID of the vocab's language, i.e. `vocab.strings[vocab.lang]`."""
+ return self.vocab.strings[self.vocab.lang]
+
+ @property
+ def lang_(self):
+ """RETURNS (unicode): Language of the doc's vocabulary (`vocab.lang`), e.g. 'en'."""
+ return self.vocab.lang
+
cdef int push_back(self, LexemeOrToken lex_or_tok, bint has_space) except -1:
if self.length == 0:
# Flip these to false when we see the first token.
@@ -748,7 +758,7 @@ cdef class Doc:
# Allow strings, e.g. 'lemma' or 'LEMMA'
attrs = [(IDS[id_.upper()] if hasattr(id_, "upper") else id_)
for id_ in attrs]
-
+
if SENT_START in attrs and HEAD in attrs:
raise ValueError(Errors.E032)
cdef int i, col
diff --git a/website/docs/api/doc.md b/website/docs/api/doc.md
index 953a31c2d..f5a94335f 100644
--- a/website/docs/api/doc.md
+++ b/website/docs/api/doc.md
@@ -654,6 +654,8 @@ The L2 norm of the document's vector representation.
| `tensor` 2 | object | Container for dense vector representations. |
| `cats` 2 | dictionary | Maps either a label to a score for categories applied to whole document, or `(start_char, end_char, label)` to score for categories applied to spans. `start_char` and `end_char` should be character offsets, label can be either a string or an integer ID, and score should be a float. |
| `user_data` | - | A generic storage area, for user custom data. |
+| `lang` 2.1 | int | ID of the language of the document's vocabulary. |
+| `lang_` 2.1 | unicode | Language of the document's vocabulary as a string, e.g. `'en'`. |
| `is_tagged` | bool | A flag indicating that the document has been part-of-speech tagged. |
| `is_parsed` | bool | A flag indicating that the document has been syntactically parsed. |
| `is_sentenced` | bool | A flag indicating that sentence boundaries have been applied to the document. |