Update UD schemes

2025-10-26 05:31:15 +03:00 · 2017-11-05 18:46:24 +01:00 · 2017-11-05 18:46:24 +01:00 · a6ffa942bb
commit a6ffa942bb
parent 975e1042ff
3 changed files with 68 additions and 23 deletions
--- a/website/_includes/_mixins.jade
+++ b/website/_includes/_mixins.jade
@@ -543,10 +543,11 @@ mixin pos-row(tag, pos, morph, desc)
                    |  #[code=m]
        +cell.u-text-small=desc

-mixin univ-pos-row(tag, desc, example)
+mixin ud-row(tag, desc, example)
        +row
            +cell #[code=tag]
            +cell.u-text-small=desc
+            if example
                +cell.u-text-small
                    em=example

--- a/website/api/_annotation/_dep-labels.jade
+++ b/website/api/_annotation/_dep-labels.jade
@@ -5,6 +5,50 @@ p
    |  spaCy's #[+a("/models") models]. The individual labels are
    |  language-specific and depend on the training corpus.

++accordion("Universal Dependency Labels")
+    p
+        |  The #[+a("http://universaldependencies.org/u/dep/") Universal Dependencies scheme]
+        |  is used in all languages trained on Universal Dependency Corpora.
+
+    +table(["Dep", "Description"])
+        +ud-row("acl", "clausal modifier of noun (adjectival clause)")
+        +ud-row("advcl", "adverbial clause modifier")
+        +ud-row("advmod", "adverbial modifier")
+        +ud-row("amod", "adjectival modifier")
+        +ud-row("appos", "appositional modifier")
+        +ud-row("aux", "auxiliary")
+        +ud-row("case", "case marking")
+        +ud-row("cc", "coordinating conjunction")
+        +ud-row("ccomp", "clausal complement")
+        +ud-row("clf", "classifier")
+        +ud-row("compound", "compound")
+        +ud-row("conj", "conjunct")
+        +ud-row("cop", "copula")
+        +ud-row("csubj", "clausal subject")
+        +ud-row("dep", "unspecified dependency")
+        +ud-row("det", "determiner")
+        +ud-row("discourse", "discourse element")
+        +ud-row("dislocated", "dislocated elements")
+        +ud-row("expl", "expletive")
+        +ud-row("fixed", "fixed multiword expression")
+        +ud-row("flat", "flat multiword expression")
+        +ud-row("goeswith", "goes with")
+        +ud-row("iobj", "indirect object")
+        +ud-row("list", "list")
+        +ud-row("mark", "marker")
+        +ud-row("nmod", "nominal modifier")
+        +ud-row("nsubj", "nominal subject")
+        +ud-row("nummod", "numeric modifier")
+        +ud-row("obj", "object")
+        +ud-row("obl", "oblique nominal")
+        +ud-row("orphan", "orphan")
+        +ud-row("parataxis", "parataxis")
+        +ud-row("punct", "punctuation")
+        +ud-row("reparandum", "overridden disfluency")
+        +ud-row("root", "root")
+        +ud-row("vocative", "vocative")
+        +ud-row("xcomp", "open clausal complement")
+
 +accordion("English", "dependency-parsing-english")
    p
        |  The English dependency labels use the
--- a/website/api/_annotation/_pos-tags.jade
+++ b/website/api/_annotation/_pos-tags.jade
@@ -6,7 +6,7 @@ p
    |  is specific to the training corpus and can be defined in the respective
    |  language data's #[+a("/usage/adding-languages#tag-map") #[code tag_map.py]].

-+accordion("Universal part-of-speech tags")
++accordion("Universal Part-of-speech Tags")
    p
        |  spaCy also maps all language-specific part-of-speech tags to a small,
        |  fixed set of word type tags following the
@@ -17,25 +17,25 @@ p
        |  #[+api("token#attributes") #[code Token.pos_]] attributes.

    +table(["POS", "Description", "Examples"])
-        +univ-pos-row("ADJ", "adjective", "big, old, green, incomprehensible, first")
-        +univ-pos-row("ADP", "adposition", "in, to, during")
-        +univ-pos-row("ADV", "adverb", "very, tomorrow, down, where, there")
-        +univ-pos-row("AUX", "auxiliary", "is, has (done), will (do), should (do)")
-        +univ-pos-row("CONJ", "conjunction", "and, or, but")
-        +univ-pos-row("CCONJ", "coordinating conjunction", "and, or, but")
-        +univ-pos-row("DET", "determiner", "a, an, the")
-        +univ-pos-row("INTJ", "interjection", "psst, ouch, bravo, hello")
-        +univ-pos-row("NOUN", "noun", "girl, cat, tree, air, beauty")
-        +univ-pos-row("NUM", "numeral", "1, 2017, one, seventy-seven, IV, MMXIV")
-        +univ-pos-row("PART", "particle", "'s, not, ")
-        +univ-pos-row("PRON", "pronoun", "I, you, he, she, myself, themselves, somebody")
-        +univ-pos-row("PROPN", "proper noun", "Mary, John, Londin, NATO, HBO")
-        +univ-pos-row("PUNCT", "punctuation", "., (, ), ?")
-        +univ-pos-row("SCONJ", "subordinating conjunction", "if, while, that")
-        +univ-pos-row("SYM", "symbol", "$, %, §, ©, +, −, ×, ÷, =, :), 😝")
-        +univ-pos-row("VERB", "verb", "run, runs, running, eat, ate, eating")
-        +univ-pos-row("X", "other", "sfpksdpsxmsa")
-        +univ-pos-row("SPACE", "space", "")
+        +ud-row("ADJ", "adjective", "big, old, green, incomprehensible, first")
+        +ud-row("ADP", "adposition", "in, to, during")
+        +ud-row("ADV", "adverb", "very, tomorrow, down, where, there")
+        +ud-row("AUX", "auxiliary", "is, has (done), will (do), should (do)")
+        +ud-row("CONJ", "conjunction", "and, or, but")
+        +ud-row("CCONJ", "coordinating conjunction", "and, or, but")
+        +ud-row("DET", "determiner", "a, an, the")
+        +ud-row("INTJ", "interjection", "psst, ouch, bravo, hello")
+        +ud-row("NOUN", "noun", "girl, cat, tree, air, beauty")
+        +ud-row("NUM", "numeral", "1, 2017, one, seventy-seven, IV, MMXIV")
+        +ud-row("PART", "particle", "'s, not")
+        +ud-row("PRON", "pronoun", "I, you, he, she, myself, themselves, somebody")
+        +ud-row("PROPN", "proper noun", "Mary, John, London, NATO, HBO")
+        +ud-row("PUNCT", "punctuation", "., (, ), ?")
+        +ud-row("SCONJ", "subordinating conjunction", "if, while, that")
+        +ud-row("SYM", "symbol", "$, %, §, ©, +, −, ×, ÷, =, :), 😝")
+        +ud-row("VERB", "verb", "run, runs, running, eat, ate, eating")
+        +ud-row("X", "other", "sfpksdpsxmsa")
+        +ud-row("SPACE", "space", "")

 +accordion("English", "pos-en")
    p