diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd index 6914eb8d6..7f2ebe34b 100644 --- a/spacy/morphology.pxd +++ b/spacy/morphology.pxd @@ -1,478 +1,487 @@ -from .structs cimport TokenC, Morphology, PosTag - - -cdef int set_morph_from_dict(Morphology* morph, dict props) except -1 - - -cdef enum Feature: - Abbr - AdpType - AdvType - ConjType - Connegative - Derivation - Echo - Foreign - Gender_dat - Gender_erg - Gender_psor - Hyph - InfForm - NameType - NounType - NumberAbs - NumberDat - NumberErg - NumberPsee - NumberPsor - NumForm - NumValue - PartForm - PartType - Person_abs - Person_dat - Person_psor - Polite - Polite_abs - Polite_dat - Prefix - PrepCase - PunctSide - PunctType - Style - Typo - Variant - VerbType -cpdef enum Animacy: - Anim - Inam - - -cpdef enum Aspect: - Freq - Imp - Mod - None_ - Perf - - -cpdef enum Case1: - Abe - Abl - Abs - Acc - Ade - All - Cau - Com - -cdef enum Case2: - Dat - Del - Dis - Ela - Ess - Gen - Ill - Ine - -cdef enum Case3: - Ins - Loc - Lat - Nom - Par - Sub - Sup - Tem - Ter - - -cdef enum Case4: - Tra - Voc - - -cpdef enum Definite: - Two - Def - Red - Ind - - -cpdef enum Degree: - Cmp - Comp - None_ - Pos - Sup - Abs - Com - Degree # du - - -cpdef enum Gender: - Com - Fem - Masc - Neut - - -cpdef enum Mood: - Cnd - Imp - Ind - N - Pot - Sub - Opt - - -cpdef enum Negative: - Neg - Pos - Yes - - -cpdef enum Number: - Com - Dual - None_ - Plur - Sing - Ptan # bg - Count # bg - - -cpdef enum NumType: - Card - Dist - Frac - Gen - Mult - None_ - Ord - Sets - - -cpdef enum Person: - One - Two - Three - None_ - - -cpdef enum Poss: - Yes - - -cpdef enum PronType1: - AdvPart - Art - Default - Dem - Ind - Int - Neg - -cpdef enum PronType2: - Prs - Rcp - Rel - Tot - Clit - Exc # es, ca, it, fa - Clit # it - - -cpdef enum Reflex: - Yes - - -cpdef enum Tense: - Fut - Imp - Past - Pres - -cpdef enum VerbForm1: - Fin - Ger - Inf - None_ - Part - PartFut - PartPast - -cpdef enum VerbForm2: - PartPres - Sup - Trans - Gdv # la - - -cpdef enum Voice: - Act - Cau - Pass - Mid # gkc - Int # hb - - -cpdef enum Abbr: - Yes # cz, fi, sl, U - -cpdef enum AdpType: - Prep # cz, U - Post # U - Voc # cz - Comprep # cz - Circ # U - Voc # U - - -cpdef enum AdvType1: - # U - Man - Loc - Tim - Deg - Cau - Mod - Sta - Ex - -cpdef enum AdvType2: - Adadj - -cpdef enum ConjType: - Oper # cz, U - Comp # cz, U - -cpdef enum Connegative: - Yes # fi - - -cpdef enum Derivation1: - Minen # fi - Sti # fi - Inen # fi - Lainen # fi - Ja # fi - Ton # fi - Vs # fi - Ttain # fi - -cpdef enum Derivation2: - Ttaa - - -cpdef enum Echo: - Rdp # U - Ech # U - - -cpdef enum Foreign: - Foreign # cz, fi, U - Fscript # cz, fi, U - Tscript # cz, U - Yes # sl - - -cpdef enum Gender_dat: - Masc # bq, U - Fem # bq, U - - -cpdef enum Gender_erg: - Masc # bq - Fem # bq - - -cpdef enum Gender_psor: - Masc # cz, sl, U - Fem # cz, sl, U - Neut # sl - - -cpdef enum Hyph: - Yes # cz, U - - -cpdef enum InfForm: - One # fi - Two # fi - Three # fi - - -cpdef enum NameType: - Geo # U, cz - Prs # U, cz - Giv # U, cz - Sur # U, cz - Nat # U, cz - Com # U, cz - Pro # U, cz - Oth # U, cz - - -cpdef enum NounType: - Com # U - Prop # U - Class # U - -cpdef enum Number_abs: - Sing # bq, U - Plur # bq, U - -cpdef enum Number_dat: - Sing # bq, U - Plur # bq, U - -cpdef enum Number_erg: - Sing # bq, U - Plur # bq, U - -cpdef enum Number_psee: - Sing # U - Plur # U - - -cpdef enum Number_psor: - Sing # cz, fi, sl, U - Plur # cz, fi, sl, U - - -cpdef enum NumForm: - Digit # cz, sl, U - Roman # cz, sl, U - Word # cz, sl, U - - -cpdef enum NumValue: - One # cz, U - Two # cz, U - Three # cz, U - - -cpdef enum PartForm: - Pres # fi - Past # fi - Agt # fi - Neg # fi - - -cpdef enum PartType: - Mod # U - Emp # U - Res # U - Inf # U - Vbp # U - -cpdef enum Person_abs: - One # bq, U - Two # bq, U - Three # bq, U - - -cpdef enum Person_dat: - One # bq, U - Two # bq, U - Three # bq, U - - -cpdef enum Person_erg: - One # bq, U - Two # bq, U - Three # bq, U - - -cpdef enum Person_psor: - One # fi, U - Two # fi, U - Three # fi, U - - -cpdef enum Polite: - Inf # bq, U - Pol # bq, U - - -cpdef enum Polite_abs: - Inf # bq, U - Pol # bq, U - - -cpdef enum Polite_erg: - Inf # bq, U - Pol # bq, U - - -cpdef enum Polite_dat: - Inf # bq, U - Pol # bq, U - - -cpdef enum Prefix: - Yes # U - - -cpdef enum PrepCase: - Npr # cz - Pre # U - - -cpdef enum PunctSide: - Ini # U - Fin # U - -cpdef enum PunctType1: - Peri # U - Qest # U - Excl # U - Quot # U - Brck # U - Comm # U - Colo # U - Semi # U - -cpdef enum PunctType2: - Dash # U - - -cpdef enum Style1: - Arch # cz, fi, U - Rare # cz, fi, U - Poet # cz, U - Norm # cz, U - Coll # cz, U - Vrnc # cz, U - Sing # cz, U - Expr # cz, U - - -cpdef enum Style2: - Derg # cz, U - Vulg # cz, U - - -cpdef enum Typo: - Yes # fi, U - - -cpdef enum Variant: - Short # cz - Bound # cz, sl - - -cpdef enum VerbType: - Aux # U - Cop # U - Mod # U - Light # U - - -cpdef enum FeatureValues: +from .structs cimport TokenC + + +cdef class Morphology: + cdef public object tag_map + cdef public object tag_names + cdef public object tag_ids + cdef public int n_tags + + cdef int assign_tag(self, TokenC* token, int tag) except -1 + + cdef int assign_from_dict(self, TokenC* token, props) except -1 + +# +#cpdef enum Feature_t: +# Abbr +# AdpType +# AdvType +# ConjType +# Connegative +# Derivation +# Echo +# Foreign +# Gender_dat +# Gender_erg +# Gender_psor +# Hyph +# InfForm +# NameType +# NounType +# NumberAbs +# NumberDat +# NumberErg +# NumberPsee +# NumberPsor +# NumForm +# NumValue +# PartForm +# PartType +# Person_abs +# Person_dat +# Person_psor +# Polite +# Polite_abs +# Polite_dat +# Prefix +# PrepCase +# PunctSide +# PunctType +# Style +# Typo +# Variant +# VerbType +# +# +#cpdef enum Animacy: +# Anim +# Inam +# +# +#cpdef enum Aspect: +# Freq +# Imp +# Mod +# None_ +# Perf +# +# +#cpdef enum Case1: +# Nom +# Gen +# Acc +# Dat +# Voc +# Abl +# +#cdef enum Case2: +# Abe +# Abs +# Ade +# All +# Cau +# Com +# Del +# Dis +# +#cdef enum Case3: +# Ela +# Ess +# Ill +# Ine +# Ins +# Loc +# Lat +# Par +# +#cdef enum Case4: +# Sub +# Sup +# Tem +# Ter +# Tra +# +# +#cpdef enum Definite: +# Two +# Def +# Red +# Ind +# +# +#cpdef enum Degree: +# Cmp +# Comp +# None_ +# Pos +# Sup +# Abs +# Com +# Degree # du +# +# +#cpdef enum Gender: +# Com +# Fem +# Masc +# Neut +# +# +#cpdef enum Mood: +# Cnd +# Imp +# Ind +# N +# Pot +# Sub +# Opt +# +# +#cpdef enum Negative: +# Neg +# Pos +# Yes +# +# +#cpdef enum Number: +# Com +# Dual +# None_ +# Plur +# Sing +# Ptan # bg +# Count # bg +# +# +#cpdef enum NumType: +# Card +# Dist +# Frac +# Gen +# Mult +# None_ +# Ord +# Sets +# +# +#cpdef enum Person: +# One +# Two +# Three +# None_ +# +# +#cpdef enum Poss: +# Yes +# +# +#cpdef enum PronType1: +# AdvPart +# Art +# Default +# Dem +# Ind +# Int +# Neg +# +#cpdef enum PronType2: +# Prs +# Rcp +# Rel +# Tot +# Clit +# Exc # es, ca, it, fa +# Clit # it +# +# +#cpdef enum Reflex: +# Yes +# +# +#cpdef enum Tense: +# Fut +# Imp +# Past +# Pres +# +#cpdef enum VerbForm1: +# Fin +# Ger +# Inf +# None_ +# Part +# PartFut +# PartPast +# +#cpdef enum VerbForm2: +# PartPres +# Sup +# Trans +# Gdv # la +# +# +#cpdef enum Voice: +# Act +# Cau +# Pass +# Mid # gkc +# Int # hb +# +# +#cpdef enum Abbr: +# Yes # cz, fi, sl, U +# +#cpdef enum AdpType: +# Prep # cz, U +# Post # U +# Voc # cz +# Comprep # cz +# Circ # U +# Voc # U +# +# +#cpdef enum AdvType1: +# # U +# Man +# Loc +# Tim +# Deg +# Cau +# Mod +# Sta +# Ex +# +#cpdef enum AdvType2: +# Adadj +# +#cpdef enum ConjType: +# Oper # cz, U +# Comp # cz, U +# +#cpdef enum Connegative: +# Yes # fi +# +# +#cpdef enum Derivation1: +# Minen # fi +# Sti # fi +# Inen # fi +# Lainen # fi +# Ja # fi +# Ton # fi +# Vs # fi +# Ttain # fi +# +#cpdef enum Derivation2: +# Ttaa +# +# +#cpdef enum Echo: +# Rdp # U +# Ech # U +# +# +#cpdef enum Foreign: +# Foreign # cz, fi, U +# Fscript # cz, fi, U +# Tscript # cz, U +# Yes # sl +# +# +#cpdef enum Gender_dat: +# Masc # bq, U +# Fem # bq, U +# +# +#cpdef enum Gender_erg: +# Masc # bq +# Fem # bq +# +# +#cpdef enum Gender_psor: +# Masc # cz, sl, U +# Fem # cz, sl, U +# Neut # sl +# +# +#cpdef enum Hyph: +# Yes # cz, U +# +# +#cpdef enum InfForm: +# One # fi +# Two # fi +# Three # fi +# +# +#cpdef enum NameType: +# Geo # U, cz +# Prs # U, cz +# Giv # U, cz +# Sur # U, cz +# Nat # U, cz +# Com # U, cz +# Pro # U, cz +# Oth # U, cz +# +# +#cpdef enum NounType: +# Com # U +# Prop # U +# Class # U +# +#cpdef enum Number_abs: +# Sing # bq, U +# Plur # bq, U +# +#cpdef enum Number_dat: +# Sing # bq, U +# Plur # bq, U +# +#cpdef enum Number_erg: +# Sing # bq, U +# Plur # bq, U +# +#cpdef enum Number_psee: +# Sing # U +# Plur # U +# +# +#cpdef enum Number_psor: +# Sing # cz, fi, sl, U +# Plur # cz, fi, sl, U +# +# +#cpdef enum NumForm: +# Digit # cz, sl, U +# Roman # cz, sl, U +# Word # cz, sl, U +# +# +#cpdef enum NumValue: +# One # cz, U +# Two # cz, U +# Three # cz, U +# +# +#cpdef enum PartForm: +# Pres # fi +# Past # fi +# Agt # fi +# Neg # fi +# +# +#cpdef enum PartType: +# Mod # U +# Emp # U +# Res # U +# Inf # U +# Vbp # U +# +#cpdef enum Person_abs: +# One # bq, U +# Two # bq, U +# Three # bq, U +# +# +#cpdef enum Person_dat: +# One # bq, U +# Two # bq, U +# Three # bq, U +# +# +#cpdef enum Person_erg: +# One # bq, U +# Two # bq, U +# Three # bq, U +# +# +#cpdef enum Person_psor: +# One # fi, U +# Two # fi, U +# Three # fi, U +# +# +#cpdef enum Polite: +# Inf # bq, U +# Pol # bq, U +# +# +#cpdef enum Polite_abs: +# Inf # bq, U +# Pol # bq, U +# +# +#cpdef enum Polite_erg: +# Inf # bq, U +# Pol # bq, U +# +# +#cpdef enum Polite_dat: +# Inf # bq, U +# Pol # bq, U +# +# +#cpdef enum Prefix: +# Yes # U +# +# +#cpdef enum PrepCase: +# Npr # cz +# Pre # U +# +# +#cpdef enum PunctSide: +# Ini # U +# Fin # U +# +#cpdef enum PunctType1: +# Peri # U +# Qest # U +# Excl # U +# Quot # U +# Brck # U +# Comm # U +# Colo # U +# Semi # U +# +#cpdef enum PunctType2: +# Dash # U +# +# +#cpdef enum Style1: +# Arch # cz, fi, U +# Rare # cz, fi, U +# Poet # cz, U +# Norm # cz, U +# Coll # cz, U +# Vrnc # cz, U +# Sing # cz, U +# Expr # cz, U +# +# +#cpdef enum Style2: +# Derg # cz, U +# Vulg # cz, U +# +# +#cpdef enum Typo: +# Yes # fi, U +# +# +#cpdef enum Variant: +# Short # cz +# Bound # cz, sl +# +# +#cpdef enum VerbType: +# Aux # U +# Cop # U +# Mod # U +# Light # U +# + +cpdef enum Value_t: Animacy_Anim Animacy_Inam Aspect_Freq @@ -566,7 +575,6 @@ cpdef enum FeatureValues: PronType_Tot PronType_Clit PronType_Exc # es, ca, it, fa - PronType_Clit # it Reflex_Yes Tense_Fut Tense_Imp @@ -594,7 +602,6 @@ cpdef enum FeatureValues: AdpType_Voc # cz AdpType_Comprep # cz AdpType_Circ # U - AdpType_Voc # U AdvType_Man AdvType_Loc AdvType_Tim @@ -607,16 +614,15 @@ cpdef enum FeatureValues: ConjType_Oper # cz, U ConjType_Comp # cz, U Connegative_Yes # fi - # fi - Derivation_Minen - Derivation_Sti - Derivation_Inen - Derivation_Lainen - Derivation_Ja - Derivation_Ton - Derivation_Vs - Derivation_Ttain - Derivation_Ttaa + Derivation_Minen # fi + Derivation_Sti # fi + Derivation_Inen # fi + Derivation_Lainen # fi + Derivation_Ja # fi + Derivation_Ton # fi + Derivation_Vs # fi + Derivation_Ttain # fi + Derivation_Ttaa # fi Echo_Rdp # U Echo_Ech # U Foreign_Foreign # cz, fi, U @@ -721,5 +727,3 @@ cpdef enum FeatureValues: VerbType_Cop # U VerbType_Mod # U VerbType_Light # U - -