mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
		
						commit
						44bce8e53f
					
				
							
								
								
									
										106
									
								
								.github/contributors/jimregan.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								.github/contributors/jimregan.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,106 @@
 | 
				
			||||||
 | 
					# spaCy contributor agreement
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This spaCy Contributor Agreement (**"SCA"**) is based on the
 | 
				
			||||||
 | 
					[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
 | 
				
			||||||
 | 
					The SCA applies to any contribution that you make to any product or project
 | 
				
			||||||
 | 
					managed by us (the **"project"**), and sets out the intellectual property rights
 | 
				
			||||||
 | 
					you grant to us in the contributed materials. The term **"us"** shall mean
 | 
				
			||||||
 | 
					[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
 | 
				
			||||||
 | 
					**"you"** shall mean the person or entity identified below.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you agree to be bound by these terms, fill in the information requested
 | 
				
			||||||
 | 
					below and include the filled-in version with your first pull request, under the
 | 
				
			||||||
 | 
					folder [`.github/contributors/`](/.github/contributors/). The name of the file
 | 
				
			||||||
 | 
					should be your GitHub username, with the extension `.md`. For example, the user
 | 
				
			||||||
 | 
					example_user would create the file `.github/contributors/example_user.md`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Read this agreement carefully before signing. These terms and conditions
 | 
				
			||||||
 | 
					constitute a binding legal agreement.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Contributor Agreement
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. The term "contribution" or "contributed materials" means any source code,
 | 
				
			||||||
 | 
					object code, patch, tool, sample, graphic, specification, manual,
 | 
				
			||||||
 | 
					documentation, or any other material posted or submitted by you to the project.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. With respect to any worldwide copyrights, or copyright applications and
 | 
				
			||||||
 | 
					registrations, in your contribution:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you hereby assign to us joint ownership, and to the extent that such
 | 
				
			||||||
 | 
					    assignment is or becomes invalid, ineffective or unenforceable, you hereby
 | 
				
			||||||
 | 
					    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
 | 
				
			||||||
 | 
					    royalty-free, unrestricted license to exercise all rights under those
 | 
				
			||||||
 | 
					    copyrights. This includes, at our option, the right to sublicense these same
 | 
				
			||||||
 | 
					    rights to third parties through multiple levels of sublicensees or other
 | 
				
			||||||
 | 
					    licensing arrangements;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that each of us can do all things in relation to your
 | 
				
			||||||
 | 
					    contribution as if each of us were the sole owners, and if one of us makes
 | 
				
			||||||
 | 
					    a derivative work of your contribution, the one who makes the derivative
 | 
				
			||||||
 | 
					    work (or has it made) will be the sole owner of that derivative work;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that you will not assert any moral rights in your contribution
 | 
				
			||||||
 | 
					    against us, our licensees or transferees;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that we may register a copyright in your contribution and
 | 
				
			||||||
 | 
					    exercise all ownership rights associated with it; and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that neither of us has any duty to consult with, obtain the
 | 
				
			||||||
 | 
					    consent of, pay or render an accounting to the other for any use or
 | 
				
			||||||
 | 
					    distribution of your contribution.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. With respect to any patents you own, or that you can license without payment
 | 
				
			||||||
 | 
					to any third party, you hereby grant to us a perpetual, irrevocable,
 | 
				
			||||||
 | 
					non-exclusive, worldwide, no-charge, royalty-free license to:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * make, have made, use, sell, offer to sell, import, and otherwise transfer
 | 
				
			||||||
 | 
					    your contribution in whole or in part, alone or in combination with or
 | 
				
			||||||
 | 
					    included in any product, work or materials arising out of the project to
 | 
				
			||||||
 | 
					    which your contribution was submitted, and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * at our option, to sublicense these same rights to third parties through
 | 
				
			||||||
 | 
					    multiple levels of sublicensees or other licensing arrangements.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					4. Except as set out above, you keep all right, title, and interest in your
 | 
				
			||||||
 | 
					contribution. The rights that you grant to us under these terms are effective
 | 
				
			||||||
 | 
					on the date you first submitted a contribution to us, even if your submission
 | 
				
			||||||
 | 
					took place before the date you sign these terms.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					5. You covenant, represent, warrant and agree that:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * Each contribution that you submit is and shall be an original work of
 | 
				
			||||||
 | 
					    authorship and you can legally grant the rights set out in this SCA;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * to the best of your knowledge, each contribution will not violate any
 | 
				
			||||||
 | 
					    third party's copyrights, trademarks, patents, or other intellectual
 | 
				
			||||||
 | 
					    property rights; and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * each contribution shall be in compliance with U.S. export control laws and
 | 
				
			||||||
 | 
					    other applicable export and import laws. You agree to notify us if you
 | 
				
			||||||
 | 
					    become aware of any circumstance which would make any of the foregoing
 | 
				
			||||||
 | 
					    representations inaccurate in any respect. We may publicly disclose your 
 | 
				
			||||||
 | 
					    participation in the project, including the fact that you have signed the SCA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					6. This SCA is governed by the laws of the State of California and applicable
 | 
				
			||||||
 | 
					U.S. Federal law. Any choice of law rules will not apply.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					7. Please place an “x” on one of the applicable statements below. Please do NOT
 | 
				
			||||||
 | 
					mark both statements:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * [x] I am signing on behalf of myself as an individual and no other person
 | 
				
			||||||
 | 
					    or entity, including my employer, has or will have rights with respect to my
 | 
				
			||||||
 | 
					    contributions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * [ ] I am signing on behalf of my employer or a legal entity and I have the
 | 
				
			||||||
 | 
					    actual authority to contractually bind that entity.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Contributor Details
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					| Field                          | Entry                |
 | 
				
			||||||
 | 
					|------------------------------- | -------------------- |
 | 
				
			||||||
 | 
					| Name                           | Jim O'Regan          |
 | 
				
			||||||
 | 
					| Company name (if applicable)   |                      |
 | 
				
			||||||
 | 
					| Title or role (if applicable)  |                      |
 | 
				
			||||||
 | 
					| Date                           | 2017-06-24           |
 | 
				
			||||||
 | 
					| GitHub username                | jimregan             |
 | 
				
			||||||
 | 
					| Website (optional)             |                      |
 | 
				
			||||||
							
								
								
									
										25
									
								
								spacy/lang/ga/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								spacy/lang/ga/__init__.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,25 @@
 | 
				
			||||||
 | 
					# coding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 | 
				
			||||||
 | 
					from .stop_words import STOP_WORDS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from ..tokenizer_exceptions import BASE_EXCEPTIONS
 | 
				
			||||||
 | 
					from ...language import Language
 | 
				
			||||||
 | 
					from ...attrs import LANG
 | 
				
			||||||
 | 
					from ...util import update_exc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class IrishDefaults(Language.Defaults):
					    """Default settings used when building the Irish (``ga``) pipeline.

					    Starts from ``Language.Defaults`` and overrides the lexical attribute
					    getters, the tokenizer exceptions and the stop word set.
					    """

					    # dict(...) builds a fresh mapping, so registering the LANG getter
					    # below does not write into the mapping held by Language.Defaults.
					    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
					    lex_attr_getters.update({LANG: lambda text: 'ga'})

					    # Combine the base exceptions with the Irish-specific ones.
					    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
					    stop_words = set(STOP_WORDS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Irish(Language):
					    """Language subclass for Irish, identified by the code ``'ga'``."""

					    Defaults = IrishDefaults
					    lang = 'ga'


					# Only the language class is exported from this module.
					__all__ = ['Irish']
 | 
				
			||||||
							
								
								
									
										33
									
								
								spacy/lang/ga/irish_morphology_helpers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								spacy/lang/ga/irish_morphology_helpers.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,33 @@
 | 
				
			||||||
 | 
					# coding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class IrishMorph:
					    """Namespace of small orthographic helpers for Irish word forms.

					    Every helper takes a single string ``word`` and is a static method, so
					    it can be called on the class (``IrishMorph.devoice(w)``) or on an
					    instance. The letter tables are class attributes; the helpers look
					    them up through the class because a class body's names are not
					    visible from inside its functions.
					    """

					    consonants = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n',
					                  'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z']
					    broad_vowels = ['a', 'á', 'o', 'ó', 'u', 'ú']
					    slender_vowels = ['e', 'é', 'i', 'í']
					    vowels = broad_vowels + slender_vowels

					    @staticmethod
					    def ends_dentals(word):
					        """Return True if ``word`` is non-empty and ends in d, n, t or s."""
					        return word != "" and word[-1] in ['d', 'n', 't', 's']

					    @staticmethod
					    def devoice(word):
					        """Return ``word`` with a final ``sd`` rewritten to ``st``.

					        Only words longer than two characters are changed; anything else
					        is returned unmodified.
					        """
					        if len(word) > 2 and word[-2] == 's' and word[-1] == 'd':
					            return word[:-1] + 't'
					        return word

					    @staticmethod
					    def ends_with_vowel(word):
					        """Return True if ``word`` is non-empty and its last letter is a vowel."""
					        return word != "" and word[-1] in IrishMorph.vowels

					    @staticmethod
					    def starts_with_vowel(word):
					        """Return True if ``word`` is non-empty and its first letter is a vowel."""
					        return word != "" and word[0] in IrishMorph.vowels

					    @staticmethod
					    def deduplicate(word):
					        """Drop the last letter of ``word`` when it ends in a doubled consonant.

					        Only words longer than two characters are changed; anything else
					        is returned unmodified.
					        """
					        if (len(word) > 2 and word[-2] == word[-1]
					                and word[-1] in IrishMorph.consonants):
					            return word[:-1]
					        return word
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										45
									
								
								spacy/lang/ga/stop_words.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								spacy/lang/ga/stop_words.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,45 @@
 | 
				
			||||||
 | 
					# encoding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Irish stop words, grouped by initial letter. The literal is split on
					# whitespace, so line breaks and blank lines between groups carry no
					# meaning; every whitespace-separated token becomes a set member, which
					# is why nothing but words may appear inside the string.
					STOP_WORDS = set("""
					a ach ag agus an aon ar arna as

					ba beirt bhúr

					caoga ceathair ceathrar chomh chuig chun cois céad cúig cúigear

					daichead dar de deich deichniúr den dhá do don dtí dá dár dó

					faoi faoin faoina faoinár fara fiche

					gach gan go gur

					haon hocht

					i iad idir in ina ins inár is

					le leis lena lenár

					mar mo muid mé

					na nach naoi naonúr ná ní níor nó nócha

					ocht ochtar ochtó os

					roimh

					sa seacht seachtar seachtó seasca seisear siad sibh sinn sna sé sí

					tar thar thú triúr trí trína trínár tríocha tú

					um

					ár

					é éis

					í

					ó ón óna ónár
					""".split())
 | 
				
			||||||
							
								
								
									
										368
									
								
								spacy/lang/ga/tag_map.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										368
									
								
								spacy/lang/ga/tag_map.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,368 @@
 | 
				
			||||||
 | 
					# coding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TAG_MAP = {
 | 
				
			||||||
 | 
					    "ADJ__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "ADJ__Case=Gen|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "ADJ__Case=Gen|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "ADJ__Case=Gen|NounType=Strong|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "Other": {"NounType": "strong"}},
 | 
				
			||||||
 | 
					    "ADJ__Case=Gen|NounType=Weak|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "Other": {"NounType": "weak"}},
 | 
				
			||||||
 | 
					    "ADJ__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "ADJ__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "ADJ__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "plur"},
 | 
				
			||||||
 | 
					    "ADJ__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "ADJ__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
 | 
				
			||||||
 | 
					    "ADJ__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "ADJ__Case=NomAcc|NounType=NotSlender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "Other": {"NounType": "notslender"}},
 | 
				
			||||||
 | 
					    "ADJ__Case=NomAcc|NounType=Slender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "Other": {"NounType": "slender"}},
 | 
				
			||||||
 | 
					    "ADJ__Degree=Cmp,Sup|Form=Len": {"pos": "ADJ", "Degree": "cmp|sup", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "ADJ__Degree=Cmp,Sup": {"pos": "ADJ", "Degree": "cmp|sup"},
 | 
				
			||||||
 | 
					    "ADJ__Degree=Pos|Form=Ecl": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "ADJ__Degree=Pos|Form=HPref": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "ADJ__Degree=Pos|Form=Len": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "ADJ__Degree=Pos": {"pos": "ADJ", "Degree": "pos"},
 | 
				
			||||||
 | 
					    "ADJ__Foreign=Yes": {"pos": "ADJ", "Foreign": "yes"},
 | 
				
			||||||
 | 
					    "ADJ__Form=Len|VerbForm=Part": {"pos": "ADJ", "VerbForm": "part", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "ADJ__Gender=Masc|Number=Sing|PartType=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"},
 | 
				
			||||||
 | 
					    "ADJ__Gender=Masc|Number=Sing|Case=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"},
 | 
				
			||||||
 | 
					    "ADJ__Number=Plur|PartType=Voc": {"pos": "ADJ", "Number": "plur", "Case": "voc"},
 | 
				
			||||||
 | 
					    "ADJ__Number=Plur|Case=Voc": {"pos": "ADJ", "Number": "plur", "Case": "voc"},
 | 
				
			||||||
 | 
					    "ADJ__Number=Plur": {"pos": "ADJ", "Number": "plur"},
 | 
				
			||||||
 | 
					    "ADJ___": {"pos": "ADJ"},
 | 
				
			||||||
 | 
					    "ADJ__VerbForm=Part": {"pos": "ADJ", "VerbForm": "part"},
 | 
				
			||||||
 | 
					    "ADP__Foreign=Yes": {"pos": "ADP", "Foreign": "yes"},
 | 
				
			||||||
 | 
					    "ADP__Form=Len|Number=Plur|Person=1": {"pos": "ADP", "Number": "plur", "Person": 1, "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "ADP__Form=Len|Number=Plur|Person=3": {"pos": "ADP", "Number": "plur", "Person": 3, "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "ADP__Form=Len|Number=Sing|Person=1": {"pos": "ADP", "Number": "sing", "Person": 1, "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "ADP__Gender=Fem|Number=Sing|Person=3": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3},
 | 
				
			||||||
 | 
					    "ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes", "PronType": "prs"},
 | 
				
			||||||
 | 
					    "ADP__Gender=Masc|Number=Sing|Person=3": {"pos": "ADP", "Gender": "masc", "Number": "sing", "Person": 3},
 | 
				
			||||||
 | 
					    "ADP__Gender=Masc|Number=Sing|Person=3|Poss=Yes": {"pos": "ADP", "Gender": "masc", "Number": "sing", "Person": 3, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "ADP__Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"pos": "ADP", "Gender": "masc", "Number": "sing", "Person": 3, "Poss": "yes", "PronType": "prs"},
 | 
				
			||||||
 | 
					    "ADP__Gender=Masc|Number=Sing|Person=3|PronType=Emp": {"pos": "ADP", "Gender": "masc", "Number": "sing", "Person": 3, "PronType": "emp"},
 | 
				
			||||||
 | 
					    "ADP__Number=Plur|Person=1": {"pos": "ADP", "Number": "plur", "Person": 1},
 | 
				
			||||||
 | 
					    "ADP__Number=Plur|Person=1|Poss=Yes": {"pos": "ADP", "Number": "plur", "Person": 1, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "ADP__Number=Plur|Person=1|PronType=Emp": {"pos": "ADP", "Number": "plur", "Person": 1, "PronType": "emp"},
 | 
				
			||||||
 | 
					    "ADP__Number=Plur|Person=2": {"pos": "ADP", "Number": "plur", "Person": 2},
 | 
				
			||||||
 | 
					    "ADP__Number=Plur|Person=3": {"pos": "ADP", "Number": "plur", "Person": 3},
 | 
				
			||||||
 | 
					    "ADP__Number=Plur|Person=3|Poss=Yes": {"pos": "ADP", "Number": "plur", "Person": 3, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "ADP__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"pos": "ADP", "Number": "plur", "Person": 3, "Poss": "yes", "PronType": "prs"},
 | 
				
			||||||
 | 
					    "ADP__Number=Plur|Person=3|PronType=Emp": {"pos": "ADP", "Number": "plur", "Person": 3, "PronType": "emp"},
 | 
				
			||||||
 | 
					    "ADP__Number=Plur|PronType=Art": {"pos": "ADP", "Number": "plur", "PronType": "art"},
 | 
				
			||||||
 | 
					    "ADP__Number=Sing|Person=1": {"pos": "ADP", "Number": "sing", "Person": 1},
 | 
				
			||||||
 | 
					    "ADP__Number=Sing|Person=1|Poss=Yes": {"pos": "ADP", "Number": "sing", "Person": 1, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "ADP__Number=Sing|Person=1|PronType=Emp": {"pos": "ADP", "Number": "sing", "Person": 1, "PronType": "emp"},
 | 
				
			||||||
 | 
					    "ADP__Number=Sing|Person=2": {"pos": "ADP", "Number": "sing", "Person": 2},
 | 
				
			||||||
 | 
					    "ADP__Number=Sing|Person=3": {"pos": "ADP", "Number": "sing", "Person": 3},
 | 
				
			||||||
 | 
					    "ADP__Number=Sing|PronType=Art": {"pos": "ADP", "Number": "sing", "PronType": "art"},
 | 
				
			||||||
 | 
					    "ADP__Person=3|Poss=Yes": {"pos": "ADP", "Person": 3, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "ADP___": {"pos": "ADP"},
 | 
				
			||||||
 | 
					    "ADP__Poss=Yes": {"pos": "ADP", "Poss": "yes"},
 | 
				
			||||||
 | 
					    "ADP__PrepForm=Cmpd": {"pos": "ADP", "Other": {"PrepForm": "cmpd"}},
 | 
				
			||||||
 | 
					    "ADP__PronType=Art": {"pos": "ADP", "PronType": "art"},
 | 
				
			||||||
 | 
					    "ADV__Form=Len": {"pos": "ADV", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "ADV___": {"pos": "ADV"},
 | 
				
			||||||
 | 
					    "ADV__PronType=Int": {"pos": "ADV", "PronType": "int"},
 | 
				
			||||||
 | 
					    "AUX__Form=VF|Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Form=VF|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Form=VF|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Form=VF|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Form=VF|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"Form": "vf", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Gender=Masc|Number=Sing|Person=3|VerbForm=Cop": {"pos": "AUX", "Gender": "masc", "Number": "sing", "Person": 3, "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Mood=Int|Number=Sing|PronType=Art|VerbForm=Cop": {"pos": "AUX", "Number": "sing", "PronType": "art", "Other": {"Mood": "int", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Mood=Int|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"Mood": "int", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Mood=Int|Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Other": {"Mood": "int", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Mood=Int|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"Mood": "int", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__PartType=Comp|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"PartType": "comp", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Polarity=Neg|PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "pres", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX___": {"pos": "AUX"},
 | 
				
			||||||
 | 
					    "AUX__PronType=Dem|VerbForm=Cop": {"pos": "AUX", "PronType": "dem", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "pres", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "AUX__VerbForm=Cop": {"pos": "AUX", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "CCONJ___": {"pos": "CCONJ"},
 | 
				
			||||||
 | 
					    "DET__Case=Gen|Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"},
 | 
				
			||||||
 | 
					    "DET__Definite=Def|Form=Ecl": {"pos": "DET", "Definite": "def", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"},
 | 
				
			||||||
 | 
					    "DET__Definite=Def|Number=Plur|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "plur", "PronType": "art"},
 | 
				
			||||||
 | 
					    "DET__Definite=Def|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "sing", "PronType": "art"},
 | 
				
			||||||
 | 
					    "DET__Definite=Def": {"pos": "DET", "Definite": "def"},
 | 
				
			||||||
 | 
					    "DET__Form=HPref|PronType=Ind": {"pos": "DET", "PronType": "ind", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "DET__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "DET__Gender=Masc|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "masc", "Number": "sing", "Person": 3, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "DET__Number=Plur|Person=1|Poss=Yes": {"pos": "DET", "Number": "plur", "Person": 1, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "DET__Number=Plur|Person=3|Poss=Yes": {"pos": "DET", "Number": "plur", "Person": 3, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "DET__Number=Sing|Person=1|Poss=Yes": {"pos": "DET", "Number": "sing", "Person": 1, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "DET__Number=Sing|Person=2|Poss=Yes": {"pos": "DET", "Number": "sing", "Person": 2, "Poss": "yes"},
 | 
				
			||||||
 | 
					    "DET__Number=Sing|PronType=Int": {"pos": "DET", "Number": "sing", "PronType": "int"},
 | 
				
			||||||
 | 
					    "DET___": {"pos": "DET"},
 | 
				
			||||||
 | 
					    "DET__PronType=Dem": {"pos": "DET", "PronType": "dem"},
 | 
				
			||||||
 | 
					    "DET__PronType=Ind": {"pos": "DET", "PronType": "ind"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Dat|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Definite": "ind", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Dat|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Dat|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Dat|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Definite=Def|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "plur", "Other": {"NounType": "strong"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "Other": {"NounType": "strong"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "ind", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=Ecl|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl", "NounType": "strong"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl", "NounType": "strong"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl", "NounType": "weak"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "len", "NounType": "strong"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "len", "NounType": "weak"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Form=Len|VerbForm=Inf": {"pos": "NOUN", "Case": "gen", "VerbForm": "inf", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"NounType": "strong"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Gender=Fem|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"NounType": "weak"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "strong"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|Number=Sing": {"pos": "NOUN", "Case": "gen", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Gen|VerbForm=Inf": {"pos": "NOUN", "Case": "gen", "VerbForm": "inf"},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Definite=Def|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "fem", "Number": "plur"},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Definite=Def|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Definite=Def|Gender=Fem": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "fem"},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Definite=Ind|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "ind", "Gender": "masc", "Number": "plur"},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=Emp|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "emp"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur"},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
 | 
				
			||||||
 | 
					    "NOUN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Voc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Definite": "def", "Gender": "masc", "Number": "plur"},
 | 
				
			||||||
 | 
					    "NOUN__Case=Voc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "plur", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Degree=Pos": {"pos": "NOUN", "Degree": "pos"},
 | 
				
			||||||
 | 
					    "NOUN__Foreign=Yes": {"pos": "NOUN", "Foreign": "yes"},
 | 
				
			||||||
 | 
					    "NOUN__Form=Ecl|Number=Sing": {"pos": "NOUN", "Number": "sing", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NOUN__Form=Ecl|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NOUN__Form=Ecl|VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NOUN__Form=HPref|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "NOUN__Form=Len|Number=Sing": {"pos": "NOUN", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Form=Len|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NOUN__Gender=Fem|Number=Sing": {"pos": "NOUN", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN__Number=Sing|PartType=Comp": {"pos": "NOUN", "Number": "sing", "Other": {"PartType": "comp"}},
 | 
				
			||||||
 | 
					    "NOUN__Number=Sing": {"pos": "NOUN", "Number": "sing"},
 | 
				
			||||||
 | 
					    "NOUN___": {"pos": "NOUN"},
 | 
				
			||||||
 | 
					    "NOUN__Reflex=Yes": {"pos": "NOUN", "Reflex": "yes"},
 | 
				
			||||||
 | 
					    "NOUN__VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf"},
 | 
				
			||||||
 | 
					    "NOUN__VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun"},
 | 
				
			||||||
 | 
					    "NUM__Definite=Def|NumType=Card": {"pos": "NUM", "Definite": "def", "NumType": "card"},
 | 
				
			||||||
 | 
					    "NUM__Form=Ecl|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NUM__Form=Ecl|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "NUM__Form=HPref|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "NUM__Form=Len|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NUM__Form=Len|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "NUM__NumType=Card": {"pos": "NUM", "NumType": "card"},
 | 
				
			||||||
 | 
					    "NUM__NumType=Ord": {"pos": "NUM", "NumType": "ord"},
 | 
				
			||||||
 | 
					    "NUM___": {"pos": "NUM"},
 | 
				
			||||||
 | 
					    "PART__Form=Ecl|PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"Form": "ecl", "PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__Mood=Imp|PartType=Vb|Polarity=Neg": {"pos": "PART", "Mood": "imp", "Polarity": "neg", "Other": {"PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__Mood=Imp|PartType=Vb": {"pos": "PART", "Mood": "imp", "Other": {"PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__Mood=Int|PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"Mood": "int", "PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Ad": {"pos": "PART", "Other": {"PartType": "ad"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Cmpl|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"PartType": "cmpl"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Cmpl|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "Other": {"PartType": "cmpl"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Cmpl": {"pos": "PART", "Other": {"PartType": "cmpl"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Comp": {"pos": "PART", "Other": {"PartType": "comp"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Cop|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"PartType": "cop"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Deg": {"pos": "PART", "Other": {"PartType": "deg"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Inf": {"pos": "PART", "PartType": "inf"},
 | 
				
			||||||
 | 
					    "PART__PartType=Num": {"pos": "PART", "Other": {"PartType": "num"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Pat": {"pos": "PART", "Other": {"PartType": "pat"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Vb|Polarity=Neg|PronType=Rel": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Other": {"PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Vb|Polarity=Neg|PronType=Rel|Tense=Past": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Vb|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "Other": {"PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Vb": {"pos": "PART", "Other": {"PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Vb|PronType=Rel|Tense=Past": {"pos": "PART", "PronType": "rel", "Tense": "past", "Other": {"PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Vb|Tense=Past": {"pos": "PART", "Tense": "past", "Other": {"PartType": "vb"}},
 | 
				
			||||||
 | 
					    "PART__PartType=Voc": {"pos": "PART", "Other": {"PartType": "voc"}},
 | 
				
			||||||
 | 
					    "PART___": {"pos": "PART"},
 | 
				
			||||||
 | 
					    "PART__PronType=Rel": {"pos": "PART", "PronType": "rel"},
 | 
				
			||||||
 | 
					    "PRON__Form=Len|Number=Sing|Person=2": {"pos": "PRON", "Number": "sing", "Person": 2, "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "PRON__Form=Len|PronType=Ind": {"pos": "PRON", "PronType": "ind", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "PRON__Gender=Fem|Number=Sing|Person=3": {"pos": "PRON", "Gender": "fem", "Number": "sing", "Person": 3},
 | 
				
			||||||
 | 
					    "PRON__Gender=Masc|Number=Sing|Person=3": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3},
 | 
				
			||||||
 | 
					    "PRON__Gender=Masc|Number=Sing|Person=3|PronType=Emp": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3, "PronType": "emp"},
 | 
				
			||||||
 | 
					    "PRON__Gender=Masc|Person=3": {"pos": "PRON", "Gender": "masc", "Person": 3},
 | 
				
			||||||
 | 
					    "PRON__Number=Plur|Person=1": {"pos": "PRON", "Number": "plur", "Person": 1},
 | 
				
			||||||
 | 
					    "PRON__Number=Plur|Person=1|PronType=Emp": {"pos": "PRON", "Number": "plur", "Person": 1, "PronType": "emp"},
 | 
				
			||||||
 | 
					    "PRON__Number=Plur|Person=2": {"pos": "PRON", "Number": "plur", "Person": 2},
 | 
				
			||||||
 | 
					    "PRON__Number=Plur|Person=3": {"pos": "PRON", "Number": "plur", "Person": 3},
 | 
				
			||||||
 | 
					    "PRON__Number=Plur|Person=3|PronType=Emp": {"pos": "PRON", "Number": "plur", "Person": 3, "PronType": "emp"},
 | 
				
			||||||
 | 
					    "PRON__Number=Sing|Person=1": {"pos": "PRON", "Number": "sing", "Person": 1},
 | 
				
			||||||
 | 
					    "PRON__Number=Sing|Person=1|PronType=Emp": {"pos": "PRON", "Number": "sing", "Person": 1, "PronType": "emp"},
 | 
				
			||||||
 | 
					    "PRON__Number=Sing|Person=2": {"pos": "PRON", "Number": "sing", "Person": 2},
 | 
				
			||||||
 | 
					    "PRON__Number=Sing|Person=2|PronType=Emp": {"pos": "PRON", "Number": "sing", "Person": 2, "PronType": "emp"},
 | 
				
			||||||
 | 
					    "PRON__Number=Sing|Person=3": {"pos": "PRON", "Number": "sing", "Person": 3},
 | 
				
			||||||
 | 
					    "PRON__Number=Sing|PronType=Int": {"pos": "PRON", "Number": "sing", "PronType": "int"},
 | 
				
			||||||
 | 
					    "PRON__PronType=Dem": {"pos": "PRON", "PronType": "dem"},
 | 
				
			||||||
 | 
					    "PRON__PronType=Ind": {"pos": "PRON", "PronType": "ind"},
 | 
				
			||||||
 | 
					    "PRON__PronType=Int": {"pos": "PRON", "PronType": "int"},
 | 
				
			||||||
 | 
					    "PRON__Reflex=Yes": {"pos": "PRON", "Reflex": "yes"},
 | 
				
			||||||
 | 
					    "PROPN__Abbr=Yes": {"pos": "PROPN", "Other": {"Abbr": "yes"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "dat", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Form=Len|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem"},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "PROPN__Case=Gen|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc"},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "hpref"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "PROPN__Case=NomAcc|Gender=Masc": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc"},
 | 
				
			||||||
 | 
					    "PROPN__Case=Voc|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "voc", "Gender": "fem", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "PROPN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "voc", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "PROPN__Gender=Masc|Number=Sing": {"pos": "PROPN", "Gender": "masc", "Number": "sing"},
 | 
				
			||||||
 | 
					    "PROPN___": {"pos": "PROPN"},
 | 
				
			||||||
 | 
					    "PUNCT___": {"pos": "PUNCT"},
 | 
				
			||||||
 | 
					    "SCONJ___": {"pos": "SCONJ"},
 | 
				
			||||||
 | 
					    "SCONJ__Tense=Past|VerbForm=Cop": {"pos": "SCONJ", "Tense": "past", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "SCONJ__VerbForm=Cop": {"pos": "SCONJ", "Other": {"VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "SYM__Abbr=Yes": {"pos": "SYM", "Other": {"Abbr": "yes"}},
 | 
				
			||||||
 | 
					    "VERB__Case=NomAcc|Gender=Masc|Mood=Ind|Number=Sing|Tense=Pres": {"pos": "VERB", "Case": "nom|acc", "Gender": "masc", "Mood": "ind", "Number": "sing", "Tense": "pres"},
 | 
				
			||||||
 | 
					    "VERB__Dialect=Munster|Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Dialect": "munster", "Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Foreign=Yes": {"pos": "VERB", "Foreign": "yes"},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "ecl", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Imp|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "ecl", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "ecl", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "ecl", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl|Mood=Sub|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Tense": "pres", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Ecl": {"pos": "VERB", "Other": {"Form": "ecl"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Emp|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Other": {"Form": "emp"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "PronType": "rel", "Tense": "pres", "Other": {"Form": "emp"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Other": {"Form": "emp"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3, "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Cnd|Number=Sing|Person=2": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 2, "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "len", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Imp|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 3, "Tense": "past", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Imp|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Imp|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Other": {"Form": "len", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "fut", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "past", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 3, "Tense": "past", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "past", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "len", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "len", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "len", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Mood=Sub|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len|Polarity=Neg": {"pos": "VERB", "Polarity": "neg", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Form=Len": {"pos": "VERB", "Other": {"Form": "len"}},
 | 
				
			||||||
 | 
					    "VERB__Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3},
 | 
				
			||||||
 | 
					    "VERB__Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1},
 | 
				
			||||||
 | 
					    "VERB__Mood=Cnd": {"pos": "VERB", "Mood": "cnd"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Mood=Imp|Number=Plur|Person=1|Polarity=Neg": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1, "Polarity": "neg"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Imp|Number=Plur|Person=1": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1},
 | 
				
			||||||
 | 
					    "VERB__Mood=Imp|Number=Plur|Person=2": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 2},
 | 
				
			||||||
 | 
					    "VERB__Mood=Imp|Number=Sing|Person=2": {"pos": "VERB", "Mood": "imp", "Number": "sing", "Person": 2},
 | 
				
			||||||
 | 
					    "VERB__Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "past"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|PronType=Rel|Tense=Fut": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "fut"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "pres"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres"},
 | 
				
			||||||
 | 
					    "VERB__Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Voice": "auto"}},
 | 
				
			||||||
 | 
					    "VERB___": {"pos": "VERB"},
 | 
				
			||||||
 | 
					    "X__Abbr=Yes": {"pos": "X", "Other": {"Abbr": "yes"}},
 | 
				
			||||||
 | 
					    "X__Case=NomAcc|Foreign=Yes|Gender=Fem|Number=Sing": {"pos": "X", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Foreign": "yes"},
 | 
				
			||||||
 | 
					    "X__Definite=Def|Dialect=Ulster": {"pos": "X", "Definite": "def", "Other": {"Dialect": "ulster"}},
 | 
				
			||||||
 | 
					    "X__Dialect=Munster|Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "X", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Dialect": "munster", "Form": "len"}},
 | 
				
			||||||
 | 
					    "X__Dialect=Munster|Mood=Imp|Number=Sing|Person=2|Polarity=Neg": {"pos": "X", "Mood": "imp", "Number": "sing", "Person": 2, "Polarity": "neg", "Other": {"Dialect": "munster"}},
 | 
				
			||||||
 | 
					    "X__Dialect=Munster|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "X", "Mood": "ind", "Tense": "past", "Other": {"Dialect": "munster", "Voice": "auto"}},
 | 
				
			||||||
 | 
					    "X__Dialect=Munster": {"pos": "X", "Other": {"Dialect": "munster"}},
 | 
				
			||||||
 | 
					    "X__Dialect=Munster|PronType=Dem": {"pos": "X", "PronType": "dem", "Other": {"Dialect": "munster"}},
 | 
				
			||||||
 | 
					    "X__Dialect=Ulster|Gender=Masc|Number=Sing|Person=3": {"pos": "X", "Gender": "masc", "Number": "sing", "Person": 3, "Other": {"Dialect": "ulster"}},
 | 
				
			||||||
 | 
					    "X__Dialect=Ulster|PartType=Vb|Polarity=Neg": {"pos": "X", "Polarity": "neg", "Other": {"Dialect": "ulster", "PartType": "vb"}},
 | 
				
			||||||
 | 
					    "X__Dialect=Ulster|VerbForm=Cop": {"pos": "X", "Other": {"Dialect": "ulster", "VerbForm": "cop"}},
 | 
				
			||||||
 | 
					    "X__Foreign=Yes": {"pos": "X", "Foreign": "yes"},
 | 
				
			||||||
 | 
					    "X___": {"pos": "X"}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										86
									
								
								spacy/lang/ga/tokenizer_exceptions.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								spacy/lang/ga/tokenizer_exceptions.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,86 @@
 | 
				
			||||||
 | 
					# encoding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from ...symbols import POS, DET, ADP, CCONJ, ADV, NOUN, X, AUX
 | 
				
			||||||
 | 
					from ...symbols import ORTH, LEMMA, NORM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Contracted surface forms mapped to the sequence of sub-tokens they are
					# made of. The ORTH values of each list concatenate back to the dict key;
					# LEMMA/NORM give the full form of each piece and POS its coarse tag.
					_exc = {
					    "'acha'n": [
					        {ORTH: "'ach", LEMMA: "gach", NORM: "gach", POS: DET},
					        {ORTH: "a'n", LEMMA: "aon", NORM: "aon", POS: DET}],

					    "dem'": [
					        {ORTH: "de", LEMMA: "de", NORM: "de", POS: ADP},
					        {ORTH: "m'", LEMMA: "mo", NORM: "mo", POS: DET}],

					    "ded'": [
					        {ORTH: "de", LEMMA: "de", NORM: "de", POS: ADP},
					        {ORTH: "d'", LEMMA: "do", NORM: "do", POS: DET}],

					    "lem'": [
					        {ORTH: "le", LEMMA: "le", NORM: "le", POS: ADP},
					        {ORTH: "m'", LEMMA: "mo", NORM: "mo", POS: DET}],

					    "led'": [
					        {ORTH: "le", LEMMA: "le", NORM: "le", POS: ADP},
					        # "d'" is the contracted "do" (same as in "ded'"), so the lemma
					        # must agree with NORM; it was previously "mo", copied from "lem'".
					        {ORTH: "d'", LEMMA: "do", NORM: "do", POS: DET}]
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Strings that stay a single token but carry fixed attributes: elided
					# forms (leading/trailing apostrophe) and abbreviations ending in a period.
					_single_token_exc = [
					    {ORTH: "'gus", LEMMA: "agus", NORM: "agus", POS: CCONJ},
					    {ORTH: "'ach", LEMMA: "gach", NORM: "gach", POS: DET},
					    {ORTH: "ao'", LEMMA: "aon", NORM: "aon"},
					    {ORTH: "'niar", LEMMA: "aniar", NORM: "aniar", POS: ADV},
					    {ORTH: "'níos", LEMMA: "aníos", NORM: "aníos", POS: ADV},
					    {ORTH: "'ndiu", LEMMA: "inniu", NORM: "inniu", POS: ADV},
					    {ORTH: "'nocht", LEMMA: "anocht", NORM: "anocht", POS: ADV},
					    {ORTH: "m'", LEMMA: "mo", POS: DET},
					    {ORTH: "Aib.", LEMMA: "Aibreán", POS: NOUN},
					    {ORTH: "Ath.", LEMMA: "athair", POS: NOUN},
					    {ORTH: "Beal.", LEMMA: "Bealtaine", POS: NOUN},
					    {ORTH: "a.C.n.", LEMMA: "ante Christum natum", POS: X},
					    {ORTH: "m.sh.", LEMMA: "mar shampla", POS: ADV},
					    {ORTH: "M.F.", LEMMA: "Meán Fómhair", POS: NOUN},
					    {ORTH: "M.Fómh.", LEMMA: "Meán Fómhair", POS: NOUN},
					    {ORTH: "D.F.", LEMMA: "Deireadh Fómhair", POS: NOUN},
					    {ORTH: "D.Fómh.", LEMMA: "Deireadh Fómhair", POS: NOUN},
					    {ORTH: "r.C.", LEMMA: "roimh Chríost", POS: ADV},
					    {ORTH: "R.C.", LEMMA: "roimh Chríost", POS: ADV},
					    {ORTH: "r.Ch.", LEMMA: "roimh Chríost", POS: ADV},
					    {ORTH: "r.Chr.", LEMMA: "roimh Chríost", POS: ADV},
					    {ORTH: "R.Ch.", LEMMA: "roimh Chríost", POS: ADV},
					    {ORTH: "R.Chr.", LEMMA: "roimh Chríost", POS: ADV},
					    {ORTH: "⁊rl.", LEMMA: "agus araile", POS: ADV},
					    {ORTH: "srl.", LEMMA: "agus araile", POS: ADV},
					    {ORTH: "Co.", LEMMA: "contae", POS: NOUN},
					    {ORTH: "Ean.", LEMMA: "Eanáir", POS: NOUN},
					    {ORTH: "Feab.", LEMMA: "Feabhra", POS: NOUN},
					    {ORTH: "gCo.", LEMMA: "contae", POS: NOUN},
					    {ORTH: ".i.", LEMMA: "eadhon", POS: ADV},
					    {ORTH: "B'", LEMMA: "ba", POS: AUX},
					    {ORTH: "b'", LEMMA: "ba", POS: AUX},
					    {ORTH: "lch.", LEMMA: "leathanach", POS: NOUN},
					    {ORTH: "Lch.", LEMMA: "leathanach", POS: NOUN},
					    {ORTH: "lgh.", LEMMA: "leathanach", POS: NOUN},
					    {ORTH: "Lgh.", LEMMA: "leathanach", POS: NOUN},
					    {ORTH: "Lún.", LEMMA: "Lúnasa", POS: NOUN},
					    {ORTH: "Már.", LEMMA: "Márta", POS: NOUN},
					    {ORTH: "Meith.", LEMMA: "Meitheamh", POS: NOUN},
					    {ORTH: "Noll.", LEMMA: "Nollaig", POS: NOUN},
					    {ORTH: "Samh.", LEMMA: "Samhain", POS: NOUN},
					    {ORTH: "tAth.", LEMMA: "athair", POS: NOUN},
					    {ORTH: "tUas.", LEMMA: "Uasal", POS: NOUN},
					    {ORTH: "teo.", LEMMA: "teoranta", POS: NOUN},
					    {ORTH: "Teo.", LEMMA: "teoranta", POS: NOUN},
					    {ORTH: "Uas.", LEMMA: "Uasal", POS: NOUN},
					    {ORTH: "uimh.", LEMMA: "uimhir", POS: NOUN},
					    {ORTH: "Uimh.", LEMMA: "uimhir", POS: NOUN},
					]

					# Key each entry by its own surface form; the value is a one-element
					# list, the same list-of-token-dicts shape used by the multi-token entries.
					for exc_data in _single_token_exc:
					    _exc[exc_data[ORTH]] = [exc_data]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Bare "d'" forms are registered with ORTH only (no lemma, norm or POS).
					_orth_only_forms = ("d'", "D'")
					for orth in _orth_only_forms:
					    _exc[orth] = [{ORTH: orth}]


					# Public name for the assembled exception table (same dict object as _exc).
					TOKENIZER_EXCEPTIONS = _exc
 | 
				
			||||||
| 
						 | 
					@ -14,9 +14,8 @@ from .. import util
 | 
				
			||||||
# These languages are used for generic tokenizer tests – only add a language
 | 
					# These languages are used for generic tokenizer tests – only add a language
 | 
				
			||||||
# here if it's using spaCy's tokenizer (not a different library)
 | 
					# here if it's using spaCy's tokenizer (not a different library)
 | 
				
			||||||
# TODO: re-implement generic tokenizer tests
 | 
					# TODO: re-implement generic tokenizer tests
 | 
				
			||||||
_languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'id',
 | 
					_languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'ga', 'he', 'hu', 'id',
 | 
				
			||||||
              'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'xx']
 | 
					              'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'xx']
 | 
				
			||||||
 | 
					 | 
				
			||||||
_models = {'en': ['en_core_web_sm'],
 | 
					_models = {'en': ['en_core_web_sm'],
 | 
				
			||||||
           'de': ['de_core_news_md'],
 | 
					           'de': ['de_core_news_md'],
 | 
				
			||||||
           'fr': ['fr_depvec_web_lg'],
 | 
					           'fr': ['fr_depvec_web_lg'],
 | 
				
			||||||
| 
						 | 
					@ -108,6 +107,11 @@ def bn_tokenizer():
 | 
				
			||||||
    return util.get_lang_class('bn').Defaults.create_tokenizer()
 | 
					    return util.get_lang_class('bn').Defaults.create_tokenizer()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.fixture
					def ga_tokenizer():
					    """Return a tokenizer created from the defaults of the 'ga' language class."""
					    lang_cls = util.get_lang_class('ga')
					    return lang_cls.Defaults.create_tokenizer()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@pytest.fixture
 | 
					@pytest.fixture
 | 
				
			||||||
def he_tokenizer():
 | 
					def he_tokenizer():
 | 
				
			||||||
    return util.get_lang_class('he').Defaults.create_tokenizer()
 | 
					    return util.get_lang_class('he').Defaults.create_tokenizer()
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										0
									
								
								spacy/tests/lang/ga/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								spacy/tests/lang/ga/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										17
									
								
								spacy/tests/lang/ga/test_tokenizer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								spacy/tests/lang/ga/test_tokenizer.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,17 @@
 | 
				
			||||||
 | 
					# coding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import pytest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# (input text, expected token texts) pairs; each sentence contains one
					# abbreviation ('lch.' / 'm.sh.') that must survive as a single token.
					GA_TOKEN_EXCEPTION_TESTS = [
					    (
					        'Niall Ó Domhnaill, Rialtas na hÉireann 1977 (lch. 600).',
					        ['Niall', 'Ó', 'Domhnaill', ',', 'Rialtas', 'na', 'hÉireann',
					         '1977', '(', 'lch.', '600', ')', '.'],
					    ),
					    (
					        'Daoine a bhfuil Gaeilge acu, m.sh. tusa agus mise',
					        ['Daoine', 'a', 'bhfuil', 'Gaeilge', 'acu', ',', 'm.sh.',
					         'tusa', 'agus', 'mise'],
					    ),
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.mark.parametrize('text,expected_tokens', GA_TOKEN_EXCEPTION_TESTS)
					def test_tokenizer_handles_exception_cases(ga_tokenizer, text, expected_tokens):
					    """Tokenize `text` and compare its non-whitespace token texts to `expected_tokens`."""
					    observed = []
					    for token in ga_tokenizer(text):
					        # Whitespace tokens are not part of the expected lists.
					        if token.is_space:
					            continue
					        observed.append(token.text)
					    assert expected_tokens == observed
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user