mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
		
						commit
						e808bdd076
					
				
							
								
								
									
										106
									
								
								.github/contributors/jarib.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								.github/contributors/jarib.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,106 @@
 | 
				
			||||||
 | 
					# spaCy contributor agreement
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This spaCy Contributor Agreement (**"SCA"**) is based on the
 | 
				
			||||||
 | 
					[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
 | 
				
			||||||
 | 
					The SCA applies to any contribution that you make to any product or project
 | 
				
			||||||
 | 
					managed by us (the **"project"**), and sets out the intellectual property rights
 | 
				
			||||||
 | 
					you grant to us in the contributed materials. The term **"us"** shall mean
 | 
				
			||||||
 | 
					[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term
 | 
				
			||||||
 | 
					**"you"** shall mean the person or entity identified below.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you agree to be bound by these terms, fill in the information requested
 | 
				
			||||||
 | 
					below and include the filled-in version with your first pull request, under the
 | 
				
			||||||
 | 
					folder [`.github/contributors/`](/.github/contributors/). The name of the file
 | 
				
			||||||
 | 
					should be your GitHub username, with the extension `.md`. For example, the user
 | 
				
			||||||
 | 
					example_user would create the file `.github/contributors/example_user.md`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Read this agreement carefully before signing. These terms and conditions
 | 
				
			||||||
 | 
					constitute a binding legal agreement.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Contributor Agreement
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. The term "contribution" or "contributed materials" means any source code,
 | 
				
			||||||
 | 
					object code, patch, tool, sample, graphic, specification, manual,
 | 
				
			||||||
 | 
					documentation, or any other material posted or submitted by you to the project.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. With respect to any worldwide copyrights, or copyright applications and
 | 
				
			||||||
 | 
					registrations, in your contribution:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you hereby assign to us joint ownership, and to the extent that such
 | 
				
			||||||
 | 
					    assignment is or becomes invalid, ineffective or unenforceable, you hereby
 | 
				
			||||||
 | 
					    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
 | 
				
			||||||
 | 
					    royalty-free, unrestricted license to exercise all rights under those
 | 
				
			||||||
 | 
					    copyrights. This includes, at our option, the right to sublicense these same
 | 
				
			||||||
 | 
					    rights to third parties through multiple levels of sublicensees or other
 | 
				
			||||||
 | 
					    licensing arrangements;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that each of us can do all things in relation to your
 | 
				
			||||||
 | 
					    contribution as if each of us were the sole owners, and if one of us makes
 | 
				
			||||||
 | 
					    a derivative work of your contribution, the one who makes the derivative
 | 
				
			||||||
 | 
					    work (or has it made) will be the sole owner of that derivative work;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that you will not assert any moral rights in your contribution
 | 
				
			||||||
 | 
					    against us, our licensees or transferees;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that we may register a copyright in your contribution and
 | 
				
			||||||
 | 
					    exercise all ownership rights associated with it; and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that neither of us has any duty to consult with, obtain the
 | 
				
			||||||
 | 
					    consent of, pay or render an accounting to the other for any use or
 | 
				
			||||||
 | 
					    distribution of your contribution.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. With respect to any patents you own, or that you can license without payment
 | 
				
			||||||
 | 
					to any third party, you hereby grant to us a perpetual, irrevocable,
 | 
				
			||||||
 | 
					non-exclusive, worldwide, no-charge, royalty-free license to:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * make, have made, use, sell, offer to sell, import, and otherwise transfer
 | 
				
			||||||
 | 
					    your contribution in whole or in part, alone or in combination with or
 | 
				
			||||||
 | 
					    included in any product, work or materials arising out of the project to
 | 
				
			||||||
 | 
					    which your contribution was submitted, and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * at our option, to sublicense these same rights to third parties through
 | 
				
			||||||
 | 
					    multiple levels of sublicensees or other licensing arrangements.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					4. Except as set out above, you keep all right, title, and interest in your
 | 
				
			||||||
 | 
					contribution. The rights that you grant to us under these terms are effective
 | 
				
			||||||
 | 
					on the date you first submitted a contribution to us, even if your submission
 | 
				
			||||||
 | 
					took place before the date you sign these terms.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					5. You covenant, represent, warrant and agree that:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * Each contribution that you submit is and shall be an original work of
 | 
				
			||||||
 | 
					    authorship and you can legally grant the rights set out in this SCA;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * to the best of your knowledge, each contribution will not violate any
 | 
				
			||||||
 | 
					    third party's copyrights, trademarks, patents, or other intellectual
 | 
				
			||||||
 | 
					    property rights; and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * each contribution shall be in compliance with U.S. export control laws and
 | 
				
			||||||
 | 
					    other applicable export and import laws. You agree to notify us if you
 | 
				
			||||||
 | 
					    become aware of any circumstance which would make any of the foregoing
 | 
				
			||||||
 | 
					    representations inaccurate in any respect. We may publicly disclose your
 | 
				
			||||||
 | 
					    participation in the project, including the fact that you have signed the SCA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					6. This SCA is governed by the laws of the State of California and applicable
 | 
				
			||||||
 | 
					U.S. Federal law. Any choice of law rules will not apply.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					7. Please place an “x” on one of the applicable statements below. Please do NOT
 | 
				
			||||||
 | 
					mark both statements:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * [x] I am signing on behalf of myself as an individual and no other person
 | 
				
			||||||
 | 
					    or entity, including my employer, has or will have rights with respect to my
 | 
				
			||||||
 | 
					    contributions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * [ ] I am signing on behalf of my employer or a legal entity and I have the
 | 
				
			||||||
 | 
					    actual authority to contractually bind that entity.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Contributor Details
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					| Field                          | Entry                |
 | 
				
			||||||
 | 
					|------------------------------- | -------------------- |
 | 
				
			||||||
 | 
					| Name                           | Jari Bakken          |
 | 
				
			||||||
 | 
					| Company name (if applicable)   |                      |
 | 
				
			||||||
 | 
					| Title or role (if applicable)  |                      |
 | 
				
			||||||
 | 
					| Date                           | 2018-12-21           |
 | 
				
			||||||
 | 
					| GitHub username                | jarib                |
 | 
				
			||||||
 | 
					| Website (optional)             |                      |
 | 
				
			||||||
| 
						 | 
					@ -69,7 +69,7 @@ def convert(
 | 
				
			||||||
    # Use converter function to convert data
 | 
					    # Use converter function to convert data
 | 
				
			||||||
    func = CONVERTERS[converter]
 | 
					    func = CONVERTERS[converter]
 | 
				
			||||||
    input_data = input_path.open("r", encoding="utf-8").read()
 | 
					    input_data = input_path.open("r", encoding="utf-8").read()
 | 
				
			||||||
    data = func(input_data, nsents=n_sents, use_morphology=morphology, lang=lang)
 | 
					    data = func(input_data, n_sents=n_sents, use_morphology=morphology, lang=lang)
 | 
				
			||||||
    if output_dir != "-":
 | 
					    if output_dir != "-":
 | 
				
			||||||
        # Export data to a file
 | 
					        # Export data to a file
 | 
				
			||||||
        suffix = ".{}".format(file_type)
 | 
					        suffix = ".{}".format(file_type)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -49,9 +49,9 @@ def evaluate(
 | 
				
			||||||
    end = timer()
 | 
					    end = timer()
 | 
				
			||||||
    nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
 | 
					    nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
 | 
				
			||||||
    results = {
 | 
					    results = {
 | 
				
			||||||
        "Time": "%.2f s" % end - begin,
 | 
					        "Time": "%.2f s" % (end - begin),
 | 
				
			||||||
        "Words": nwords,
 | 
					        "Words": nwords,
 | 
				
			||||||
        "Words/s": "%.0f" % nwords / (end - begin),
 | 
					        "Words/s": "%.0f" % (nwords / (end - begin)),
 | 
				
			||||||
        "TOK": "%.2f" % scorer.token_acc,
 | 
					        "TOK": "%.2f" % scorer.token_acc,
 | 
				
			||||||
        "POS": "%.2f" % scorer.tags_acc,
 | 
					        "POS": "%.2f" % scorer.tags_acc,
 | 
				
			||||||
        "UAS": "%.2f" % scorer.uas,
 | 
					        "UAS": "%.2f" % scorer.uas,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -102,6 +102,7 @@ def generate_meta(model_path, existing_meta, msg):
 | 
				
			||||||
        "width": nlp.vocab.vectors_length,
 | 
					        "width": nlp.vocab.vectors_length,
 | 
				
			||||||
        "vectors": len(nlp.vocab.vectors),
 | 
					        "vectors": len(nlp.vocab.vectors),
 | 
				
			||||||
        "keys": nlp.vocab.vectors.n_keys,
 | 
					        "keys": nlp.vocab.vectors.n_keys,
 | 
				
			||||||
 | 
					        "name": nlp.vocab.vectors.name,
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    msg.divider("Generating meta.json")
 | 
					    msg.divider("Generating meta.json")
 | 
				
			||||||
    msg.text(
 | 
					    msg.text(
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -279,6 +279,7 @@ def train(
 | 
				
			||||||
                    "width": nlp.vocab.vectors_length,
 | 
					                    "width": nlp.vocab.vectors_length,
 | 
				
			||||||
                    "vectors": len(nlp.vocab.vectors),
 | 
					                    "vectors": len(nlp.vocab.vectors),
 | 
				
			||||||
                    "keys": nlp.vocab.vectors.n_keys,
 | 
					                    "keys": nlp.vocab.vectors.n_keys,
 | 
				
			||||||
 | 
					                    "name": nlp.vocab.vectors.name
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                meta.setdefault("name", "model%d" % i)
 | 
					                meta.setdefault("name", "model%d" % i)
 | 
				
			||||||
                meta.setdefault("version", version)
 | 
					                meta.setdefault("version", version)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -76,6 +76,7 @@ TAG_MAP = {
 | 
				
			||||||
    "ADJ___": {"morph": "_", POS: ADJ},
 | 
					    "ADJ___": {"morph": "_", POS: ADJ},
 | 
				
			||||||
    "ADP___": {"morph": "_", POS: ADP},
 | 
					    "ADP___": {"morph": "_", POS: ADP},
 | 
				
			||||||
    "ADV___": {"morph": "_", POS: ADV},
 | 
					    "ADV___": {"morph": "_", POS: ADV},
 | 
				
			||||||
 | 
					    "ADV__Gender=Masc": {"morph": "Gender=Masc", POS: ADV},
 | 
				
			||||||
    "AUX__Mood=Imp|VerbForm=Fin": {"morph": "Mood=Imp|VerbForm=Fin", POS: AUX},
 | 
					    "AUX__Mood=Imp|VerbForm=Fin": {"morph": "Mood=Imp|VerbForm=Fin", POS: AUX},
 | 
				
			||||||
    "AUX__Mood=Ind|Tense=Past|VerbForm=Fin": {
 | 
					    "AUX__Mood=Ind|Tense=Past|VerbForm=Fin": {
 | 
				
			||||||
        "morph": "Mood=Ind|Tense=Past|VerbForm=Fin",
 | 
					        "morph": "Mood=Ind|Tense=Past|VerbForm=Fin",
 | 
				
			||||||
| 
						 | 
					@ -90,6 +91,7 @@ TAG_MAP = {
 | 
				
			||||||
        POS: AUX,
 | 
					        POS: AUX,
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
    "AUX__VerbForm=Inf": {"morph": "VerbForm=Inf", POS: AUX},
 | 
					    "AUX__VerbForm=Inf": {"morph": "VerbForm=Inf", POS: AUX},
 | 
				
			||||||
 | 
					    "AUX__VerbForm=Inf|Voice=Pass": {"morph": "VerbForm=Inf|Voice=Pass", POS: AUX},
 | 
				
			||||||
    "AUX__VerbForm=Part": {"morph": "VerbForm=Part", POS: AUX},
 | 
					    "AUX__VerbForm=Part": {"morph": "VerbForm=Part", POS: AUX},
 | 
				
			||||||
    "CONJ___": {"morph": "_", POS: CONJ},
 | 
					    "CONJ___": {"morph": "_", POS: CONJ},
 | 
				
			||||||
    "DET__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": {
 | 
					    "DET__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": {
 | 
				
			||||||
| 
						 | 
					@ -310,6 +312,7 @@ TAG_MAP = {
 | 
				
			||||||
        POS: NOUN,
 | 
					        POS: NOUN,
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
    "NOUN__Definite=Ind|Number=Plur": {"morph": "Definite=Ind|Number=Plur", POS: NOUN},
 | 
					    "NOUN__Definite=Ind|Number=Plur": {"morph": "Definite=Ind|Number=Plur", POS: NOUN},
 | 
				
			||||||
 | 
					    "NOUN__Definite=Ind|Number=Sing": {"morph": "Definite=Ind|Number=Sing", POS: NOUN},
 | 
				
			||||||
    "NOUN__Gender=Fem": {"morph": "Gender=Fem", POS: NOUN},
 | 
					    "NOUN__Gender=Fem": {"morph": "Gender=Fem", POS: NOUN},
 | 
				
			||||||
    "NOUN__Gender=Masc": {"morph": "Gender=Masc", POS: NOUN},
 | 
					    "NOUN__Gender=Masc": {"morph": "Gender=Masc", POS: NOUN},
 | 
				
			||||||
    "NOUN__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", POS: NOUN},
 | 
					    "NOUN__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", POS: NOUN},
 | 
				
			||||||
| 
						 | 
					@ -411,6 +414,10 @@ TAG_MAP = {
 | 
				
			||||||
        "morph": "Case=Nom|Number=Plur|Person=",
 | 
					        "morph": "Case=Nom|Number=Plur|Person=",
 | 
				
			||||||
        POS: PRON,
 | 
					        POS: PRON,
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
 | 
					    "PRON__Case=Gen|Number=Plur|Person=3|PronType=Prs": {
 | 
				
			||||||
 | 
					        "morph": "Case=Gen|Number=Plur|Person=3|PronType=Prs",
 | 
				
			||||||
 | 
					        POS: PRON,
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
    "PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs": {
 | 
					    "PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs": {
 | 
				
			||||||
        "morph": "Gender=Fem",
 | 
					        "morph": "Gender=Fem",
 | 
				
			||||||
        POS: PRON,
 | 
					        POS: PRON,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user