mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge branch 'master' into develop
This commit is contained in:
		
						commit
						0184a95340
					
				
							
								
								
									
										87
									
								
								.github/contributors/akki2825.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								.github/contributors/akki2825.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,87 @@
 | 
				
			||||||
 | 
					## Contributor Agreement
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. The term "contribution" or "contributed materials" means any source code,
 | 
				
			||||||
 | 
					object code, patch, tool, sample, graphic, specification, manual,
 | 
				
			||||||
 | 
					documentation, or any other material posted or submitted by you to the project.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. With respect to any worldwide copyrights, or copyright applications and
 | 
				
			||||||
 | 
					registrations, in your contribution:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you hereby assign to us joint ownership, and to the extent that such
 | 
				
			||||||
 | 
					    assignment is or becomes invalid, ineffective or unenforceable, you hereby
 | 
				
			||||||
 | 
					    grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
 | 
				
			||||||
 | 
					    royalty-free, unrestricted license to exercise all rights under those
 | 
				
			||||||
 | 
					    copyrights. This includes, at our option, the right to sublicense these same
 | 
				
			||||||
 | 
					    rights to third parties through multiple levels of sublicensees or other
 | 
				
			||||||
 | 
					    licensing arrangements;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that each of us can do all things in relation to your
 | 
				
			||||||
 | 
					    contribution as if each of us were the sole owners, and if one of us makes
 | 
				
			||||||
 | 
					    a derivative work of your contribution, the one who makes the derivative
 | 
				
			||||||
 | 
					    work (or has it made) will be the sole owner of that derivative work;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that you will not assert any moral rights in your contribution
 | 
				
			||||||
 | 
					    against us, our licensees or transferees;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that we may register a copyright in your contribution and
 | 
				
			||||||
 | 
					    exercise all ownership rights associated with it; and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * you agree that neither of us has any duty to consult with, obtain the
 | 
				
			||||||
 | 
					    consent of, pay or render an accounting to the other for any use or
 | 
				
			||||||
 | 
					    distribution of your contribution.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. With respect to any patents you own, or that you can license without payment
 | 
				
			||||||
 | 
					to any third party, you hereby grant to us a perpetual, irrevocable,
 | 
				
			||||||
 | 
					non-exclusive, worldwide, no-charge, royalty-free license to:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * make, have made, use, sell, offer to sell, import, and otherwise transfer
 | 
				
			||||||
 | 
					    your contribution in whole or in part, alone or in combination with or
 | 
				
			||||||
 | 
					    included in any product, work or materials arising out of the project to
 | 
				
			||||||
 | 
					    which your contribution was submitted, and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * at our option, to sublicense these same rights to third parties through
 | 
				
			||||||
 | 
					    multiple levels of sublicensees or other licensing arrangements.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					4. Except as set out above, you keep all right, title, and interest in your
 | 
				
			||||||
 | 
					contribution. The rights that you grant to us under these terms are effective
 | 
				
			||||||
 | 
					on the date you first submitted a contribution to us, even if your submission
 | 
				
			||||||
 | 
					took place before the date you sign these terms.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					5. You covenant, represent, warrant and agree that:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * Each contribution that you submit is and shall be an original work of
 | 
				
			||||||
 | 
					    authorship and you can legally grant the rights set out in this SCA;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * to the best of your knowledge, each contribution will not violate any
 | 
				
			||||||
 | 
					    third party's copyrights, trademarks, patents, or other intellectual
 | 
				
			||||||
 | 
					    property rights; and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * each contribution shall be in compliance with U.S. export control laws and
 | 
				
			||||||
 | 
					    other applicable export and import laws. You agree to notify us if you
 | 
				
			||||||
 | 
					    become aware of any circumstance which would make any of the foregoing
 | 
				
			||||||
 | 
					    representations inaccurate in any respect. We may publicly disclose your
 | 
				
			||||||
 | 
					    participation in the project, including the fact that you have signed the SCA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					6. This SCA is governed by the laws of the State of California and applicable
 | 
				
			||||||
 | 
					U.S. Federal law. Any choice of law rules will not apply.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					7. Please place an “x” on one of the applicable statements below. Please do NOT
 | 
				
			||||||
 | 
					mark both statements:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * [x] I am signing on behalf of myself as an individual and no other person
 | 
				
			||||||
 | 
					    or entity, including my employer, has or will have rights with respect to my
 | 
				
			||||||
 | 
					    contributions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    * [ ] I am signing on behalf of my employer or a legal entity and I have the
 | 
				
			||||||
 | 
					    actual authority to contractually bind that entity.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Contributor Details
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					| Field                          | Entry                |
 | 
				
			||||||
 | 
					|------------------------------- | -------------------- |
 | 
				
			||||||
 | 
					| Name                           | Akhilesh K R         |
 | 
				
			||||||
 | 
					| Company name (if applicable)   |                      |
 | 
				
			||||||
 | 
					| Title or role (if applicable)  |                      |
 | 
				
			||||||
 | 
					| Date                           | 2019-02-12           |
 | 
				
			||||||
 | 
					| GitHub username                | akki2825             |
 | 
				
			||||||
 | 
					| Website (optional)             |                      |
 | 
				
			||||||
							
								
								
									
										21
									
								
								spacy/lang/kn/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								spacy/lang/kn/__init__.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,21 @@
 | 
				
			||||||
 | 
					# coding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from .stop_words import STOP_WORDS
 | 
				
			||||||
 | 
					from .lex_attrs import LEX_ATTRS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from ..norm_exceptions import BASE_NORMS
 | 
				
			||||||
 | 
					from ...language import Language
 | 
				
			||||||
 | 
					from ...attrs import LANG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class KannadaDefaults(Language.Defaults):
 | 
				
			||||||
 | 
					    stop_words = STOP_WORDS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Kannada(Language):
 | 
				
			||||||
 | 
					    lang = 'kn'
 | 
				
			||||||
 | 
					    Defaults = KannadaDefaults
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__all__ = ['Kannada']
 | 
				
			||||||
							
								
								
									
										69
									
								
								spacy/lang/kn/stop_words.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								spacy/lang/kn/stop_words.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,69 @@
 | 
				
			||||||
 | 
					# coding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Stop words
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					STOP_WORDS = set("""
 | 
				
			||||||
 | 
					ಈ
 | 
				
			||||||
 | 
					ಮತ್ತು
 | 
				
			||||||
 | 
					ಹಾಗೂ  
 | 
				
			||||||
 | 
					ಅವರು  
 | 
				
			||||||
 | 
					ಅವರ  
 | 
				
			||||||
 | 
					ಬಗ್ಗೆ  
 | 
				
			||||||
 | 
					ಎಂಬ
 | 
				
			||||||
 | 
					ಆದರೆ
 | 
				
			||||||
 | 
					ಅವರನ್ನು
 | 
				
			||||||
 | 
					ಆದರೆ
 | 
				
			||||||
 | 
					ತಮ್ಮ 
 | 
				
			||||||
 | 
					ಒಂದು
 | 
				
			||||||
 | 
					ಎಂದರು
 | 
				
			||||||
 | 
					ಮೇಲೆ
 | 
				
			||||||
 | 
					ಹೇಳಿದರು
 | 
				
			||||||
 | 
					ಸೇರಿದಂತೆ
 | 
				
			||||||
 | 
					ಬಳಿಕ
 | 
				
			||||||
 | 
					ಆ
 | 
				
			||||||
 | 
					ಯಾವುದೇ
 | 
				
			||||||
 | 
					ಅವರಿಗೆ
 | 
				
			||||||
 | 
					ನಡೆದ
 | 
				
			||||||
 | 
					ಕುರಿತು
 | 
				
			||||||
 | 
					ಇದು
 | 
				
			||||||
 | 
					ಅವರು
 | 
				
			||||||
 | 
					ಕಳೆದ
 | 
				
			||||||
 | 
					ಇದೇ
 | 
				
			||||||
 | 
					ತಿಳಿಸಿದರು
 | 
				
			||||||
 | 
					ಹೀಗಾಗಿ
 | 
				
			||||||
 | 
					ಕೂಡ
 | 
				
			||||||
 | 
					ತನ್ನ
 | 
				
			||||||
 | 
					ತಿಳಿಸಿದ್ದಾರೆ
 | 
				
			||||||
 | 
					ನಾನು
 | 
				
			||||||
 | 
					ಹೇಳಿದ್ದಾರೆ
 | 
				
			||||||
 | 
					ಈಗ
 | 
				
			||||||
 | 
					ಎಲ್ಲ
 | 
				
			||||||
 | 
					ನನ್ನ
 | 
				
			||||||
 | 
					ನಮ್ಮ
 | 
				
			||||||
 | 
					ಈಗಾಗಲೇ
 | 
				
			||||||
 | 
					ಇದಕ್ಕೆ
 | 
				
			||||||
 | 
					ಹಲವು
 | 
				
			||||||
 | 
					ಇದೆ
 | 
				
			||||||
 | 
					ಮತ್ತೆ
 | 
				
			||||||
 | 
					ಮಾಡುವ
 | 
				
			||||||
 | 
					ನೀಡಿದರು
 | 
				
			||||||
 | 
					ನಾವು
 | 
				
			||||||
 | 
					ನೀಡಿದ
 | 
				
			||||||
 | 
					ಇದರಿಂದ
 | 
				
			||||||
 | 
					ಅದು
 | 
				
			||||||
 | 
					ಇದನ್ನು
 | 
				
			||||||
 | 
					ನೀಡಿದ್ದಾರೆ
 | 
				
			||||||
 | 
					ಅದನ್ನು
 | 
				
			||||||
 | 
					ಇಲ್ಲಿ
 | 
				
			||||||
 | 
					ಆಗ
 | 
				
			||||||
 | 
					ಬಂದಿದೆ
 | 
				
			||||||
 | 
					ಅದೇ
 | 
				
			||||||
 | 
					ಇರುವ
 | 
				
			||||||
 | 
					ಅಲ್ಲದೆ
 | 
				
			||||||
 | 
					ಕೆಲವು
 | 
				
			||||||
 | 
					ನೀಡಿದೆ
 | 
				
			||||||
 | 
					ಇದರ
 | 
				
			||||||
 | 
					ಇನ್ನು
 | 
				
			||||||
 | 
					ನಡೆದಿದೆ
 | 
				
			||||||
 | 
					""".split())
 | 
				
			||||||
| 
						 | 
					@ -1108,6 +1108,35 @@
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            "github": "BramVanroy/spacy_conll",
 | 
					            "github": "BramVanroy/spacy_conll",
 | 
				
			||||||
            "category": ["standalone"]
 | 
					            "category": ["standalone"]
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            "id": "spacy-langdetect",
 | 
				
			||||||
 | 
					            "title": "spacy-langdetect" ,
 | 
				
			||||||
 | 
					            "slogan": "A fully customizable language detection pipeline for spaCy",
 | 
				
			||||||
 | 
					            "description": "This module allows you to add language detection capabilities to your spaCy pipeline. Also supports custom language detectors!",
 | 
				
			||||||
 | 
					            "pip": "spacy-langdetect",
 | 
				
			||||||
 | 
					            "code_example": [
 | 
				
			||||||
 | 
					                "import spacy",
 | 
				
			||||||
 | 
					                "from spacy_langdetect import LanguageDetector",
 | 
				
			||||||
 | 
					                "nlp = spacy.load('en')",
 | 
				
			||||||
 | 
					                "nlp.add_pipe(LanguageDetector(), name='language_detector', last=True)",
 | 
				
			||||||
 | 
					                "text = 'This is an english text.'",
 | 
				
			||||||
 | 
					                "doc = nlp(text)",
 | 
				
			||||||
 | 
					                "# document level language detection. Think of it like average language of the document!",
 | 
				
			||||||
 | 
					                "print(doc._.language)",
 | 
				
			||||||
 | 
					                "# sentence level language detection",
 | 
				
			||||||
 | 
					                "for sent in doc.sents:",
 | 
				
			||||||
 | 
					                "   print(sent, sent._.language)"
 | 
				
			||||||
 | 
					            ],
 | 
				
			||||||
 | 
					            "code_language": "python",
 | 
				
			||||||
 | 
					            "author": "Abhijit Balaji",
 | 
				
			||||||
 | 
					            "author_links": {
 | 
				
			||||||
 | 
					                "github": "Abhijit-2592",
 | 
				
			||||||
 | 
					                "website": "https://abhijit-2592.github.io/"
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "github": "Abhijit-2592/spacy-langdetect",
 | 
				
			||||||
 | 
					            "category": ["pipeline"],
 | 
				
			||||||
 | 
					            "tags": ["language-detection"]
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    ],
 | 
					    ],
 | 
				
			||||||
    "projectCats": {
 | 
					    "projectCats": {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user