mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-24 20:51:30 +03:00 
			
		
		
		
	Merge branch 'master' into fix-entity-recognizer-docs
This commit is contained in:
		
						commit
						99da6e1d79
					
				
							
								
								
									
										6
									
								
								.github/contributors/nikhilsaldanha.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/contributors/nikhilsaldanha.md
									
									
									
									
										vendored
									
									
								
							|  | @ -98,9 +98,9 @@ mark both statements: | |||
| 
 | ||||
| | Field                          | Entry                | | ||||
| |------------------------------- | -------------------- | | ||||
| | Name                           |   Nikhil Saldanha    | | ||||
| | Name                           | Nikhil Saldanha      | | ||||
| | Company name (if applicable)   |                      | | ||||
| | Title or role (if applicable)  |                      | | ||||
| | Date                           |     2020/03/28       | | ||||
| | GitHub username                |    nikhilsaldanha    | | ||||
| | Date                           | 2020-03-17           | | ||||
| | GitHub username                | nikhilsaldanha       | | ||||
| | Website (optional)             |                      | | ||||
|  |  | |||
							
								
								
									
										106
									
								
								.github/contributors/tommilligan.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								.github/contributors/tommilligan.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,106 @@ | |||
| # spaCy contributor agreement | ||||
| 
 | ||||
| This spaCy Contributor Agreement (**"SCA"**) is based on the | ||||
| [Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). | ||||
| The SCA applies to any contribution that you make to any product or project | ||||
| managed by us (the **"project"**), and sets out the intellectual property rights | ||||
| you grant to us in the contributed materials. The term **"us"** shall mean | ||||
| [ExplosionAI GmbH](https://explosion.ai/legal). The term | ||||
| **"you"** shall mean the person or entity identified below. | ||||
| 
 | ||||
| If you agree to be bound by these terms, fill in the information requested | ||||
| below and include the filled-in version with your first pull request, under the | ||||
| folder [`.github/contributors/`](/.github/contributors/). The name of the file | ||||
| should be your GitHub username, with the extension `.md`. For example, the user | ||||
| example_user would create the file `.github/contributors/example_user.md`. | ||||
| 
 | ||||
| Read this agreement carefully before signing. These terms and conditions | ||||
| constitute a binding legal agreement. | ||||
| 
 | ||||
| ## Contributor Agreement | ||||
| 
 | ||||
| 1.  The term "contribution" or "contributed materials" means any source code, | ||||
|     object code, patch, tool, sample, graphic, specification, manual, | ||||
|     documentation, or any other material posted or submitted by you to the project. | ||||
| 
 | ||||
| 2.  With respect to any worldwide copyrights, or copyright applications and | ||||
|     registrations, in your contribution: | ||||
| 
 | ||||
|         * you hereby assign to us joint ownership, and to the extent that such | ||||
|         assignment is or becomes invalid, ineffective or unenforceable, you hereby | ||||
|         grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, | ||||
|         royalty-free, unrestricted license to exercise all rights under those | ||||
|         copyrights. This includes, at our option, the right to sublicense these same | ||||
|         rights to third parties through multiple levels of sublicensees or other | ||||
|         licensing arrangements; | ||||
| 
 | ||||
|         * you agree that each of us can do all things in relation to your | ||||
|         contribution as if each of us were the sole owners, and if one of us makes | ||||
|         a derivative work of your contribution, the one who makes the derivative | ||||
|         work (or has it made) will be the sole owner of that derivative work; | ||||
| 
 | ||||
|         * you agree that you will not assert any moral rights in your contribution | ||||
|         against us, our licensees or transferees; | ||||
| 
 | ||||
|         * you agree that we may register a copyright in your contribution and | ||||
|         exercise all ownership rights associated with it; and | ||||
| 
 | ||||
|         * you agree that neither of us has any duty to consult with, obtain the | ||||
|         consent of, pay or render an accounting to the other for any use or | ||||
|         distribution of your contribution. | ||||
| 
 | ||||
| 3.  With respect to any patents you own, or that you can license without payment | ||||
|     to any third party, you hereby grant to us a perpetual, irrevocable, | ||||
|     non-exclusive, worldwide, no-charge, royalty-free license to: | ||||
| 
 | ||||
|         * make, have made, use, sell, offer to sell, import, and otherwise transfer | ||||
|         your contribution in whole or in part, alone or in combination with or | ||||
|         included in any product, work or materials arising out of the project to | ||||
|         which your contribution was submitted, and | ||||
| 
 | ||||
|         * at our option, to sublicense these same rights to third parties through | ||||
|         multiple levels of sublicensees or other licensing arrangements. | ||||
| 
 | ||||
| 4.  Except as set out above, you keep all right, title, and interest in your | ||||
|     contribution. The rights that you grant to us under these terms are effective | ||||
|     on the date you first submitted a contribution to us, even if your submission | ||||
|     took place before the date you sign these terms. | ||||
| 
 | ||||
| 5.  You covenant, represent, warrant and agree that: | ||||
| 
 | ||||
|     - Each contribution that you submit is and shall be an original work of | ||||
|       authorship and you can legally grant the rights set out in this SCA; | ||||
| 
 | ||||
|     - to the best of your knowledge, each contribution will not violate any | ||||
|       third party's copyrights, trademarks, patents, or other intellectual | ||||
|       property rights; and | ||||
| 
 | ||||
|     - each contribution shall be in compliance with U.S. export control laws and | ||||
|       other applicable export and import laws. You agree to notify us if you | ||||
|       become aware of any circumstance which would make any of the foregoing | ||||
|       representations inaccurate in any respect. We may publicly disclose your | ||||
|       participation in the project, including the fact that you have signed the SCA. | ||||
| 
 | ||||
| 6.  This SCA is governed by the laws of the State of California and applicable | ||||
|     U.S. Federal law. Any choice of law rules will not apply. | ||||
| 
 | ||||
| 7.  Please place an “x” on one of the applicable statements below. Please do NOT | ||||
|     mark both statements: | ||||
| 
 | ||||
|         * [x] I am signing on behalf of myself as an individual and no other person | ||||
|         or entity, including my employer, has or will have rights with respect to my | ||||
|         contributions. | ||||
| 
 | ||||
|         * [ ] I am signing on behalf of my employer or a legal entity and I have the | ||||
|         actual authority to contractually bind that entity. | ||||
| 
 | ||||
| ## Contributor Details | ||||
| 
 | ||||
| | Field                         | Entry        | | ||||
| | ----------------------------- | ------------ | | ||||
| | Name                          | Tom Milligan | | ||||
| | Company name (if applicable)  |              | | ||||
| | Title or role (if applicable) |              | | ||||
| | Date                          | 2020-03-24   | | ||||
| | GitHub username               | tommilligan  | | ||||
| | Website (optional)            |              | | ||||
							
								
								
									
										12
									
								
								setup.cfg
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								setup.cfg
									
									
									
									
									
								
							|  | @ -61,17 +61,17 @@ install_requires = | |||
| lookups = | ||||
|     spacy_lookups_data>=0.0.5,<0.2.0 | ||||
| cuda = | ||||
|     cupy>=5.0.0b4 | ||||
|     cupy>=5.0.0b4,<9.0.0 | ||||
| cuda80 = | ||||
|     cupy-cuda80>=5.0.0b4 | ||||
|     cupy-cuda80>=5.0.0b4,<9.0.0 | ||||
| cuda90 = | ||||
|     cupy-cuda90>=5.0.0b4 | ||||
|     cupy-cuda90>=5.0.0b4,<9.0.0 | ||||
| cuda91 = | ||||
|     cupy-cuda91>=5.0.0b4 | ||||
|     cupy-cuda91>=5.0.0b4,<9.0.0 | ||||
| cuda92 = | ||||
|     cupy-cuda92>=5.0.0b4 | ||||
|     cupy-cuda92>=5.0.0b4,<9.0.0 | ||||
| cuda100 = | ||||
|     cupy-cuda100>=5.0.0b4 | ||||
|     cupy-cuda100>=5.0.0b4,<9.0.0 | ||||
| # Language tokenizers with external dependencies | ||||
| ja = | ||||
|     fugashi>=0.1.3 | ||||
|  |  | |||
|  | @ -693,9 +693,11 @@ def build_text_classifier(nr_class, width=64, **cfg): | |||
|         ) | ||||
| 
 | ||||
|         linear_model = build_bow_text_classifier( | ||||
|             nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False | ||||
|             nr_class, | ||||
|             ngram_size=cfg.get("ngram_size", 1), | ||||
|             exclusive_classes=cfg.get("exclusive_classes", False), | ||||
|         ) | ||||
|         if cfg.get("exclusive_classes"): | ||||
|         if cfg.get("exclusive_classes", False): | ||||
|             output_layer = Softmax(nr_class, nr_class * 2) | ||||
|         else: | ||||
|             output_layer = ( | ||||
|  |  | |||
|  | @ -551,6 +551,7 @@ class Errors(object): | |||
|             "array.") | ||||
|     E191 = ("Invalid head: the head token must be from the same doc as the " | ||||
|             "token itself.") | ||||
|     E192 = ("Unable to resize vectors in place with cupy.") | ||||
| 
 | ||||
| 
 | ||||
| @add_codes | ||||
|  |  | |||
							
								
								
									
										22
									
								
								spacy/lang/kn/examples.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								spacy/lang/kn/examples.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,22 @@ | |||
| # coding: utf8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| 
 | ||||
| """ | ||||
| Example sentences to test spaCy and its language models. | ||||
| 
 | ||||
| >>> from spacy.lang.kn.examples import sentences | ||||
| >>> docs = nlp.pipe(sentences) | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| sentences = [ | ||||
|     "ಆಪಲ್ ಒಂದು ಯು.ಕೆ. ಸ್ಟಾರ್ಟ್ಅಪ್ ಅನ್ನು ೧ ಶತಕೋಟಿ ಡಾಲರ್ಗಳಿಗೆ ಖರೀದಿಸಲು ನೋಡುತ್ತಿದೆ.", | ||||
|     "ಸ್ವಾಯತ್ತ ಕಾರುಗಳು ವಿಮಾ ಹೊಣೆಗಾರಿಕೆಯನ್ನು ತಯಾರಕರ ಕಡೆಗೆ ಬದಲಾಯಿಸುತ್ತವೆ.", | ||||
|     "ಕಾಲುದಾರಿ ವಿತರಣಾ ರೋಬೋಟ್ಗಳನ್ನು ನಿಷೇಧಿಸುವುದನ್ನು ಸ್ಯಾನ್ ಫ್ರಾನ್ಸಿಸ್ಕೊ ಪರಿಗಣಿಸುತ್ತದೆ.", | ||||
|     "ಲಂಡನ್ ಯುನೈಟೆಡ್ ಕಿಂಗ್ಡಂನ ದೊಡ್ಡ ನಗರ.", | ||||
|     "ನೀನು ಎಲ್ಲಿದಿಯಾ?", | ||||
|     "ಫ್ರಾನ್ಸಾದ ಅಧ್ಯಕ್ಷರು ಯಾರು?", | ||||
|     "ಯುನೈಟೆಡ್ ಸ್ಟೇಟ್ಸ್ನ ರಾಜಧಾನಿ ಯಾವುದು?", | ||||
|     "ಬರಾಕ್ ಒಬಾಮ ಯಾವಾಗ ಜನಿಸಿದರು?", | ||||
| ] | ||||
|  | @ -89,17 +89,28 @@ def test_init_vectors_with_resize_data(data, resize_data): | |||
|     assert v.shape != data.shape | ||||
| 
 | ||||
| 
 | ||||
| def test_get_vector_resize(strings, data, resize_data): | ||||
|     v = Vectors(data=data) | ||||
|     v.resize(shape=resize_data.shape) | ||||
| def test_get_vector_resize(strings, data): | ||||
|     strings = [hash_string(s) for s in strings] | ||||
| 
 | ||||
|     # decrease vector dimension (truncate) | ||||
|     v = Vectors(data=data) | ||||
|     resized_dim = v.shape[1] - 1 | ||||
|     v.resize(shape=(v.shape[0], resized_dim)) | ||||
|     for i, string in enumerate(strings): | ||||
|         v.add(string, row=i) | ||||
| 
 | ||||
|     assert list(v[strings[0]]) == list(resize_data[0]) | ||||
|     assert list(v[strings[0]]) != list(resize_data[1]) | ||||
|     assert list(v[strings[1]]) != list(resize_data[0]) | ||||
|     assert list(v[strings[1]]) == list(resize_data[1]) | ||||
|     assert list(v[strings[0]]) == list(data[0, :resized_dim]) | ||||
|     assert list(v[strings[1]]) == list(data[1, :resized_dim]) | ||||
| 
 | ||||
|     # increase vector dimension (pad with zeros) | ||||
|     v = Vectors(data=data) | ||||
|     resized_dim = v.shape[1] + 1 | ||||
|     v.resize(shape=(v.shape[0], resized_dim)) | ||||
|     for i, string in enumerate(strings): | ||||
|         v.add(string, row=i) | ||||
| 
 | ||||
|     assert list(v[strings[0]]) == list(data[0]) + [0] | ||||
|     assert list(v[strings[1]]) == list(data[1]) + [0] | ||||
| 
 | ||||
| 
 | ||||
| def test_init_vectors_with_data(strings, data): | ||||
|  |  | |||
|  | @ -198,11 +198,17 @@ cdef class Vectors: | |||
| 
 | ||||
|         DOCS: https://spacy.io/api/vectors#resize | ||||
|         """ | ||||
|         xp = get_array_module(self.data) | ||||
|         if inplace: | ||||
|             self.data.resize(shape, refcheck=False) | ||||
|             if xp == numpy: | ||||
|                 self.data.resize(shape, refcheck=False) | ||||
|             else: | ||||
|                 raise ValueError(Errors.E192) | ||||
|         else: | ||||
|             xp = get_array_module(self.data) | ||||
|             self.data = xp.resize(self.data, shape) | ||||
|             resized_array = xp.zeros(shape, dtype=self.data.dtype) | ||||
|             copy_shape = (min(shape[0], self.data.shape[0]), min(shape[1], self.data.shape[1])) | ||||
|             resized_array[:copy_shape[0], :copy_shape[1]] = self.data[:copy_shape[0], :copy_shape[1]] | ||||
|             self.data = resized_array | ||||
|         filled = {row for row in self.key2row.values()} | ||||
|         self._unset = cppset[int]({row for row in range(shape[0]) if row not in filled}) | ||||
|         removed_items = [] | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user