mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	* Fix scoring normalization (#7629) * fix scoring normalization * score weights by total sum instead of per component * cleanup * more cleanup * Use a context manager when reading model (fix #7036) (#8244) * Fix other open calls without context managers (#8245) * Don't add duplicate patterns all the time in EntityRuler (fix #8216) (#8246) * Don't add duplicate patterns (fix #8216) * Refactor EntityRuler init This simplifies the EntityRuler init code. This is helpful as prep for allowing the EntityRuler to reset itself. * Make EntityRuler.clear reset matchers Includes a new test for this. * Tidy PhraseMatcher instantiation Since the attr can be None safely now, the guard if is no longer required here. Also renamed the `_validate` attr. Maybe it's not needed? * Fix NER test * Add test to make sure patterns aren't increasing * Move test to regression tests * Exclude generated .cpp files from package (#8271) * Fix non-deterministic deduplication in Greek lemmatizer (#8421) * Fix setting empty entities in Example.from_dict (#8426) * Filter W036 for entity ruler, etc. (#8424) * Preserve paths.vectors/initialize.vectors setting in quickstart template * Various fixes for spans in Docs.from_docs (#8487) * Fix spans offsets if a doc ends in a single space and no space is inserted * Also include spans key in merged doc for empty spans lists * Fix duplicate spacy package CLI opts (#8551) Use `-c` for `--code` and not additionally for `--create-meta`, in line with the docs. * Raise an error for textcat with <2 labels (#8584) * Raise an error for textcat with <2 labels Raise an error if initializing a `textcat` component without at least two labels. 
* Add similar note to docs * Update positive_label description in API docs * Add Macedonian models to website (#8637) * Fix Azerbaijani init, extend lang init tests (#8656) * Extend langs in initialize tests * Fix az init * Fix ru/uk lemmatizer mp with spawn (#8657) Use an instance variable instead of a class variable for the morphological analyzer so that multiprocessing with spawn is possible. * Use 0-vector for OOV lexemes (#8639) * Set version to v3.0.7 Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> Co-authored-by: Paul O'Leary McCann <polm@dampfkraft.com>
		
			
				
	
	
		
			25 lines
		
	
	
		
			922 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			25 lines
		
	
	
		
			922 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pytest
 | |
| from spacy.util import get_lang_class
 | |
| 
 | |
| 
 | |
# fmt: off
# Language codes exercised by the initialization test below. Only languages
# that need no external dependencies are listed.
# excluded: ja, ko, th, vi, zh
LANGUAGES = [
    "af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de",
    "el", "en", "es", "et", "eu", "fa", "fi", "fr", "ga", "gu",
    "he", "hi", "hr", "hu", "hy", "id", "is", "it", "kn", "ky",
    "lb", "lt", "lv", "mk", "ml", "mr", "nb", "ne", "nl", "pl",
    "pt", "ro", "ru", "sa", "si", "sk", "sl", "sq", "sr", "sv",
    "ta", "te", "ti", "tl", "tn", "tr", "tt", "uk", "ur", "xx",
    "yo",
]
# fmt: on
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize("lang", LANGUAGES)
def test_lang_initialize(lang, capfd):
    """Check that a language can be instantiated and run without printing.

    For the given language code, the language class is looked up and
    instantiated, a short text is processed, and stdout is asserted empty.
    """
    lang_cls = get_lang_class(lang)
    nlp = lang_cls()
    # The returned doc is not needed; the call is made only so that any
    # stray print statements end up in the captured output (see #3342).
    nlp("test")
    printed = capfd.readouterr().out
    assert not printed
 |