mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Rename _URL_PATTERN to URL_PATTERN
This commit is contained in:
		
							parent
							
								
									604f299cf6
								
							
						
					
					
						commit
						a91278cb32
					
				| 
						 | 
					@ -144,7 +144,7 @@ _regular_exp += ["^{prefix}[{hyphen}][{alpha}][{alpha}{elision}{other_hyphen}\-]
 | 
				
			||||||
_regular_exp += ["^{prefix}[{elision}][{alpha}][{alpha}{elision}{hyphen}\-]*$".format(
 | 
					_regular_exp += ["^{prefix}[{elision}][{alpha}][{alpha}{elision}{hyphen}\-]*$".format(
 | 
				
			||||||
                 prefix=p, elision=HYPHENS, hyphen=_other_hyphens, alpha=ALPHA_LOWER)
 | 
					                 prefix=p, elision=HYPHENS, hyphen=_other_hyphens, alpha=ALPHA_LOWER)
 | 
				
			||||||
                 for p in _elision_prefix]
 | 
					                 for p in _elision_prefix]
 | 
				
			||||||
_regular_exp.append(_URL_PATTERN)
 | 
					_regular_exp.append(URL_PATTERN)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TOKENIZER_EXCEPTIONS = dict(_exc)
 | 
					TOKENIZER_EXCEPTIONS = dict(_exc)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -96,4 +96,4 @@ _nums = "(({ne})|({t})|({on})|({c}))({s})?".format(
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TOKENIZER_EXCEPTIONS = dict(_exc)
 | 
					TOKENIZER_EXCEPTIONS = dict(_exc)
 | 
				
			||||||
TOKEN_MATCH = re.compile("^({u})|({n})$".format(u=_URL_PATTERN, n=_nums)).match
 | 
					TOKEN_MATCH = re.compile("^({u})|({n})$".format(u=URL_PATTERN, n=_nums)).match
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,7 +10,7 @@ from ..symbols import ORTH, POS, LEMMA, SPACE, PUNCT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# URL validation regex courtesy of: https://mathiasbynens.be/demo/url-regex
 | 
					# URL validation regex courtesy of: https://mathiasbynens.be/demo/url-regex
 | 
				
			||||||
# A few minor mods to this regex to account for use cases represented in test_urls
 | 
					# A few minor mods to this regex to account for use cases represented in test_urls
 | 
				
			||||||
_URL_PATTERN = (
 | 
					URL_PATTERN = (
 | 
				
			||||||
    r"^"
 | 
					    r"^"
 | 
				
			||||||
    # in order to support the prefix tokenization (see prefix test cases in test_urls).
 | 
					    # in order to support the prefix tokenization (see prefix test cases in test_urls).
 | 
				
			||||||
    r"(?=[\w])"
 | 
					    r"(?=[\w])"
 | 
				
			||||||
| 
						 | 
					@ -53,7 +53,7 @@ _URL_PATTERN = (
 | 
				
			||||||
    r"$"
 | 
					    r"$"
 | 
				
			||||||
).strip()
 | 
					).strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TOKEN_MATCH = regex.compile(_URL_PATTERN, regex.UNICODE).match
 | 
					TOKEN_MATCH = regex.compile(URL_PATTERN, regex.UNICODE).match
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user