mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			43 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			43 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| //- 💫 DOCS > USAGE > LINGUISTIC FEATURES
 | |
| 
 | |
| include ../_includes/_mixins
 | |
| 
 | |
| p
 | |
|     |  Processing raw text intelligently is difficult: most words are rare, and
 | |
|     |  it's common for words that look completely different to mean almost the
 | |
|     |  same thing. The same words in a different order can mean something
 | |
|     |  completely different. Even splitting text into useful word-like units can
 | |
|     |  be difficult in many languages. While it's possible to solve some
 | |
|     |  problems starting from only the raw characters, it's usually better to
 | |
|     |  use linguistic knowledge to add useful information. That's exactly what
 | |
|     |  spaCy is designed to do: you put in raw text, and get back a
 | |
|     |  #[+api("doc") #[code Doc]] object that comes with a variety of
 | |
|     |  annotations.
 | |
| 
 | |
| +section("pos-tagging")
 | |
|     +h(2, "pos-tagging") Part-of-speech tagging
 | |
|         +tag-model("tagger", "dependency parse")
 | |
|     include _linguistic-features/_pos-tagging
 | |
| 
 | |
| +section("dependency-parse")
 | |
|     +h(2, "dependency-parse") Dependency parsing
 | |
|         +tag-model("dependency parse")
 | |
|     include _linguistic-features/_dependency-parse
 | |
| 
 | |
| +section("named-entities")
 | |
|     +h(2, "named-entities") Named entities
 | |
|         +tag-model("named entities")
 | |
|     include _linguistic-features/_named-entities
 | |
| 
 | |
| +section("tokenization")
 | |
|     +h(2, "tokenization") Tokenization
 | |
|     include _linguistic-features/_tokenization
 | |
| 
 | |
| +section("sbd")
 | |
|     +h(2, "sbd") Sentence segmentation
 | |
|     include _linguistic-features/_sentence-segmentation
 | |
| 
 | |
| +section("rule-based-matching")
 | |
|     +h(2, "rule-based-matching") Rule-based matching
 | |
|     include _linguistic-features/_rule-based-matching
 |