mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			43 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			43 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
//- 💫 DOCS > USAGE > LINGUISTIC FEATURES
 | 
						|
 | 
						|
include ../_includes/_mixins
 | 
						|
 | 
						|
p
 | 
						|
    |  Processing raw text intelligently is difficult: most words are rare, and
 | 
						|
    |  it's common for words that look completely different to mean almost the
 | 
						|
    |  same thing. The same words in a different order can mean something
 | 
						|
    |  completely different. Even splitting text into useful word-like units can
 | 
						|
    |  be difficult in many languages. While it's possible to solve some
 | 
						|
    |  problems starting from only the raw characters, it's usually better to
 | 
						|
    |  use linguistic knowledge to add useful information. That's exactly what
 | 
						|
    |  spaCy is designed to do: you put in raw text, and get back a
 | 
						|
    |  #[+api("doc") #[code Doc]] object that comes with a variety of
 | 
						|
    |  annotations.
 | 
						|
 | 
						|
+section("pos-tagging")
 | 
						|
    +h(2, "pos-tagging") Part-of-speech tagging
 | 
						|
        +tag-model("tagger", "dependency parse")
 | 
						|
    include _linguistic-features/_pos-tagging
 | 
						|
 | 
						|
+section("dependency-parse")
 | 
						|
    +h(2, "dependency-parse") Dependency parsing
 | 
						|
        +tag-model("dependency parse")
 | 
						|
    include _linguistic-features/_dependency-parse
 | 
						|
 | 
						|
+section("named-entities")
 | 
						|
    +h(2, "named-entities") Named entities
 | 
						|
        +tag-model("named entities")
 | 
						|
    include _linguistic-features/_named-entities
 | 
						|
 | 
						|
+section("tokenization")
 | 
						|
    +h(2, "tokenization") Tokenization
 | 
						|
    include _linguistic-features/_tokenization
 | 
						|
 | 
						|
+section("sbd")
 | 
						|
    +h(2, "sbd") Sentence segmentation
 | 
						|
    include _linguistic-features/_sentence-segmentation
 | 
						|
 | 
						|
+section("rule-based-matching")
 | 
						|
    +h(2, "rule-based-matching") Rule-based matching
 | 
						|
    include _linguistic-features/_rule-based-matching
 |