mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Improve EntityRuler serialization
This commit is contained in:
		
							parent
							
								
									570ab1f481
								
							
						
					
					
						commit
						40cd03fc35
					
				| 
						 | 
				
			
			@ -293,12 +293,13 @@ class EntityRuler(object):
 | 
			
		|||
        """Save the entity ruler patterns to a directory. The patterns will be
 | 
			
		||||
        saved as newline-delimited JSON (JSONL).
 | 
			
		||||
 | 
			
		||||
        path (unicode / Path): The JSONL file to load.
 | 
			
		||||
        path (unicode / Path): The JSONL file to save.
 | 
			
		||||
        **kwargs: Other config parameters, mostly for consistency.
 | 
			
		||||
        RETURNS (EntityRuler): The loaded entity ruler.
 | 
			
		||||
 | 
			
		||||
        DOCS: https://spacy.io/api/entityruler#to_disk
 | 
			
		||||
        """
 | 
			
		||||
        path = ensure_path(path)
 | 
			
		||||
        cfg = {
 | 
			
		||||
            "overwrite": self.overwrite,
 | 
			
		||||
            "phrase_matcher_attr": self.phrase_matcher_attr,
 | 
			
		||||
| 
						 | 
				
			
			@ -310,5 +311,7 @@ class EntityRuler(object):
 | 
			
		|||
            ),
 | 
			
		||||
            "cfg": lambda p: srsly.write_json(p, cfg),
 | 
			
		||||
        }
 | 
			
		||||
        path = ensure_path(path)
 | 
			
		||||
        if path.suffix == ".jsonl":  # user wants to save only JSONL
 | 
			
		||||
            srsly.write_jsonl(path, self.patterns)
 | 
			
		||||
        else:
 | 
			
		||||
            to_disk(path, serializers, {})
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,10 +31,10 @@ be a token pattern (list) or a phrase pattern (string). For example:
 | 
			
		|||
> ```
 | 
			
		||||
 | 
			
		||||
| Name                  | Type          | Description                                                                                                                                           |
 | 
			
		||||
| ---------------- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
 | 
			
		||||
| --------------------- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
 | 
			
		||||
| `nlp`                 | `Language`    | The shared nlp object to pass the vocab to the matchers and process phrase patterns.                                                                  |
 | 
			
		||||
| `patterns`            | iterable      | Optional patterns to load in.                                                                                                                         |
 | 
			
		||||
| `phrase_matcher_attr` | int / unicode | Optional attr to pass to the internal [`PhraseMatcher`](/api/phtasematcher). defaults to `None`
 | 
			
		||||
| `phrase_matcher_attr` | int / unicode | Optional attr to pass to the internal [`PhraseMatcher`](/api/phrasematcher). Defaults to `None`.                                                      |
 | 
			
		||||
| `overwrite_ents`      | bool          | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`.                      |
 | 
			
		||||
| `**cfg`               | -             | Other config parameters. If pipeline component is loaded as part of a model pipeline, this will include all keyword arguments passed to `spacy.load`. |
 | 
			
		||||
| **RETURNS**           | `EntityRuler` | The newly constructed object.                                                                                                                         |
 | 
			
		||||
| 
						 | 
				
			
			@ -123,34 +123,40 @@ of dicts) or a phrase pattern (string). For more details, see the usage guide on
 | 
			
		|||
## EntityRuler.to_disk {#to_disk tag="method"}
 | 
			
		||||
 | 
			
		||||
Save the entity ruler patterns to a directory. The patterns will be saved as
 | 
			
		||||
newline-delimited JSON (JSONL).
 | 
			
		||||
newline-delimited JSON (JSONL). If a file with the suffix `.jsonl` is provided,
 | 
			
		||||
only the patterns are saved as JSONL. If a directory name is provided, a
 | 
			
		||||
`patterns.jsonl` and `cfg` file with the component configuration are exported.
 | 
			
		||||
 | 
			
		||||
> #### Example
 | 
			
		||||
>
 | 
			
		||||
> ```python
 | 
			
		||||
> ruler = EntityRuler(nlp)
 | 
			
		||||
> ruler.to_disk("/path/to/rules.jsonl")
 | 
			
		||||
> ruler.to_disk("/path/to/patterns.jsonl")  # saves patterns only
 | 
			
		||||
> ruler.to_disk("/path/to/entity_ruler")    # saves patterns and config
 | 
			
		||||
> ```
 | 
			
		||||
 | 
			
		||||
| Name   | Type             | Description                                                                                                                         |
 | 
			
		||||
| ------ | ---------------- | ---------------------------------------------------------------------------------------------------------------- |
 | 
			
		||||
| `path` | unicode / `Path` | A path to a file, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
 | 
			
		||||
| ------ | ---------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
 | 
			
		||||
| `path` | unicode / `Path` | A path to a JSONL file or directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
 | 
			
		||||
 | 
			
		||||
## EntityRuler.from_disk {#from_disk tag="method"}
 | 
			
		||||
 | 
			
		||||
Load the entity ruler from a file. Expects a file containing newline-delimited
 | 
			
		||||
JSON (JSONL) with one entry per line.
 | 
			
		||||
Load the entity ruler from a file. Expects either a file containing
 | 
			
		||||
newline-delimited JSON (JSONL) with one entry per line, or a directory
 | 
			
		||||
containing a `patterns.jsonl` file and a `cfg` file with the component
 | 
			
		||||
configuration.
 | 
			
		||||
 | 
			
		||||
> #### Example
 | 
			
		||||
>
 | 
			
		||||
> ```python
 | 
			
		||||
> ruler = EntityRuler(nlp)
 | 
			
		||||
> ruler.from_disk("/path/to/rules.jsonl")
 | 
			
		||||
> ruler.from_disk("/path/to/patterns.jsonl")  # loads patterns only
 | 
			
		||||
> ruler.from_disk("/path/to/entity_ruler")    # loads patterns and config
 | 
			
		||||
> ```
 | 
			
		||||
 | 
			
		||||
| Name        | Type             | Description                                                                              |
 | 
			
		||||
| ----------- | ---------------- | --------------------------------------------------------------------------- |
 | 
			
		||||
| `path`      | unicode / `Path` | A path to a JSONL file. Paths may be either strings or `Path`-like objects. |
 | 
			
		||||
| ----------- | ---------------- | ---------------------------------------------------------------------------------------- |
 | 
			
		||||
| `path`      | unicode / `Path` | A path to a JSONL file or directory. Paths may be either strings or `Path`-like objects. |
 | 
			
		||||
| **RETURNS** | `EntityRuler`    | The modified `EntityRuler` object.                                                       |
 | 
			
		||||
 | 
			
		||||
## EntityRuler.to_bytes {#to_bytes tag="method"}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user