mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			58 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			58 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from typing import Optional
 | 
						|
from pathlib import Path
 | 
						|
from wasabi import msg
 | 
						|
import typer
 | 
						|
import logging
 | 
						|
 | 
						|
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
 | 
						|
from ._util import import_code
 | 
						|
from .. import util
 | 
						|
from ..util import get_sourced_components, load_model_from_config
 | 
						|
 | 
						|
 | 
						|
@app.command(
 | 
						|
    "assemble",
 | 
						|
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
 | 
						|
)
 | 
						|
def assemble_cli(
 | 
						|
    # fmt: off
 | 
						|
    ctx: typer.Context,  # This is only used to read additional arguments
 | 
						|
    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
 | 
						|
    output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"),
 | 
						|
    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
 | 
						|
    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
 | 
						|
    # fmt: on
 | 
						|
):
 | 
						|
    """
 | 
						|
    Assemble a spaCy pipeline from a config file. The config file includes
 | 
						|
    all settings for initializing the pipeline. To override settings in the
 | 
						|
    config, e.g. settings that point to local paths or that you want to
 | 
						|
    experiment with, you can override them as command line options. The
 | 
						|
    --code argument lets you pass in a Python file that can be used to
 | 
						|
    register custom functions that are referenced in the config.
 | 
						|
 | 
						|
    DOCS: https://spacy.io/api/cli#assemble
 | 
						|
    """
 | 
						|
    util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
 | 
						|
    # Make sure all files and paths exists if they are needed
 | 
						|
    if not config_path or (str(config_path) != "-" and not config_path.exists()):
 | 
						|
        msg.fail("Config file not found", config_path, exits=1)
 | 
						|
    overrides = parse_config_overrides(ctx.args)
 | 
						|
    import_code(code_path)
 | 
						|
    with show_validation_error(config_path):
 | 
						|
        config = util.load_config(config_path, overrides=overrides, interpolate=False)
 | 
						|
    msg.divider("Initializing pipeline")
 | 
						|
    nlp = load_model_from_config(config, auto_fill=True)
 | 
						|
    config = config.interpolate()
 | 
						|
    sourced = get_sourced_components(config)
 | 
						|
    # Make sure that listeners are defined before initializing further
 | 
						|
    nlp._link_components()
 | 
						|
    with nlp.select_pipes(disable=[*sourced]):
 | 
						|
        nlp.initialize()
 | 
						|
    msg.good("Initialized pipeline")
 | 
						|
    msg.divider("Serializing to disk")
 | 
						|
    if output_path is not None and not output_path.exists():
 | 
						|
        output_path.mkdir(parents=True)
 | 
						|
        msg.good(f"Created output directory: {output_path}")
 | 
						|
    nlp.to_disk(output_path)
 |