2020-08-27 19:17:58 +03:00
from typing import Optional , Dict , Any , Union , List
from pathlib import Path
from wasabi import msg , table
from thinc . api import Config
2020-09-22 13:24:39 +03:00
from thinc . config import VARIABLE_RE , ConfigValidationError
2020-08-27 19:17:58 +03:00
import typer
from . _util import Arg , Opt , show_validation_error , parse_config_overrides
from . _util import import_code , debug_cli
from . . import util
@debug_cli.command(
    "config",
    # Unknown options are allowed through so that arbitrary config overrides
    # (e.g. --training.batch_size 128) can be collected from ctx.args below.
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
def debug_config_cli(
    # fmt: off
    ctx: typer.Context,  # This is only used to read additional arguments
    config_path: Path = Arg(..., help="Path to config file", exists=True),
    code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
    show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"),
    show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.")
    # fmt: on
):
    """Debug a config.cfg file and show validation errors. The command will
    create all objects in the tree and validate them. Note that some config
    validation errors are blocking and will prevent the rest of the config from
    being resolved. This means that you may not see all validation errors at
    once and some issues are only shown once previous errors have been fixed.
    Similar as with the 'train' command, you can override settings from the config
    as command line options. For instance, --training.batch_size 128 overrides
    the value of "batch_size" in the block "[training]".

    DOCS: https://nightly.spacy.io/api/cli#debug-config
    """
    # Extra, unrecognised CLI arguments are interpreted as config overrides.
    overrides = parse_config_overrides(ctx.args)
    # Import user code first so that any functions it registers are available
    # by the time the config is loaded and resolved.
    import_code(code_path)
    debug_config(
        config_path, overrides=overrides, show_funcs=show_funcs, show_vars=show_vars
    )
def debug_config(
    config_path: Path,
    *,
    overrides: Optional[Dict[str, Any]] = None,
    show_funcs: bool = False,
    show_vars: bool = False,
) -> None:
    """Load a config file, validate it and print a report to the console.

    config_path (Path): Path to the config file to check.
    overrides (Optional[Dict[str, Any]]): Config overrides passed on to
        util.load_config. Defaults to no overrides.
    show_funcs (bool): Also print an overview of the registered functions
        referenced in the config.
    show_vars (bool): Also print a table of the variables referenced in the
        config and their values.
    """
    # Use None as the default instead of a shared mutable dict so that state
    # can never leak between calls.
    if overrides is None:
        overrides = {}
    msg.divider("Config validation")
    # Loading, resolving and reference checking all run inside the
    # show_validation_error context (defined in ._util, not visible here),
    # which is expected to report config validation errors to the user.
    with show_validation_error(config_path):
        config = util.load_config(config_path, overrides=overrides)
        # Only the resolved config is needed here, not the nlp object.
        _, resolved = util.load_model_from_config(config)
        # Use the resolved config here in case user has one function returning
        # a dict of corpora etc.
        check_section_refs(resolved, ["training.dev_corpus", "training.train_corpus"])
    msg.good("Config is valid")
    if show_vars:
        variables = get_variables(config)
        msg.divider(f"Variables ({len(variables)})")
        head = ("Variable", "Value")
        msg.table(variables, header=head, divider=True, widths=(41, 34), spacing=2)
    if show_funcs:
        funcs = get_registered_funcs(config)
        msg.divider(f"Registered functions ({len(funcs)})")
        for func in funcs:
            func_data = {
                "Registry": f"@{func['registry']}",
                "Name": func["name"],
                "Module": func["module"],
                "File": f"{func['file']} (line {func['line_no']})",
            }
            # Heading is the config section the function is referenced from.
            msg.info(f"[{func['path']}]")
            print(table(func_data).strip())
def get_registered_funcs(config: Config) -> List[Dict[str, Optional[Union[str, int]]]]:
    """Collect information about all registered functions referenced in a config.

    A reference is any config key whose last path segment starts with "@",
    e.g. "@architectures". The part after the "@" is used as the name of the
    registry to look the function up in.

    config (Config): The config to inspect.
    RETURNS (List[Dict[str, Optional[Union[str, int]]]]): One dict per
        reference with the keys "name", "registry" and "path" (the dotted
        config section containing the reference), merged with whatever
        `registry.find` returns for the function.
    """
    result = []
    for key, value in util.walk_dict(config):
        if not key[-1].startswith("@"):
            continue
        # We have a reference to a registered function
        reg_name = key[-1][1:]  # strip the leading "@"
        registry = getattr(util.registry, reg_name)
        # Dotted path of the section that holds the reference
        path = ".".join(key[:-1])
        info = registry.find(value)
        result.append({"name": value, "registry": reg_name, "path": path, **info})
    return result
def get_variables(config: Config) -> Dict[str, Any]:
    """Find all variables referenced in a config and look up their values.

    The config is serialized to a string and searched with VARIABLE_RE. Each
    unique match is converted to a dotted path and resolved against the
    config.

    config (Config): The config to inspect.
    RETURNS (Dict[str, Any]): The variable references as they appear in the
        config, in sorted order, mapped to the repr() of their values.
    """
    result = {}
    for variable in sorted(set(VARIABLE_RE.findall(config.to_str()))):
        # Drop the first two characters and the last one (presumably the "${"
        # and "}" delimiters, confirm against VARIABLE_RE) and normalize the
        # ":" separator to "." so the reference can be resolved as a path.
        path = variable[2:-1].replace(":", ".")
        value = util.dot_to_object(config, path)
        result[variable] = repr(value)
    return result
2020-09-22 13:24:39 +03:00
def check_section_refs(config: Config, fields: List[str]) -> None:
    """Validate fields in the config that refer to other sections or values
    (e.g. in the corpora) and make sure that those references exist.

    config (Config): The config to check.
    fields (List[str]): Dotted names of the fields whose values should point
        to another section or value, e.g. "training.dev_corpus".
    RAISES (ConfigValidationError): If at least one field holds a reference
        that can't be found in the config. All invalid references are
        collected first and reported together.
    """
    errors = []
    for field in fields:
        # If the field doesn't exist in the config, we ignore it
        try:
            value = util.dot_to_object(config, field)
        except KeyError:
            continue
        # NOTE(review): this assumes the field's value is a dotted string
        # reference that util.dot_to_object can resolve, confirm for
        # non-string values.
        try:
            util.dot_to_object(config, value)
        except KeyError:
            # Named err_msg so it doesn't shadow the module-level wasabi `msg`.
            err_msg = f"not a valid section reference: {value}"
            errors.append({"loc": field.split("."), "msg": err_msg})
    if errors:
        raise ConfigValidationError(config, errors)