From ce79b394072bba66c84d2bc680e179c2d69c5479 Mon Sep 17 00:00:00 2001
From: olliemath
Date: Mon, 16 Aug 2021 09:44:48 +0100
Subject: [PATCH] Chore: fix linting for examples

---
 examples/cpu_usage/collect.py                | 21 ++++--
 examples/cpu_usage/models.py                 |  3 +-
 examples/cpu_usage/results.py                |  8 +-
 examples/db_explorer/charts.py               | 45 ++++++-----
 examples/db_explorer/server.py               | 78 +++++++++++---------
 examples/full_text_search/download_ebooks.py | 17 +++--
 examples/full_text_search/load.py            | 37 +++++-----
 examples/full_text_search/models.py          | 16 ++--
 examples/full_text_search/search.py          | 43 +++++------
 pyproject.toml                               |  5 --
 setup.cfg                                    |  1 -
 11 files changed, 148 insertions(+), 126 deletions(-)

diff --git a/examples/cpu_usage/collect.py b/examples/cpu_usage/collect.py
index 62102ac..0aab1c4 100644
--- a/examples/cpu_usage/collect.py
+++ b/examples/cpu_usage/collect.py
@@ -1,20 +1,25 @@
-import psutil, time, datetime
-from clickhouse_orm import Database
+import datetime
+import time
+
+import psutil
 from models import CPUStats
 
+from clickhouse_orm import Database
 
-db = Database('demo')
+db = Database("demo")
 db.create_table(CPUStats)
 
-psutil.cpu_percent(percpu=True) # first sample should be discarded
+psutil.cpu_percent(percpu=True)  # first sample should be discarded
 
 while True:
     time.sleep(1)
     stats = psutil.cpu_percent(percpu=True)
     timestamp = datetime.datetime.now()
     print(timestamp)
-    db.insert([
-        CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
-        for cpu_id, cpu_percent in enumerate(stats)
-    ])
+    db.insert(
+        [
+            CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
+            for cpu_id, cpu_percent in enumerate(stats)
+        ]
+    )
diff --git a/examples/cpu_usage/models.py b/examples/cpu_usage/models.py
index fe9afc6..6bba423 100644
--- a/examples/cpu_usage/models.py
+++ b/examples/cpu_usage/models.py
@@ -1,4 +1,4 @@
-from clickhouse_orm import Model, DateTimeField, UInt16Field, Float32Field, Memory
+from clickhouse_orm import DateTimeField, Float32Field, Memory, Model, UInt16Field
 
 
 class CPUStats(Model):
@@ -8,4 +8,3 @@ class CPUStats(Model):
     cpu_percent = Float32Field()
 
     engine = Memory()
-
diff --git a/examples/cpu_usage/results.py b/examples/cpu_usage/results.py
index 06ee1f0..f7ef9b1 100644
--- a/examples/cpu_usage/results.py
+++ b/examples/cpu_usage/results.py
@@ -1,13 +1,13 @@
-from clickhouse_orm import Database, F
 from models import CPUStats
 
+from clickhouse_orm import Database, F
 
-db = Database('demo')
+db = Database("demo")
 queryset = CPUStats.objects_in(db)
 total = queryset.filter(CPUStats.cpu_id == 1).count()
 busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
-print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))
+print("CPU 1 was busy {:.2f}% of the time".format(busy * 100.0 / total))
 
 # Calculate the average usage per CPU
 for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
-    print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
+    print("CPU {row.cpu_id}: {row.average:.2f}%".format(row=row))
diff --git a/examples/db_explorer/charts.py b/examples/db_explorer/charts.py
index 1a690e6..1b01d56 100644
--- a/examples/db_explorer/charts.py
+++ b/examples/db_explorer/charts.py
@@ -1,62 +1,73 @@
 import pygal
-from pygal.style import RotateStyle
 from jinja2.filters import do_filesizeformat
+from pygal.style import RotateStyle
 
 # Formatting functions
-number_formatter = lambda v: '{:,}'.format(v)
-bytes_formatter = lambda v: do_filesizeformat(v, True)
+def number_formatter(v):
+    return "{:,}".format(v)
+
+
+def bytes_formatter(v):
+    return do_filesizeformat(v, True)
 
 
 def tables_piechart(db, by_field, value_formatter):
-    '''
+    """
     Generate a pie chart of the top n tables in the database.
     `db` - the database instance
     `by_field` - the field name to sort by
     `value_formatter` - a function to use for formatting the numeric values
-    '''
-    Tables = db.get_model_for_table('tables', system_table=True)
-    qs = Tables.objects_in(db).filter(database=db.db_name, is_temporary=False).exclude(engine='Buffer')
+    """
+    Tables = db.get_model_for_table("tables", system_table=True)
+    qs = Tables.objects_in(db).filter(database=db.db_name, is_temporary=False).exclude(engine="Buffer")
     tuples = [(getattr(table, by_field), table.name) for table in qs]
     return _generate_piechart(tuples, value_formatter)
 
 
 def columns_piechart(db, tbl_name, by_field, value_formatter):
-    '''
+    """
     Generate a pie chart of the top n columns in the table.
     `db` - the database instance
     `tbl_name` - the table name
     `by_field` - the field name to sort by
     `value_formatter` - a function to use for formatting the numeric values
-    '''
-    ColumnsTable = db.get_model_for_table('columns', system_table=True)
+    """
+    ColumnsTable = db.get_model_for_table("columns", system_table=True)
     qs = ColumnsTable.objects_in(db).filter(database=db.db_name, table=tbl_name)
     tuples = [(getattr(col, by_field), col.name) for col in qs]
    return _generate_piechart(tuples, value_formatter)
 
 
 def _get_top_tuples(tuples, n=15):
-    '''
+    """
     Given a list of tuples (value, name), this function sorts
     the list and returns only the top n results. All other tuples
     are aggregated to a single "others" tuple.
-    '''
+    """
     non_zero_tuples = [t for t in tuples if t[0]]
     sorted_tuples = sorted(non_zero_tuples, reverse=True)
     if len(sorted_tuples) > n:
-        others = (sum(t[0] for t in sorted_tuples[n:]), 'others')
+        others = (sum(t[0] for t in sorted_tuples[n:]), "others")
         sorted_tuples = sorted_tuples[:n] + [others]
     return sorted_tuples
 
 
 def _generate_piechart(tuples, value_formatter):
-    '''
+    """
     Generates a pie chart.
     `tuples` - a list of (value, name) tuples to include in the chart
     `value_formatter` - a function to use for formatting the values
-    '''
-    style = RotateStyle('#9e6ffe', background='white', legend_font_family='Roboto', legend_font_size=18, tooltip_font_family='Roboto', tooltip_font_size=24)
-    chart = pygal.Pie(style=style, margin=0, title=' ', value_formatter=value_formatter, truncate_legend=-1)
+    """
+    style = RotateStyle(
+        "#9e6ffe",
+        background="white",
+        legend_font_family="Roboto",
+        legend_font_size=18,
+        tooltip_font_family="Roboto",
+        tooltip_font_size=24,
+    )
+    chart = pygal.Pie(style=style, margin=0, title=" ", value_formatter=value_formatter, truncate_legend=-1)
     for t in _get_top_tuples(tuples):
         chart.add(t[1], t[0])
     return chart.render(is_unicode=True, disable_xml_declaration=True)
diff --git a/examples/db_explorer/server.py b/examples/db_explorer/server.py
index 9fbcc03..d1406e1 100644
--- a/examples/db_explorer/server.py
+++ b/examples/db_explorer/server.py
@@ -1,87 +1,93 @@
-from clickhouse_orm import Database, F
-from charts import tables_piechart, columns_piechart, number_formatter, bytes_formatter
-from flask import Flask
-from flask import render_template
 import sys
+from charts import bytes_formatter, columns_piechart, number_formatter, tables_piechart
+from flask import Flask, render_template
+
+from clickhouse_orm import Database, F
 
 app = Flask(__name__)
 
 
-@app.route('/')
+@app.route("/")
 def homepage_view():
-    '''
+    """
     Root view that lists all databases.
-    '''
-    db = _get_db('system')
+    """
+    db = _get_db("system")
     # Get all databases in the system.databases table
-    DatabasesTable = db.get_model_for_table('databases', system_table=True)
-    databases = DatabasesTable.objects_in(db).exclude(name='system')
+    DatabasesTable = db.get_model_for_table("databases", system_table=True)
+    databases = DatabasesTable.objects_in(db).exclude(name="system")
     databases = databases.order_by(F.lower(DatabasesTable.name))
     # Generate the page
-    return render_template('homepage.html', db=db, databases=databases)
+    return render_template("homepage.html", db=db, databases=databases)
 
 
-@app.route('/<db_name>/')
+@app.route("/<db_name>/")
 def database_view(db_name):
-    '''
+    """
     A view that displays information about a single database.
-    '''
+    """
     db = _get_db(db_name)
     # Get all the tables in the database, by aggregating information from system.columns
-    ColumnsTable = db.get_model_for_table('columns', system_table=True)
-    tables = ColumnsTable.objects_in(db).filter(database=db_name).aggregate(
-        ColumnsTable.table,
-        compressed_size=F.sum(ColumnsTable.data_compressed_bytes),
-        uncompressed_size=F.sum(ColumnsTable.data_uncompressed_bytes),
-        ratio=F.sum(ColumnsTable.data_uncompressed_bytes) / F.sum(ColumnsTable.data_compressed_bytes)
+    ColumnsTable = db.get_model_for_table("columns", system_table=True)
+    tables = (
+        ColumnsTable.objects_in(db)
+        .filter(database=db_name)
+        .aggregate(
+            ColumnsTable.table,
+            compressed_size=F.sum(ColumnsTable.data_compressed_bytes),
+            uncompressed_size=F.sum(ColumnsTable.data_uncompressed_bytes),
+            ratio=F.sum(ColumnsTable.data_uncompressed_bytes) / F.sum(ColumnsTable.data_compressed_bytes),
+        )
     )
     tables = tables.order_by(F.lower(ColumnsTable.table))
     # Generate the page
-    return render_template('database.html',
+    return render_template(
+        "database.html",
         db=db,
         tables=tables,
-        tables_piechart_by_rows=tables_piechart(db, 'total_rows', value_formatter=number_formatter),
-        tables_piechart_by_size=tables_piechart(db, 'total_bytes', value_formatter=bytes_formatter),
+        tables_piechart_by_rows=tables_piechart(db, "total_rows", value_formatter=number_formatter),
+        tables_piechart_by_size=tables_piechart(db, "total_bytes", value_formatter=bytes_formatter),
     )
 
 
-@app.route('/<db_name>/<tbl_name>/')
+@app.route("/<db_name>/<tbl_name>/")
 def table_view(db_name, tbl_name):
-    '''
+    """
     A view that displays information about a single table.
-    '''
+    """
     db = _get_db(db_name)
     # Get table information from system.tables
-    TablesTable = db.get_model_for_table('tables', system_table=True)
+    TablesTable = db.get_model_for_table("tables", system_table=True)
     tbl_info = TablesTable.objects_in(db).filter(database=db_name, name=tbl_name)[0]
     # Get the SQL used for creating the table
-    create_table_sql = db.raw('SHOW CREATE TABLE %s FORMAT TabSeparatedRaw' % tbl_name)
+    create_table_sql = db.raw("SHOW CREATE TABLE %s FORMAT TabSeparatedRaw" % tbl_name)
     # Get all columns in the table from system.columns
-    ColumnsTable = db.get_model_for_table('columns', system_table=True)
+    ColumnsTable = db.get_model_for_table("columns", system_table=True)
     columns = ColumnsTable.objects_in(db).filter(database=db_name, table=tbl_name)
     # Generate the page
-    return render_template('table.html',
+    return render_template(
+        "table.html",
         db=db,
         tbl_name=tbl_name,
         tbl_info=tbl_info,
         create_table_sql=create_table_sql,
         columns=columns,
-        piechart=columns_piechart(db, tbl_name, 'data_compressed_bytes', value_formatter=bytes_formatter),
+        piechart=columns_piechart(db, tbl_name, "data_compressed_bytes", value_formatter=bytes_formatter),
     )
 
 
 def _get_db(db_name):
-    '''
+    """
     Returns a Database instance using connection information
     from the command line arguments (optional).
-    '''
-    db_url = sys.argv[1] if len(sys.argv) > 1 else 'http://localhost:8123/'
+    """
+    db_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8123/"
     username = sys.argv[2] if len(sys.argv) > 2 else None
     password = sys.argv[3] if len(sys.argv) > 3 else None
     return Database(db_name, db_url, username, password, readonly=True)
 
 
-if __name__ == '__main__':
-    _get_db('system') # fail early on db connection problems
+if __name__ == "__main__":
+    _get_db("system")  # fail early on db connection problems
     app.run(debug=True)
diff --git a/examples/full_text_search/download_ebooks.py b/examples/full_text_search/download_ebooks.py
index 170d5e1..865e59c 100644
--- a/examples/full_text_search/download_ebooks.py
+++ b/examples/full_text_search/download_ebooks.py
@@ -1,27 +1,28 @@
-import requests
 import os
 
+import requests
+
 
 def download_ebook(id):
-    print(id, end=' ')
+    print(id, end=" ")
     # Download the ebook's text
-    r = requests.get('https://www.gutenberg.org/files/{id}/{id}-0.txt'.format(id=id))
+    r = requests.get("https://www.gutenberg.org/files/{id}/{id}-0.txt".format(id=id))
     if r.status_code == 404:
-        print('NOT FOUND, SKIPPING')
+        print("NOT FOUND, SKIPPING")
         return
     r.raise_for_status()
     # Find the ebook's title
-    text = r.content.decode('utf-8')
+    text = r.content.decode("utf-8")
     for line in text.splitlines():
-        if line.startswith('Title:'):
+        if line.startswith("Title:"):
             title = line[6:].strip()
             print(title)
     # Save the ebook
-    with open('ebooks/{}.txt'.format(title), 'wb') as f:
+    with open("ebooks/{}.txt".format(title), "wb") as f:
         f.write(r.content)
 
 
 if __name__ == "__main__":
-    os.makedirs('ebooks', exist_ok=True)
+    os.makedirs("ebooks", exist_ok=True)
     for i in [1342, 11, 84, 2701, 25525, 1661, 98, 74, 43, 215, 1400, 76]:
         download_ebook(i)
diff --git a/examples/full_text_search/load.py b/examples/full_text_search/load.py
index 51564f4..225b155 100644
--- a/examples/full_text_search/load.py
+++ b/examples/full_text_search/load.py
@@ -1,61 +1,64 @@
 import sys
-import nltk
-from nltk.stem.porter import PorterStemmer
 from glob import glob
-from clickhouse_orm import Database
+
+import nltk
 from models import Fragment
+from nltk.stem.porter import PorterStemmer
+
+from clickhouse_orm import Database
 
 
 def trim_punctuation(word):
-    '''
+    """
     Trim punctuation characters from the beginning and end of the word
-    '''
+    """
     start = end = len(word)
     for i in range(len(word)):
         if word[i].isalnum():
             start = min(start, i)
             end = i + 1
-    return word[start : end]
+    return word[start:end]
 
 
 def parse_file(filename):
-    '''
+    """
     Parses a text file at the give path.
     Returns a generator of tuples (original_word, stemmed_word)
     The original_word may include punctuation characters.
-    '''
+    """
     stemmer = PorterStemmer()
-    with open(filename, 'r', encoding='utf-8') as f:
+    with open(filename, "r", encoding="utf-8") as f:
         for line in f:
             for word in line.split():
                 yield (word, stemmer.stem(trim_punctuation(word)))
 
 
 def get_fragments(filename):
-    '''
+    """
     Converts a text file at the given path to a generator
     of Fragment instances.
-    '''
+    """
     from os import path
+
     document = path.splitext(path.basename(filename))[0]
     idx = 0
     for word, stem in parse_file(filename):
         idx += 1
         yield Fragment(document=document, idx=idx, word=word, stem=stem)
-    print('{} - {} words'.format(filename, idx))
+    print("{} - {} words".format(filename, idx))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     # Load NLTK data if necessary
-    nltk.download('punkt')
-    nltk.download('wordnet')
+    nltk.download("punkt")
+    nltk.download("wordnet")
 
     # Initialize database
-    db = Database('default')
+    db = Database("default")
     db.create_table(Fragment)
 
     # Load files from the command line or everything under ebooks/
-    filenames = sys.argv[1:] or glob('ebooks/*.txt')
+    filenames = sys.argv[1:] or glob("ebooks/*.txt")
     for filename in filenames:
         db.insert(get_fragments(filename), batch_size=100000)
diff --git a/examples/full_text_search/models.py b/examples/full_text_search/models.py
index 80de2b7..ae4c666 100644
--- a/examples/full_text_search/models.py
+++ b/examples/full_text_search/models.py
@@ -1,16 +1,18 @@
-from clickhouse_orm import *
+from clickhouse_orm.engines import MergeTree
+from clickhouse_orm.fields import LowCardinalityField, StringField, UInt64Field
+from clickhouse_orm.models import Index, Model
 
 
 class Fragment(Model):
 
-    language = LowCardinalityField(StringField(), default='EN')
+    language = LowCardinalityField(StringField(), default="EN")
     document = LowCardinalityField(StringField())
-    idx      = UInt64Field()
-    word     = StringField()
-    stem     = StringField()
+    idx = UInt64Field()
+    word = StringField()
+    stem = StringField()
 
     # An index for faster search by document and fragment idx
-    index    = Index((document, idx), type=Index.minmax(), granularity=1)
+    index = Index((document, idx), type=Index.minmax(), granularity=1)
 
     # The primary key allows efficient lookup of stems
-    engine = MergeTree(order_by=(stem, document, idx), partition_key=('language',))
+    engine = MergeTree(order_by=(stem, document, idx), partition_key=("language",))
diff --git a/examples/full_text_search/search.py b/examples/full_text_search/search.py
index c4d0918..4175929 100644
--- a/examples/full_text_search/search.py
+++ b/examples/full_text_search/search.py
@@ -1,19 +1,20 @@
 import sys
-from colorama import init, Fore, Back, Style
-from nltk.stem.porter import PorterStemmer
-from clickhouse_orm import Database, F
-from models import Fragment
-from load import trim_punctuation
+from colorama import Fore, Style, init
+from load import trim_punctuation
+from models import Fragment
+from nltk.stem.porter import PorterStemmer
+
+from clickhouse_orm import Database, F
 
 # The wildcard character
-WILDCARD = '*'
+WILDCARD = "*"
 
 
 def prepare_search_terms(text):
-    '''
+    """
     Convert the text to search into a list of stemmed words.
-    '''
+    """
     stemmer = PorterStemmer()
     stems = []
     for word in text.split():
@@ -25,10 +26,10 @@
 
 
 def build_query(db, stems):
-    '''
+    """
     Returns a queryset instance for finding sequences of Fragment
     instances that matche the list of stemmed words.
-    '''
+    """
     # Start by searching for the first stemmed word
     all_fragments = Fragment.objects_in(db)
     query = all_fragments.filter(stem=stems[0]).only(Fragment.document, Fragment.idx)
@@ -47,44 +48,44 @@
 
 
 def get_matching_text(db, document, from_idx, to_idx, extra=5):
-    '''
+    """
     Reconstructs the document text between the given indexes (inclusive),
     plus `extra` words before and after the match. The words that are
     included in the given range are highlighted in green.
-    '''
+    """
     text = []
     conds = (Fragment.document == document) & (Fragment.idx >= from_idx - extra) & (Fragment.idx <= to_idx + extra)
-    for fragment in Fragment.objects_in(db).filter(conds).order_by('document', 'idx'):
+    for fragment in Fragment.objects_in(db).filter(conds).order_by("document", "idx"):
         word = fragment.word
         if fragment.idx == from_idx:
             word = Fore.GREEN + word
         if fragment.idx == to_idx:
             word = word + Style.RESET_ALL
         text.append(word)
-    return ' '.join(text)
+    return " ".join(text)
 
 
 def find(db, text):
-    '''
+    """
     Performs the search for the given text, and prints out the matches.
-    '''
+    """
     stems = prepare_search_terms(text)
     query = build_query(db, stems)
-    print('\n' + Fore.MAGENTA + str(query) + Style.RESET_ALL + '\n')
+    print("\n" + Fore.MAGENTA + str(query) + Style.RESET_ALL + "\n")
     for match in query:
         text = get_matching_text(db, match.document, match.idx, match.idx + len(stems) - 1)
-        print(Fore.CYAN + match.document + ':' + Style.RESET_ALL, text)
+        print(Fore.CYAN + match.document + ":" + Style.RESET_ALL, text)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     # Initialize colored output
     init()
 
     # Initialize database
-    db = Database('default')
+    db = Database("default")
 
     # Search
-    text = ' '.join(sys.argv[1:])
+    text = " ".join(sys.argv[1:])
     if text:
         find(db, text)
diff --git a/pyproject.toml b/pyproject.toml
index 8eb10c4..70e5f06 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,10 +1,5 @@
 [tool.black]
 line-length = 120
-extend-exclude = '''
-/(
-  | examples
-)/
-'''
 
 [tool.isort]
 multi_line_output = 3
diff --git a/setup.cfg b/setup.cfg
index ab5c925..d43c3f0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -17,4 +17,3 @@ ignore =
     B950 # We use E501
 exclude =
     tests/sample_migrations
-    examples