Chore: fix linting for examples

This commit is contained in:
olliemath 2021-08-16 09:44:48 +01:00
parent 0e9dea5bcb
commit ce79b39407
11 changed files with 148 additions and 126 deletions

View File

@ -1,20 +1,25 @@
import psutil, time, datetime import datetime
from clickhouse_orm import Database import time
import psutil
from models import CPUStats from models import CPUStats
from clickhouse_orm import Database
db = Database('demo') db = Database("demo")
db.create_table(CPUStats) db.create_table(CPUStats)
psutil.cpu_percent(percpu=True) # first sample should be discarded psutil.cpu_percent(percpu=True) # first sample should be discarded
while True: while True:
time.sleep(1) time.sleep(1)
stats = psutil.cpu_percent(percpu=True) stats = psutil.cpu_percent(percpu=True)
timestamp = datetime.datetime.now() timestamp = datetime.datetime.now()
print(timestamp) print(timestamp)
db.insert([ db.insert(
CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent) [
for cpu_id, cpu_percent in enumerate(stats) CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
]) for cpu_id, cpu_percent in enumerate(stats)
]
)

View File

@ -1,4 +1,4 @@
from clickhouse_orm import Model, DateTimeField, UInt16Field, Float32Field, Memory from clickhouse_orm import DateTimeField, Float32Field, Memory, Model, UInt16Field
class CPUStats(Model): class CPUStats(Model):
@ -8,4 +8,3 @@ class CPUStats(Model):
cpu_percent = Float32Field() cpu_percent = Float32Field()
engine = Memory() engine = Memory()

View File

@ -1,13 +1,13 @@
from clickhouse_orm import Database, F
from models import CPUStats from models import CPUStats
from clickhouse_orm import Database, F
db = Database('demo') db = Database("demo")
queryset = CPUStats.objects_in(db) queryset = CPUStats.objects_in(db)
total = queryset.filter(CPUStats.cpu_id == 1).count() total = queryset.filter(CPUStats.cpu_id == 1).count()
busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count() busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total)) print("CPU 1 was busy {:.2f}% of the time".format(busy * 100.0 / total))
# Calculate the average usage per CPU # Calculate the average usage per CPU
for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)): for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row)) print("CPU {row.cpu_id}: {row.average:.2f}%".format(row=row))

View File

@ -1,62 +1,73 @@
import pygal import pygal
from pygal.style import RotateStyle
from jinja2.filters import do_filesizeformat from jinja2.filters import do_filesizeformat
from pygal.style import RotateStyle
# Formatting functions # Formatting functions
number_formatter = lambda v: '{:,}'.format(v) def number_formatter(v):
bytes_formatter = lambda v: do_filesizeformat(v, True) return "{:,}".format(v)
def bytes_formatter(v):
    """
    Format a byte count as a human-readable file size string.
    `v` - the numeric size in bytes
    Returns the string produced by jinja2's `do_filesizeformat`; the second
    argument (True) selects binary prefixes.
    """
    # The value must be returned: this function is passed as a chart
    # `value_formatter`, and without the `return` it always yielded None.
    return do_filesizeformat(v, True)
def tables_piechart(db, by_field, value_formatter): def tables_piechart(db, by_field, value_formatter):
''' """
Generate a pie chart of the top n tables in the database. Generate a pie chart of the top n tables in the database.
`db` - the database instance `db` - the database instance
`by_field` - the field name to sort by `by_field` - the field name to sort by
`value_formatter` - a function to use for formatting the numeric values `value_formatter` - a function to use for formatting the numeric values
''' """
Tables = db.get_model_for_table('tables', system_table=True) Tables = db.get_model_for_table("tables", system_table=True)
qs = Tables.objects_in(db).filter(database=db.db_name, is_temporary=False).exclude(engine='Buffer') qs = Tables.objects_in(db).filter(database=db.db_name, is_temporary=False).exclude(engine="Buffer")
tuples = [(getattr(table, by_field), table.name) for table in qs] tuples = [(getattr(table, by_field), table.name) for table in qs]
return _generate_piechart(tuples, value_formatter) return _generate_piechart(tuples, value_formatter)
def columns_piechart(db, tbl_name, by_field, value_formatter): def columns_piechart(db, tbl_name, by_field, value_formatter):
''' """
Generate a pie chart of the top n columns in the table. Generate a pie chart of the top n columns in the table.
`db` - the database instance `db` - the database instance
`tbl_name` - the table name `tbl_name` - the table name
`by_field` - the field name to sort by `by_field` - the field name to sort by
`value_formatter` - a function to use for formatting the numeric values `value_formatter` - a function to use for formatting the numeric values
''' """
ColumnsTable = db.get_model_for_table('columns', system_table=True) ColumnsTable = db.get_model_for_table("columns", system_table=True)
qs = ColumnsTable.objects_in(db).filter(database=db.db_name, table=tbl_name) qs = ColumnsTable.objects_in(db).filter(database=db.db_name, table=tbl_name)
tuples = [(getattr(col, by_field), col.name) for col in qs] tuples = [(getattr(col, by_field), col.name) for col in qs]
return _generate_piechart(tuples, value_formatter) return _generate_piechart(tuples, value_formatter)
def _get_top_tuples(tuples, n=15): def _get_top_tuples(tuples, n=15):
''' """
Given a list of tuples (value, name), this function sorts Given a list of tuples (value, name), this function sorts
the list and returns only the top n results. All other tuples the list and returns only the top n results. All other tuples
are aggregated to a single "others" tuple. are aggregated to a single "others" tuple.
''' """
non_zero_tuples = [t for t in tuples if t[0]] non_zero_tuples = [t for t in tuples if t[0]]
sorted_tuples = sorted(non_zero_tuples, reverse=True) sorted_tuples = sorted(non_zero_tuples, reverse=True)
if len(sorted_tuples) > n: if len(sorted_tuples) > n:
others = (sum(t[0] for t in sorted_tuples[n:]), 'others') others = (sum(t[0] for t in sorted_tuples[n:]), "others")
sorted_tuples = sorted_tuples[:n] + [others] sorted_tuples = sorted_tuples[:n] + [others]
return sorted_tuples return sorted_tuples
def _generate_piechart(tuples, value_formatter): def _generate_piechart(tuples, value_formatter):
''' """
Generates a pie chart. Generates a pie chart.
`tuples` - a list of (value, name) tuples to include in the chart `tuples` - a list of (value, name) tuples to include in the chart
`value_formatter` - a function to use for formatting the values `value_formatter` - a function to use for formatting the values
''' """
style = RotateStyle('#9e6ffe', background='white', legend_font_family='Roboto', legend_font_size=18, tooltip_font_family='Roboto', tooltip_font_size=24) style = RotateStyle(
chart = pygal.Pie(style=style, margin=0, title=' ', value_formatter=value_formatter, truncate_legend=-1) "#9e6ffe",
background="white",
legend_font_family="Roboto",
legend_font_size=18,
tooltip_font_family="Roboto",
tooltip_font_size=24,
)
chart = pygal.Pie(style=style, margin=0, title=" ", value_formatter=value_formatter, truncate_legend=-1)
for t in _get_top_tuples(tuples): for t in _get_top_tuples(tuples):
chart.add(t[1], t[0]) chart.add(t[1], t[0])
return chart.render(is_unicode=True, disable_xml_declaration=True) return chart.render(is_unicode=True, disable_xml_declaration=True)

View File

@ -1,87 +1,93 @@
from clickhouse_orm import Database, F
from charts import tables_piechart, columns_piechart, number_formatter, bytes_formatter
from flask import Flask
from flask import render_template
import sys import sys
from charts import bytes_formatter, columns_piechart, number_formatter, tables_piechart
from flask import Flask, render_template
from clickhouse_orm import Database, F
app = Flask(__name__) app = Flask(__name__)
@app.route('/') @app.route("/")
def homepage_view(): def homepage_view():
''' """
Root view that lists all databases. Root view that lists all databases.
''' """
db = _get_db('system') db = _get_db("system")
# Get all databases in the system.databases table # Get all databases in the system.databases table
DatabasesTable = db.get_model_for_table('databases', system_table=True) DatabasesTable = db.get_model_for_table("databases", system_table=True)
databases = DatabasesTable.objects_in(db).exclude(name='system') databases = DatabasesTable.objects_in(db).exclude(name="system")
databases = databases.order_by(F.lower(DatabasesTable.name)) databases = databases.order_by(F.lower(DatabasesTable.name))
# Generate the page # Generate the page
return render_template('homepage.html', db=db, databases=databases) return render_template("homepage.html", db=db, databases=databases)
@app.route('/<db_name>/') @app.route("/<db_name>/")
def database_view(db_name): def database_view(db_name):
''' """
A view that displays information about a single database. A view that displays information about a single database.
''' """
db = _get_db(db_name) db = _get_db(db_name)
# Get all the tables in the database, by aggregating information from system.columns # Get all the tables in the database, by aggregating information from system.columns
ColumnsTable = db.get_model_for_table('columns', system_table=True) ColumnsTable = db.get_model_for_table("columns", system_table=True)
tables = ColumnsTable.objects_in(db).filter(database=db_name).aggregate( tables = (
ColumnsTable.table, ColumnsTable.objects_in(db)
compressed_size=F.sum(ColumnsTable.data_compressed_bytes), .filter(database=db_name)
uncompressed_size=F.sum(ColumnsTable.data_uncompressed_bytes), .aggregate(
ratio=F.sum(ColumnsTable.data_uncompressed_bytes) / F.sum(ColumnsTable.data_compressed_bytes) ColumnsTable.table,
compressed_size=F.sum(ColumnsTable.data_compressed_bytes),
uncompressed_size=F.sum(ColumnsTable.data_uncompressed_bytes),
ratio=F.sum(ColumnsTable.data_uncompressed_bytes) / F.sum(ColumnsTable.data_compressed_bytes),
)
) )
tables = tables.order_by(F.lower(ColumnsTable.table)) tables = tables.order_by(F.lower(ColumnsTable.table))
# Generate the page # Generate the page
return render_template('database.html', return render_template(
"database.html",
db=db, db=db,
tables=tables, tables=tables,
tables_piechart_by_rows=tables_piechart(db, 'total_rows', value_formatter=number_formatter), tables_piechart_by_rows=tables_piechart(db, "total_rows", value_formatter=number_formatter),
tables_piechart_by_size=tables_piechart(db, 'total_bytes', value_formatter=bytes_formatter), tables_piechart_by_size=tables_piechart(db, "total_bytes", value_formatter=bytes_formatter),
) )
@app.route('/<db_name>/<tbl_name>/') @app.route("/<db_name>/<tbl_name>/")
def table_view(db_name, tbl_name): def table_view(db_name, tbl_name):
''' """
A view that displays information about a single table. A view that displays information about a single table.
''' """
db = _get_db(db_name) db = _get_db(db_name)
# Get table information from system.tables # Get table information from system.tables
TablesTable = db.get_model_for_table('tables', system_table=True) TablesTable = db.get_model_for_table("tables", system_table=True)
tbl_info = TablesTable.objects_in(db).filter(database=db_name, name=tbl_name)[0] tbl_info = TablesTable.objects_in(db).filter(database=db_name, name=tbl_name)[0]
# Get the SQL used for creating the table # Get the SQL used for creating the table
create_table_sql = db.raw('SHOW CREATE TABLE %s FORMAT TabSeparatedRaw' % tbl_name) create_table_sql = db.raw("SHOW CREATE TABLE %s FORMAT TabSeparatedRaw" % tbl_name)
# Get all columns in the table from system.columns # Get all columns in the table from system.columns
ColumnsTable = db.get_model_for_table('columns', system_table=True) ColumnsTable = db.get_model_for_table("columns", system_table=True)
columns = ColumnsTable.objects_in(db).filter(database=db_name, table=tbl_name) columns = ColumnsTable.objects_in(db).filter(database=db_name, table=tbl_name)
# Generate the page # Generate the page
return render_template('table.html', return render_template(
"table.html",
db=db, db=db,
tbl_name=tbl_name, tbl_name=tbl_name,
tbl_info=tbl_info, tbl_info=tbl_info,
create_table_sql=create_table_sql, create_table_sql=create_table_sql,
columns=columns, columns=columns,
piechart=columns_piechart(db, tbl_name, 'data_compressed_bytes', value_formatter=bytes_formatter), piechart=columns_piechart(db, tbl_name, "data_compressed_bytes", value_formatter=bytes_formatter),
) )
def _get_db(db_name): def _get_db(db_name):
''' """
Returns a Database instance using connection information Returns a Database instance using connection information
from the command line arguments (optional). from the command line arguments (optional).
''' """
db_url = sys.argv[1] if len(sys.argv) > 1 else 'http://localhost:8123/' db_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8123/"
username = sys.argv[2] if len(sys.argv) > 2 else None username = sys.argv[2] if len(sys.argv) > 2 else None
password = sys.argv[3] if len(sys.argv) > 3 else None password = sys.argv[3] if len(sys.argv) > 3 else None
return Database(db_name, db_url, username, password, readonly=True) return Database(db_name, db_url, username, password, readonly=True)
if __name__ == '__main__': if __name__ == "__main__":
_get_db('system') # fail early on db connection problems _get_db("system") # fail early on db connection problems
app.run(debug=True) app.run(debug=True)

View File

@ -1,27 +1,28 @@
import requests
import os import os
import requests
def download_ebook(id): def download_ebook(id):
print(id, end=' ') print(id, end=" ")
# Download the ebook's text # Download the ebook's text
r = requests.get('https://www.gutenberg.org/files/{id}/{id}-0.txt'.format(id=id)) r = requests.get("https://www.gutenberg.org/files/{id}/{id}-0.txt".format(id=id))
if r.status_code == 404: if r.status_code == 404:
print('NOT FOUND, SKIPPING') print("NOT FOUND, SKIPPING")
return return
r.raise_for_status() r.raise_for_status()
# Find the ebook's title # Find the ebook's title
text = r.content.decode('utf-8') text = r.content.decode("utf-8")
for line in text.splitlines(): for line in text.splitlines():
if line.startswith('Title:'): if line.startswith("Title:"):
title = line[6:].strip() title = line[6:].strip()
print(title) print(title)
# Save the ebook # Save the ebook
with open('ebooks/{}.txt'.format(title), 'wb') as f: with open("ebooks/{}.txt".format(title), "wb") as f:
f.write(r.content) f.write(r.content)
if __name__ == "__main__": if __name__ == "__main__":
os.makedirs('ebooks', exist_ok=True) os.makedirs("ebooks", exist_ok=True)
for i in [1342, 11, 84, 2701, 25525, 1661, 98, 74, 43, 215, 1400, 76]: for i in [1342, 11, 84, 2701, 25525, 1661, 98, 74, 43, 215, 1400, 76]:
download_ebook(i) download_ebook(i)

View File

@ -1,61 +1,64 @@
import sys import sys
import nltk
from nltk.stem.porter import PorterStemmer
from glob import glob from glob import glob
from clickhouse_orm import Database
import nltk
from models import Fragment from models import Fragment
from nltk.stem.porter import PorterStemmer
from clickhouse_orm import Database
def trim_punctuation(word): def trim_punctuation(word):
''' """
Trim punctuation characters from the beginning and end of the word Trim punctuation characters from the beginning and end of the word
''' """
start = end = len(word) start = end = len(word)
for i in range(len(word)): for i in range(len(word)):
if word[i].isalnum(): if word[i].isalnum():
start = min(start, i) start = min(start, i)
end = i + 1 end = i + 1
return word[start : end] return word[start:end]
def parse_file(filename): def parse_file(filename):
''' """
Parses a text file at the given path. Parses a text file at the given path.
Returns a generator of tuples (original_word, stemmed_word) Returns a generator of tuples (original_word, stemmed_word)
The original_word may include punctuation characters. The original_word may include punctuation characters.
''' """
stemmer = PorterStemmer() stemmer = PorterStemmer()
with open(filename, 'r', encoding='utf-8') as f: with open(filename, "r", encoding="utf-8") as f:
for line in f: for line in f:
for word in line.split(): for word in line.split():
yield (word, stemmer.stem(trim_punctuation(word))) yield (word, stemmer.stem(trim_punctuation(word)))
def get_fragments(filename): def get_fragments(filename):
''' """
Converts a text file at the given path to a generator Converts a text file at the given path to a generator
of Fragment instances. of Fragment instances.
''' """
from os import path from os import path
document = path.splitext(path.basename(filename))[0] document = path.splitext(path.basename(filename))[0]
idx = 0 idx = 0
for word, stem in parse_file(filename): for word, stem in parse_file(filename):
idx += 1 idx += 1
yield Fragment(document=document, idx=idx, word=word, stem=stem) yield Fragment(document=document, idx=idx, word=word, stem=stem)
print('{} - {} words'.format(filename, idx)) print("{} - {} words".format(filename, idx))
if __name__ == '__main__': if __name__ == "__main__":
# Load NLTK data if necessary # Load NLTK data if necessary
nltk.download('punkt') nltk.download("punkt")
nltk.download('wordnet') nltk.download("wordnet")
# Initialize database # Initialize database
db = Database('default') db = Database("default")
db.create_table(Fragment) db.create_table(Fragment)
# Load files from the command line or everything under ebooks/ # Load files from the command line or everything under ebooks/
filenames = sys.argv[1:] or glob('ebooks/*.txt') filenames = sys.argv[1:] or glob("ebooks/*.txt")
for filename in filenames: for filename in filenames:
db.insert(get_fragments(filename), batch_size=100000) db.insert(get_fragments(filename), batch_size=100000)

View File

@ -1,16 +1,18 @@
from clickhouse_orm import * from clickhouse_orm.engines import MergeTree
from clickhouse_orm.fields import LowCardinalityField, StringField, UInt64Field
from clickhouse_orm.models import Index, Model
class Fragment(Model): class Fragment(Model):
language = LowCardinalityField(StringField(), default='EN') language = LowCardinalityField(StringField(), default="EN")
document = LowCardinalityField(StringField()) document = LowCardinalityField(StringField())
idx = UInt64Field() idx = UInt64Field()
word = StringField() word = StringField()
stem = StringField() stem = StringField()
# An index for faster search by document and fragment idx # An index for faster search by document and fragment idx
index = Index((document, idx), type=Index.minmax(), granularity=1) index = Index((document, idx), type=Index.minmax(), granularity=1)
# The primary key allows efficient lookup of stems # The primary key allows efficient lookup of stems
engine = MergeTree(order_by=(stem, document, idx), partition_key=('language',)) engine = MergeTree(order_by=(stem, document, idx), partition_key=("language",))

View File

@ -1,19 +1,20 @@
import sys import sys
from colorama import init, Fore, Back, Style
from nltk.stem.porter import PorterStemmer
from clickhouse_orm import Database, F
from models import Fragment
from load import trim_punctuation
from colorama import Fore, Style, init
from load import trim_punctuation
from models import Fragment
from nltk.stem.porter import PorterStemmer
from clickhouse_orm import Database, F
# The wildcard character # The wildcard character
WILDCARD = '*' WILDCARD = "*"
def prepare_search_terms(text): def prepare_search_terms(text):
''' """
Convert the text to search into a list of stemmed words. Convert the text to search into a list of stemmed words.
''' """
stemmer = PorterStemmer() stemmer = PorterStemmer()
stems = [] stems = []
for word in text.split(): for word in text.split():
@ -25,10 +26,10 @@ def prepare_search_terms(text):
def build_query(db, stems): def build_query(db, stems):
''' """
Returns a queryset instance for finding sequences of Fragment instances Returns a queryset instance for finding sequences of Fragment instances
that match the list of stemmed words. that match the list of stemmed words.
''' """
# Start by searching for the first stemmed word # Start by searching for the first stemmed word
all_fragments = Fragment.objects_in(db) all_fragments = Fragment.objects_in(db)
query = all_fragments.filter(stem=stems[0]).only(Fragment.document, Fragment.idx) query = all_fragments.filter(stem=stems[0]).only(Fragment.document, Fragment.idx)
@ -47,44 +48,44 @@ def build_query(db, stems):
def get_matching_text(db, document, from_idx, to_idx, extra=5): def get_matching_text(db, document, from_idx, to_idx, extra=5):
''' """
Reconstructs the document text between the given indexes (inclusive), Reconstructs the document text between the given indexes (inclusive),
plus `extra` words before and after the match. The words that are plus `extra` words before and after the match. The words that are
included in the given range are highlighted in green. included in the given range are highlighted in green.
''' """
text = [] text = []
conds = (Fragment.document == document) & (Fragment.idx >= from_idx - extra) & (Fragment.idx <= to_idx + extra) conds = (Fragment.document == document) & (Fragment.idx >= from_idx - extra) & (Fragment.idx <= to_idx + extra)
for fragment in Fragment.objects_in(db).filter(conds).order_by('document', 'idx'): for fragment in Fragment.objects_in(db).filter(conds).order_by("document", "idx"):
word = fragment.word word = fragment.word
if fragment.idx == from_idx: if fragment.idx == from_idx:
word = Fore.GREEN + word word = Fore.GREEN + word
if fragment.idx == to_idx: if fragment.idx == to_idx:
word = word + Style.RESET_ALL word = word + Style.RESET_ALL
text.append(word) text.append(word)
return ' '.join(text) return " ".join(text)
def find(db, text): def find(db, text):
''' """
Performs the search for the given text, and prints out the matches. Performs the search for the given text, and prints out the matches.
''' """
stems = prepare_search_terms(text) stems = prepare_search_terms(text)
query = build_query(db, stems) query = build_query(db, stems)
print('\n' + Fore.MAGENTA + str(query) + Style.RESET_ALL + '\n') print("\n" + Fore.MAGENTA + str(query) + Style.RESET_ALL + "\n")
for match in query: for match in query:
text = get_matching_text(db, match.document, match.idx, match.idx + len(stems) - 1) text = get_matching_text(db, match.document, match.idx, match.idx + len(stems) - 1)
print(Fore.CYAN + match.document + ':' + Style.RESET_ALL, text) print(Fore.CYAN + match.document + ":" + Style.RESET_ALL, text)
if __name__ == '__main__': if __name__ == "__main__":
# Initialize colored output # Initialize colored output
init() init()
# Initialize database # Initialize database
db = Database('default') db = Database("default")
# Search # Search
text = ' '.join(sys.argv[1:]) text = " ".join(sys.argv[1:])
if text: if text:
find(db, text) find(db, text)

View File

@ -1,10 +1,5 @@
[tool.black] [tool.black]
line-length = 120 line-length = 120
extend-exclude = '''
/(
| examples
)/
'''
[tool.isort] [tool.isort]
multi_line_output = 3 multi_line_output = 3

View File

@ -17,4 +17,3 @@ ignore =
B950 # We use E501 B950 # We use E501
exclude = exclude =
tests/sample_migrations tests/sample_migrations
examples