Chore: fix linting for examples

This commit is contained in:
olliemath 2021-08-16 09:44:48 +01:00
parent 0e9dea5bcb
commit ce79b39407
11 changed files with 148 additions and 126 deletions

View File

@ -1,9 +1,12 @@
import psutil, time, datetime
from clickhouse_orm import Database
import datetime
import time
import psutil
from models import CPUStats
from clickhouse_orm import Database
db = Database('demo')
db = Database("demo")
db.create_table(CPUStats)
@ -14,7 +17,9 @@ while True:
stats = psutil.cpu_percent(percpu=True)
timestamp = datetime.datetime.now()
print(timestamp)
db.insert([
db.insert(
[
CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
for cpu_id, cpu_percent in enumerate(stats)
])
]
)

View File

@ -1,4 +1,4 @@
from clickhouse_orm import Model, DateTimeField, UInt16Field, Float32Field, Memory
from clickhouse_orm import DateTimeField, Float32Field, Memory, Model, UInt16Field
class CPUStats(Model):
@ -8,4 +8,3 @@ class CPUStats(Model):
cpu_percent = Float32Field()
engine = Memory()

View File

@ -1,13 +1,13 @@
from clickhouse_orm import Database, F
from models import CPUStats
from clickhouse_orm import Database, F
db = Database("demo")
queryset = CPUStats.objects_in(db)

# Calculate what percentage of the time CPU 1 was over 95% busy
total = queryset.filter(CPUStats.cpu_id == 1).count()
busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
if total:
    print("CPU 1 was busy {:.2f}% of the time".format(busy * 100.0 / total))
else:
    # No rows for CPU 1 yet: avoid dividing by zero.
    print("CPU 1 has no recorded samples")

# Calculate the average usage per CPU
for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
    print("CPU {row.cpu_id}: {row.average:.2f}%".format(row=row))

View File

@ -1,62 +1,73 @@
import pygal
from pygal.style import RotateStyle
from jinja2.filters import do_filesizeformat
from pygal.style import RotateStyle
# Formatting functions
number_formatter = lambda v: '{:,}'.format(v)
bytes_formatter = lambda v: do_filesizeformat(v, True)
def number_formatter(v):
    """Format a number with comma thousands separators, e.g. 1234567 -> '1,234,567'."""
    return format(v, ",")
def bytes_formatter(v):
    """
    Format a byte count as a human-readable size string.

    Delegates to Jinja2's `do_filesizeformat` with its second argument
    set to True. The result must be returned: this function is used as a
    chart `value_formatter`, so returning None would lose the formatted value.
    """
    return do_filesizeformat(v, True)
def tables_piechart(db, by_field, value_formatter):
    """
    Render a pie chart comparing the tables of the database.

    Temporary tables and tables using the Buffer engine are left out.

    `db` - the database instance
    `by_field` - name of the system.tables field that sizes each slice
    `value_formatter` - a function to use for formatting the numeric values
    """
    tables_model = db.get_model_for_table("tables", system_table=True)
    queryset = tables_model.objects_in(db)
    queryset = queryset.filter(database=db.db_name, is_temporary=False)
    queryset = queryset.exclude(engine="Buffer")
    pairs = []
    for table in queryset:
        pairs.append((getattr(table, by_field), table.name))
    return _generate_piechart(pairs, value_formatter)
def columns_piechart(db, tbl_name, by_field, value_formatter):
    """
    Render a pie chart comparing the columns of one table.

    `db` - the database instance
    `tbl_name` - the table name
    `by_field` - name of the system.columns field that sizes each slice
    `value_formatter` - a function to use for formatting the numeric values
    """
    columns_model = db.get_model_for_table("columns", system_table=True)
    queryset = columns_model.objects_in(db).filter(database=db.db_name, table=tbl_name)
    pairs = []
    for column in queryset:
        pairs.append((getattr(column, by_field), column.name))
    return _generate_piechart(pairs, value_formatter)
def _get_top_tuples(tuples, n=15):
    """
    Keep the n largest (value, name) tuples and fold the rest into "others".

    Tuples whose value is falsy (e.g. zero) are dropped first. The remaining
    tuples are sorted in descending order; when more than n are left, the
    tail is replaced by a single (sum_of_tail_values, "others") tuple.
    """
    ranked = [pair for pair in tuples if pair[0]]
    ranked.sort(reverse=True)
    if len(ranked) <= n:
        return ranked
    head, tail = ranked[:n], ranked[n:]
    remainder = sum(pair[0] for pair in tail)
    return head + [(remainder, "others")]
def _generate_piechart(tuples, value_formatter):
    """
    Render a pie chart and return the rendered markup.

    `tuples` - a list of (value, name) tuples to include in the chart
    `value_formatter` - a function to use for formatting the values
    """
    font = "Roboto"
    chart_style = RotateStyle(
        "#9e6ffe",
        background="white",
        legend_font_family=font,
        legend_font_size=18,
        tooltip_font_family=font,
        tooltip_font_size=24,
    )
    pie = pygal.Pie(
        style=chart_style,
        margin=0,
        title=" ",
        value_formatter=value_formatter,
        truncate_legend=-1,
    )
    # Only the largest entries get their own slice; the rest are merged.
    for entry in _get_top_tuples(tuples):
        pie.add(entry[1], entry[0])
    return pie.render(is_unicode=True, disable_xml_declaration=True)

View File

@ -1,87 +1,93 @@
from clickhouse_orm import Database, F
from charts import tables_piechart, columns_piechart, number_formatter, bytes_formatter
from flask import Flask
from flask import render_template
import sys
from charts import bytes_formatter, columns_piechart, number_formatter, tables_piechart
from flask import Flask, render_template
from clickhouse_orm import Database, F
app = Flask(__name__)
@app.route("/")
def homepage_view():
    """
    Root view: lists every database except `system`, ordered by
    lower-cased name.
    """
    system_db = _get_db("system")
    # The list of databases lives in the system.databases table
    databases_model = system_db.get_model_for_table("databases", system_table=True)
    listing = databases_model.objects_in(system_db).exclude(name="system")
    listing = listing.order_by(F.lower(databases_model.name))
    return render_template("homepage.html", db=system_db, databases=listing)
@app.route("/<db_name>/")
def database_view(db_name):
    """
    A view that displays information about a single database:
    per-table size aggregates plus two pie charts (by rows and by bytes).
    """
    db = _get_db(db_name)
    columns_model = db.get_model_for_table("columns", system_table=True)

    # Per-table totals are derived by aggregating system.columns
    compressed_size = F.sum(columns_model.data_compressed_bytes)
    uncompressed_size = F.sum(columns_model.data_uncompressed_bytes)
    ratio = F.sum(columns_model.data_uncompressed_bytes) / F.sum(columns_model.data_compressed_bytes)
    columns_in_db = columns_model.objects_in(db).filter(database=db_name)
    tables = columns_in_db.aggregate(
        columns_model.table,
        compressed_size=compressed_size,
        uncompressed_size=uncompressed_size,
        ratio=ratio,
    )
    tables = tables.order_by(F.lower(columns_model.table))

    # Generate the page
    rows_chart = tables_piechart(db, "total_rows", value_formatter=number_formatter)
    size_chart = tables_piechart(db, "total_bytes", value_formatter=bytes_formatter)
    return render_template(
        "database.html",
        db=db,
        tables=tables,
        tables_piechart_by_rows=rows_chart,
        tables_piechart_by_size=size_chart,
    )
@app.route("/<db_name>/<tbl_name>/")
def table_view(db_name, tbl_name):
    """
    A view that displays information about a single table: its
    system.tables row, its CREATE TABLE statement, its columns and a
    pie chart of compressed bytes per column.
    """
    db = _get_db(db_name)
    # Get table information from system.tables
    TablesTable = db.get_model_for_table("tables", system_table=True)
    tbl_info = TablesTable.objects_in(db).filter(database=db_name, name=tbl_name)[0]
    # Get the SQL used for creating the table.
    # tbl_name comes straight from the URL, so it is backtick-quoted (with
    # backslashes and backticks escaped) before being placed in the statement.
    # This prevents it from altering the query and lets names containing
    # special characters work.
    quoted_tbl_name = "`%s`" % tbl_name.replace("\\", "\\\\").replace("`", "\\`")
    create_table_sql = db.raw("SHOW CREATE TABLE %s FORMAT TabSeparatedRaw" % quoted_tbl_name)
    # Get all columns in the table from system.columns
    ColumnsTable = db.get_model_for_table("columns", system_table=True)
    columns = ColumnsTable.objects_in(db).filter(database=db_name, table=tbl_name)
    # Generate the page
    return render_template(
        "table.html",
        db=db,
        tbl_name=tbl_name,
        tbl_info=tbl_info,
        create_table_sql=create_table_sql,
        columns=columns,
        piechart=columns_piechart(db, tbl_name, "data_compressed_bytes", value_formatter=bytes_formatter),
    )
def _get_db(db_name):
    """
    Build a read-only Database instance for `db_name`.

    Connection details are taken from the optional command line
    arguments, in order: URL, username, password. A missing URL falls
    back to "http://localhost:8123/"; missing credentials become None.
    """
    cli_args = sys.argv[1:]
    db_url = cli_args[0] if cli_args else "http://localhost:8123/"
    username = cli_args[1] if len(cli_args) >= 2 else None
    password = cli_args[2] if len(cli_args) >= 3 else None
    return Database(db_name, db_url, username, password, readonly=True)
if __name__ == "__main__":
    # Connect once up front so connection problems surface before the
    # server starts.
    _get_db("system")
    app.run(debug=True)

View File

@ -1,27 +1,28 @@
import requests
import os
import requests
def download_ebook(id):
    """
    Download one Project Gutenberg ebook and save it under ebooks/.

    `id` - the numeric ebook id used in the gutenberg.org URL

    A 404 response is reported and skipped; any other HTTP error raises
    via `raise_for_status()`. The file is named after the text's
    "Title:" line (the last one found), falling back to the ebook id when
    no non-empty title is present.
    """
    print(id, end=" ")
    # Download the ebook's text
    r = requests.get("https://www.gutenberg.org/files/{id}/{id}-0.txt".format(id=id))
    if r.status_code == 404:
        print("NOT FOUND, SKIPPING")
        return
    r.raise_for_status()
    # Find the ebook's title. Start from the id so that `title` is always
    # bound, even for texts that have no "Title:" line.
    title = str(id)
    text = r.content.decode("utf-8")
    for line in text.splitlines():
        if line.startswith("Title:"):
            candidate = line[6:].strip()
            if candidate:
                title = candidate
    print(title)
    # Save the ebook
    with open("ebooks/{}.txt".format(title), "wb") as f:
        f.write(r.content)
if __name__ == "__main__":
    # Make sure the output directory exists before any download is saved.
    os.makedirs("ebooks", exist_ok=True)
    ebook_ids = [1342, 11, 84, 2701, 25525, 1661, 98, 74, 43, 215, 1400, 76]
    for ebook_id in ebook_ids:
        download_ebook(ebook_id)

View File

@ -1,61 +1,64 @@
import sys
import nltk
from nltk.stem.porter import PorterStemmer
from glob import glob
from clickhouse_orm import Database
import nltk
from models import Fragment
from nltk.stem.porter import PorterStemmer
from clickhouse_orm import Database
def trim_punctuation(word):
    """
    Trim punctuation characters from the beginning and end of the word.

    Returns the slice from the first alphanumeric character through the
    last one; characters in between are kept untouched. A word with no
    alphanumeric characters yields an empty string.
    """
    # Both bounds start at len(word), so the slice is empty when no
    # alphanumeric character is found.
    start = end = len(word)
    for i, char in enumerate(word):
        if char.isalnum():
            start = min(start, i)
            end = i + 1
    return word[start:end]
def parse_file(filename):
    """
    Parses the UTF-8 text file at the given path.
    Yields tuples (original_word, stemmed_word), one per
    whitespace-separated word. The original_word may include punctuation
    characters; the stem is computed from the word with leading and
    trailing punctuation trimmed.
    """
    stemmer = PorterStemmer()
    with open(filename, "r", encoding="utf-8") as handle:
        for line in handle:
            for raw_word in line.split():
                stemmed = stemmer.stem(trim_punctuation(raw_word))
                yield raw_word, stemmed
def get_fragments(filename):
    """
    Converts the text file at the given path to a generator of Fragment
    instances, one per word, numbered from 1. The document name is the
    file's base name without its extension. Once the file is exhausted,
    the word count is printed.
    """
    from os import path

    document, _extension = path.splitext(path.basename(filename))
    count = 0
    for count, (word, stem) in enumerate(parse_file(filename), start=1):
        yield Fragment(document=document, idx=count, word=word, stem=stem)
    print("{} - {} words".format(filename, count))
if __name__ == "__main__":
    # Load NLTK data if necessary
    for resource in ("punkt", "wordnet"):
        nltk.download(resource)

    # Initialize database
    db = Database("default")
    db.create_table(Fragment)

    # Load files from the command line or everything under ebooks/
    requested = sys.argv[1:]
    filenames = requested or glob("ebooks/*.txt")
    for filename in filenames:
        db.insert(get_fragments(filename), batch_size=100000)

View File

@ -1,9 +1,11 @@
from clickhouse_orm import *
from clickhouse_orm.engines import MergeTree
from clickhouse_orm.fields import LowCardinalityField, StringField, UInt64Field
from clickhouse_orm.models import Index, Model
class Fragment(Model):
language = LowCardinalityField(StringField(), default='EN')
language = LowCardinalityField(StringField(), default="EN")
document = LowCardinalityField(StringField())
idx = UInt64Field()
word = StringField()
@ -13,4 +15,4 @@ class Fragment(Model):
index = Index((document, idx), type=Index.minmax(), granularity=1)
# The primary key allows efficient lookup of stems
engine = MergeTree(order_by=(stem, document, idx), partition_key=('language',))
engine = MergeTree(order_by=(stem, document, idx), partition_key=("language",))

View File

@ -1,19 +1,20 @@
import sys
from colorama import init, Fore, Back, Style
from nltk.stem.porter import PorterStemmer
from clickhouse_orm import Database, F
from models import Fragment
from load import trim_punctuation
from colorama import Fore, Style, init
from load import trim_punctuation
from models import Fragment
from nltk.stem.porter import PorterStemmer
from clickhouse_orm import Database, F
# The wildcard character
WILDCARD = '*'
WILDCARD = "*"
def prepare_search_terms(text):
'''
"""
Convert the text to search into a list of stemmed words.
'''
"""
stemmer = PorterStemmer()
stems = []
for word in text.split():
@ -25,10 +26,10 @@ def prepare_search_terms(text):
def build_query(db, stems):
'''
"""
Returns a queryset instance for finding sequences of Fragment instances
that matche the list of stemmed words.
'''
"""
# Start by searching for the first stemmed word
all_fragments = Fragment.objects_in(db)
query = all_fragments.filter(stem=stems[0]).only(Fragment.document, Fragment.idx)
@ -47,44 +48,44 @@ def build_query(db, stems):
def get_matching_text(db, document, from_idx, to_idx, extra=5):
    """
    Rebuilds the document text between the given indexes (inclusive),
    plus `extra` words before and after the match. The words from
    `from_idx` through `to_idx` are highlighted in green.
    """
    in_document = Fragment.document == document
    not_before = Fragment.idx >= from_idx - extra
    not_after = Fragment.idx <= to_idx + extra
    conds = in_document & not_before & not_after
    fragments = Fragment.objects_in(db).filter(conds).order_by("document", "idx")

    words = []
    for fragment in fragments:
        piece = fragment.word
        # Colour starts at the first matched word and is reset after the last.
        if fragment.idx == from_idx:
            piece = Fore.GREEN + piece
        if fragment.idx == to_idx:
            piece = piece + Style.RESET_ALL
        words.append(piece)
    return " ".join(words)
def find(db, text):
    """
    Searches for the given text and prints the query followed by one
    line per match (document name, then the matching text in context).
    """
    stems = prepare_search_terms(text)
    query = build_query(db, stems)
    print("".join(["\n", Fore.MAGENTA, str(query), Style.RESET_ALL, "\n"]))
    # A match spans len(stems) consecutive fragments starting at match.idx
    span = len(stems) - 1
    for match in query:
        snippet = get_matching_text(db, match.document, match.idx, match.idx + span)
        heading = Fore.CYAN + match.document + ":" + Style.RESET_ALL
        print(heading, snippet)
if __name__ == "__main__":
    # Initialize colored output
    init()

    # Initialize database
    db = Database("default")

    # Search for the command line arguments joined into one phrase;
    # nothing is done when no arguments were given.
    phrase = " ".join(sys.argv[1:])
    if phrase:
        find(db, phrase)

View File

@ -1,10 +1,5 @@
[tool.black]
line-length = 120
extend-exclude = '''
/(
| examples
)/
'''
[tool.isort]
multi_line_output = 3

View File

@ -17,4 +17,3 @@ ignore =
B950 # We use E501
exclude =
tests/sample_migrations
examples