mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2025-08-02 11:10:11 +03:00
Chore: fix linting for examples
This commit is contained in:
parent
0e9dea5bcb
commit
ce79b39407
|
@ -1,20 +1,25 @@
|
|||
import psutil, time, datetime
|
||||
from clickhouse_orm import Database
|
||||
import datetime
|
||||
import time
|
||||
|
||||
import psutil
|
||||
from models import CPUStats
|
||||
|
||||
from clickhouse_orm import Database
|
||||
|
||||
db = Database('demo')
|
||||
db = Database("demo")
|
||||
db.create_table(CPUStats)
|
||||
|
||||
|
||||
psutil.cpu_percent(percpu=True) # first sample should be discarded
|
||||
psutil.cpu_percent(percpu=True) # first sample should be discarded
|
||||
|
||||
while True:
|
||||
time.sleep(1)
|
||||
stats = psutil.cpu_percent(percpu=True)
|
||||
timestamp = datetime.datetime.now()
|
||||
print(timestamp)
|
||||
db.insert([
|
||||
CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
|
||||
for cpu_id, cpu_percent in enumerate(stats)
|
||||
])
|
||||
db.insert(
|
||||
[
|
||||
CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
|
||||
for cpu_id, cpu_percent in enumerate(stats)
|
||||
]
|
||||
)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from clickhouse_orm import Model, DateTimeField, UInt16Field, Float32Field, Memory
|
||||
from clickhouse_orm import DateTimeField, Float32Field, Memory, Model, UInt16Field
|
||||
|
||||
|
||||
class CPUStats(Model):
|
||||
|
@ -8,4 +8,3 @@ class CPUStats(Model):
|
|||
cpu_percent = Float32Field()
|
||||
|
||||
engine = Memory()
|
||||
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
from clickhouse_orm import Database, F
|
||||
from models import CPUStats
|
||||
|
||||
from clickhouse_orm import Database, F
|
||||
|
||||
db = Database('demo')
|
||||
db = Database("demo")
|
||||
queryset = CPUStats.objects_in(db)
|
||||
total = queryset.filter(CPUStats.cpu_id == 1).count()
|
||||
busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
|
||||
print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))
|
||||
print("CPU 1 was busy {:.2f}% of the time".format(busy * 100.0 / total))
|
||||
|
||||
# Calculate the average usage per CPU
|
||||
for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
|
||||
print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
|
||||
print("CPU {row.cpu_id}: {row.average:.2f}%".format(row=row))
|
||||
|
|
|
@ -1,62 +1,73 @@
|
|||
import pygal
|
||||
from pygal.style import RotateStyle
|
||||
from jinja2.filters import do_filesizeformat
|
||||
from pygal.style import RotateStyle
|
||||
|
||||
|
||||
# Formatting functions
|
||||
number_formatter = lambda v: '{:,}'.format(v)
|
||||
bytes_formatter = lambda v: do_filesizeformat(v, True)
|
||||
def number_formatter(v):
|
||||
return "{:,}".format(v)
|
||||
|
||||
|
||||
def bytes_formatter(v):
    """Format *v* (a byte count) as a human-readable file size using binary units."""
    # Bug fix: the lambda-to-def conversion in this commit dropped the `return`,
    # so the function always returned None and every size chart lost its labels.
    return do_filesizeformat(v, True)
|
||||
|
||||
|
||||
def tables_piechart(db, by_field, value_formatter):
|
||||
'''
|
||||
"""
|
||||
Generate a pie chart of the top n tables in the database.
|
||||
`db` - the database instance
|
||||
`by_field` - the field name to sort by
|
||||
`value_formatter` - a function to use for formatting the numeric values
|
||||
'''
|
||||
Tables = db.get_model_for_table('tables', system_table=True)
|
||||
qs = Tables.objects_in(db).filter(database=db.db_name, is_temporary=False).exclude(engine='Buffer')
|
||||
"""
|
||||
Tables = db.get_model_for_table("tables", system_table=True)
|
||||
qs = Tables.objects_in(db).filter(database=db.db_name, is_temporary=False).exclude(engine="Buffer")
|
||||
tuples = [(getattr(table, by_field), table.name) for table in qs]
|
||||
return _generate_piechart(tuples, value_formatter)
|
||||
|
||||
|
||||
def columns_piechart(db, tbl_name, by_field, value_formatter):
|
||||
'''
|
||||
"""
|
||||
Generate a pie chart of the top n columns in the table.
|
||||
`db` - the database instance
|
||||
`tbl_name` - the table name
|
||||
`by_field` - the field name to sort by
|
||||
`value_formatter` - a function to use for formatting the numeric values
|
||||
'''
|
||||
ColumnsTable = db.get_model_for_table('columns', system_table=True)
|
||||
"""
|
||||
ColumnsTable = db.get_model_for_table("columns", system_table=True)
|
||||
qs = ColumnsTable.objects_in(db).filter(database=db.db_name, table=tbl_name)
|
||||
tuples = [(getattr(col, by_field), col.name) for col in qs]
|
||||
return _generate_piechart(tuples, value_formatter)
|
||||
|
||||
|
||||
def _get_top_tuples(tuples, n=15):
|
||||
'''
|
||||
"""
|
||||
Given a list of tuples (value, name), this function sorts
|
||||
the list and returns only the top n results. All other tuples
|
||||
are aggregated to a single "others" tuple.
|
||||
'''
|
||||
"""
|
||||
non_zero_tuples = [t for t in tuples if t[0]]
|
||||
sorted_tuples = sorted(non_zero_tuples, reverse=True)
|
||||
if len(sorted_tuples) > n:
|
||||
others = (sum(t[0] for t in sorted_tuples[n:]), 'others')
|
||||
others = (sum(t[0] for t in sorted_tuples[n:]), "others")
|
||||
sorted_tuples = sorted_tuples[:n] + [others]
|
||||
return sorted_tuples
|
||||
|
||||
|
||||
def _generate_piechart(tuples, value_formatter):
|
||||
'''
|
||||
"""
|
||||
Generates a pie chart.
|
||||
`tuples` - a list of (value, name) tuples to include in the chart
|
||||
`value_formatter` - a function to use for formatting the values
|
||||
'''
|
||||
style = RotateStyle('#9e6ffe', background='white', legend_font_family='Roboto', legend_font_size=18, tooltip_font_family='Roboto', tooltip_font_size=24)
|
||||
chart = pygal.Pie(style=style, margin=0, title=' ', value_formatter=value_formatter, truncate_legend=-1)
|
||||
"""
|
||||
style = RotateStyle(
|
||||
"#9e6ffe",
|
||||
background="white",
|
||||
legend_font_family="Roboto",
|
||||
legend_font_size=18,
|
||||
tooltip_font_family="Roboto",
|
||||
tooltip_font_size=24,
|
||||
)
|
||||
chart = pygal.Pie(style=style, margin=0, title=" ", value_formatter=value_formatter, truncate_legend=-1)
|
||||
for t in _get_top_tuples(tuples):
|
||||
chart.add(t[1], t[0])
|
||||
return chart.render(is_unicode=True, disable_xml_declaration=True)
|
||||
|
|
|
@ -1,87 +1,93 @@
|
|||
from clickhouse_orm import Database, F
|
||||
from charts import tables_piechart, columns_piechart, number_formatter, bytes_formatter
|
||||
from flask import Flask
|
||||
from flask import render_template
|
||||
import sys
|
||||
|
||||
from charts import bytes_formatter, columns_piechart, number_formatter, tables_piechart
|
||||
from flask import Flask, render_template
|
||||
|
||||
from clickhouse_orm import Database, F
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route('/')
|
||||
@app.route("/")
|
||||
def homepage_view():
|
||||
'''
|
||||
"""
|
||||
Root view that lists all databases.
|
||||
'''
|
||||
db = _get_db('system')
|
||||
"""
|
||||
db = _get_db("system")
|
||||
# Get all databases in the system.databases table
|
||||
DatabasesTable = db.get_model_for_table('databases', system_table=True)
|
||||
databases = DatabasesTable.objects_in(db).exclude(name='system')
|
||||
DatabasesTable = db.get_model_for_table("databases", system_table=True)
|
||||
databases = DatabasesTable.objects_in(db).exclude(name="system")
|
||||
databases = databases.order_by(F.lower(DatabasesTable.name))
|
||||
# Generate the page
|
||||
return render_template('homepage.html', db=db, databases=databases)
|
||||
return render_template("homepage.html", db=db, databases=databases)
|
||||
|
||||
|
||||
@app.route('/<db_name>/')
|
||||
@app.route("/<db_name>/")
|
||||
def database_view(db_name):
|
||||
'''
|
||||
"""
|
||||
A view that displays information about a single database.
|
||||
'''
|
||||
"""
|
||||
db = _get_db(db_name)
|
||||
# Get all the tables in the database, by aggregating information from system.columns
|
||||
ColumnsTable = db.get_model_for_table('columns', system_table=True)
|
||||
tables = ColumnsTable.objects_in(db).filter(database=db_name).aggregate(
|
||||
ColumnsTable.table,
|
||||
compressed_size=F.sum(ColumnsTable.data_compressed_bytes),
|
||||
uncompressed_size=F.sum(ColumnsTable.data_uncompressed_bytes),
|
||||
ratio=F.sum(ColumnsTable.data_uncompressed_bytes) / F.sum(ColumnsTable.data_compressed_bytes)
|
||||
ColumnsTable = db.get_model_for_table("columns", system_table=True)
|
||||
tables = (
|
||||
ColumnsTable.objects_in(db)
|
||||
.filter(database=db_name)
|
||||
.aggregate(
|
||||
ColumnsTable.table,
|
||||
compressed_size=F.sum(ColumnsTable.data_compressed_bytes),
|
||||
uncompressed_size=F.sum(ColumnsTable.data_uncompressed_bytes),
|
||||
ratio=F.sum(ColumnsTable.data_uncompressed_bytes) / F.sum(ColumnsTable.data_compressed_bytes),
|
||||
)
|
||||
)
|
||||
tables = tables.order_by(F.lower(ColumnsTable.table))
|
||||
# Generate the page
|
||||
return render_template('database.html',
|
||||
return render_template(
|
||||
"database.html",
|
||||
db=db,
|
||||
tables=tables,
|
||||
tables_piechart_by_rows=tables_piechart(db, 'total_rows', value_formatter=number_formatter),
|
||||
tables_piechart_by_size=tables_piechart(db, 'total_bytes', value_formatter=bytes_formatter),
|
||||
tables_piechart_by_rows=tables_piechart(db, "total_rows", value_formatter=number_formatter),
|
||||
tables_piechart_by_size=tables_piechart(db, "total_bytes", value_formatter=bytes_formatter),
|
||||
)
|
||||
|
||||
|
||||
@app.route('/<db_name>/<tbl_name>/')
|
||||
@app.route("/<db_name>/<tbl_name>/")
|
||||
def table_view(db_name, tbl_name):
|
||||
'''
|
||||
"""
|
||||
A view that displays information about a single table.
|
||||
'''
|
||||
"""
|
||||
db = _get_db(db_name)
|
||||
# Get table information from system.tables
|
||||
TablesTable = db.get_model_for_table('tables', system_table=True)
|
||||
TablesTable = db.get_model_for_table("tables", system_table=True)
|
||||
tbl_info = TablesTable.objects_in(db).filter(database=db_name, name=tbl_name)[0]
|
||||
# Get the SQL used for creating the table
|
||||
create_table_sql = db.raw('SHOW CREATE TABLE %s FORMAT TabSeparatedRaw' % tbl_name)
|
||||
create_table_sql = db.raw("SHOW CREATE TABLE %s FORMAT TabSeparatedRaw" % tbl_name)
|
||||
# Get all columns in the table from system.columns
|
||||
ColumnsTable = db.get_model_for_table('columns', system_table=True)
|
||||
ColumnsTable = db.get_model_for_table("columns", system_table=True)
|
||||
columns = ColumnsTable.objects_in(db).filter(database=db_name, table=tbl_name)
|
||||
# Generate the page
|
||||
return render_template('table.html',
|
||||
return render_template(
|
||||
"table.html",
|
||||
db=db,
|
||||
tbl_name=tbl_name,
|
||||
tbl_info=tbl_info,
|
||||
create_table_sql=create_table_sql,
|
||||
columns=columns,
|
||||
piechart=columns_piechart(db, tbl_name, 'data_compressed_bytes', value_formatter=bytes_formatter),
|
||||
piechart=columns_piechart(db, tbl_name, "data_compressed_bytes", value_formatter=bytes_formatter),
|
||||
)
|
||||
|
||||
|
||||
def _get_db(db_name):
|
||||
'''
|
||||
"""
|
||||
Returns a Database instance using connection information
|
||||
from the command line arguments (optional).
|
||||
'''
|
||||
db_url = sys.argv[1] if len(sys.argv) > 1 else 'http://localhost:8123/'
|
||||
"""
|
||||
db_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8123/"
|
||||
username = sys.argv[2] if len(sys.argv) > 2 else None
|
||||
password = sys.argv[3] if len(sys.argv) > 3 else None
|
||||
return Database(db_name, db_url, username, password, readonly=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
_get_db('system') # fail early on db connection problems
|
||||
if __name__ == "__main__":
|
||||
_get_db("system") # fail early on db connection problems
|
||||
app.run(debug=True)
|
||||
|
|
|
@ -1,27 +1,28 @@
|
|||
import requests
|
||||
import os
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def download_ebook(id):
|
||||
print(id, end=' ')
|
||||
print(id, end=" ")
|
||||
# Download the ebook's text
|
||||
r = requests.get('https://www.gutenberg.org/files/{id}/{id}-0.txt'.format(id=id))
|
||||
r = requests.get("https://www.gutenberg.org/files/{id}/{id}-0.txt".format(id=id))
|
||||
if r.status_code == 404:
|
||||
print('NOT FOUND, SKIPPING')
|
||||
print("NOT FOUND, SKIPPING")
|
||||
return
|
||||
r.raise_for_status()
|
||||
# Find the ebook's title
|
||||
text = r.content.decode('utf-8')
|
||||
text = r.content.decode("utf-8")
|
||||
for line in text.splitlines():
|
||||
if line.startswith('Title:'):
|
||||
if line.startswith("Title:"):
|
||||
title = line[6:].strip()
|
||||
print(title)
|
||||
# Save the ebook
|
||||
with open('ebooks/{}.txt'.format(title), 'wb') as f:
|
||||
with open("ebooks/{}.txt".format(title), "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
os.makedirs('ebooks', exist_ok=True)
|
||||
os.makedirs("ebooks", exist_ok=True)
|
||||
for i in [1342, 11, 84, 2701, 25525, 1661, 98, 74, 43, 215, 1400, 76]:
|
||||
download_ebook(i)
|
||||
|
|
|
@ -1,61 +1,64 @@
|
|||
import sys
|
||||
import nltk
|
||||
from nltk.stem.porter import PorterStemmer
|
||||
from glob import glob
|
||||
from clickhouse_orm import Database
|
||||
|
||||
import nltk
|
||||
from models import Fragment
|
||||
from nltk.stem.porter import PorterStemmer
|
||||
|
||||
from clickhouse_orm import Database
|
||||
|
||||
|
||||
def trim_punctuation(word):
|
||||
'''
|
||||
"""
|
||||
Trim punctuation characters from the beginning and end of the word
|
||||
'''
|
||||
"""
|
||||
start = end = len(word)
|
||||
for i in range(len(word)):
|
||||
if word[i].isalnum():
|
||||
start = min(start, i)
|
||||
end = i + 1
|
||||
return word[start : end]
|
||||
return word[start:end]
|
||||
|
||||
|
||||
def parse_file(filename):
|
||||
'''
|
||||
"""
|
||||
Parses a text file at the given path.
|
||||
Returns a generator of tuples (original_word, stemmed_word)
|
||||
The original_word may include punctuation characters.
|
||||
'''
|
||||
"""
|
||||
stemmer = PorterStemmer()
|
||||
with open(filename, 'r', encoding='utf-8') as f:
|
||||
with open(filename, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
for word in line.split():
|
||||
yield (word, stemmer.stem(trim_punctuation(word)))
|
||||
|
||||
|
||||
def get_fragments(filename):
|
||||
'''
|
||||
"""
|
||||
Converts a text file at the given path to a generator
|
||||
of Fragment instances.
|
||||
'''
|
||||
"""
|
||||
from os import path
|
||||
|
||||
document = path.splitext(path.basename(filename))[0]
|
||||
idx = 0
|
||||
for word, stem in parse_file(filename):
|
||||
idx += 1
|
||||
yield Fragment(document=document, idx=idx, word=word, stem=stem)
|
||||
print('{} - {} words'.format(filename, idx))
|
||||
print("{} - {} words".format(filename, idx))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
|
||||
# Load NLTK data if necessary
|
||||
nltk.download('punkt')
|
||||
nltk.download('wordnet')
|
||||
nltk.download("punkt")
|
||||
nltk.download("wordnet")
|
||||
|
||||
# Initialize database
|
||||
db = Database('default')
|
||||
db = Database("default")
|
||||
db.create_table(Fragment)
|
||||
|
||||
# Load files from the command line or everything under ebooks/
|
||||
filenames = sys.argv[1:] or glob('ebooks/*.txt')
|
||||
filenames = sys.argv[1:] or glob("ebooks/*.txt")
|
||||
for filename in filenames:
|
||||
db.insert(get_fragments(filename), batch_size=100000)
|
||||
|
|
|
@ -1,16 +1,18 @@
|
|||
from clickhouse_orm import *
|
||||
from clickhouse_orm.engines import MergeTree
|
||||
from clickhouse_orm.fields import LowCardinalityField, StringField, UInt64Field
|
||||
from clickhouse_orm.models import Index, Model
|
||||
|
||||
|
||||
class Fragment(Model):
|
||||
|
||||
language = LowCardinalityField(StringField(), default='EN')
|
||||
language = LowCardinalityField(StringField(), default="EN")
|
||||
document = LowCardinalityField(StringField())
|
||||
idx = UInt64Field()
|
||||
word = StringField()
|
||||
stem = StringField()
|
||||
idx = UInt64Field()
|
||||
word = StringField()
|
||||
stem = StringField()
|
||||
|
||||
# An index for faster search by document and fragment idx
|
||||
index = Index((document, idx), type=Index.minmax(), granularity=1)
|
||||
index = Index((document, idx), type=Index.minmax(), granularity=1)
|
||||
|
||||
# The primary key allows efficient lookup of stems
|
||||
engine = MergeTree(order_by=(stem, document, idx), partition_key=('language',))
|
||||
engine = MergeTree(order_by=(stem, document, idx), partition_key=("language",))
|
||||
|
|
|
@ -1,19 +1,20 @@
|
|||
import sys
|
||||
from colorama import init, Fore, Back, Style
|
||||
from nltk.stem.porter import PorterStemmer
|
||||
from clickhouse_orm import Database, F
|
||||
from models import Fragment
|
||||
from load import trim_punctuation
|
||||
|
||||
from colorama import Fore, Style, init
|
||||
from load import trim_punctuation
|
||||
from models import Fragment
|
||||
from nltk.stem.porter import PorterStemmer
|
||||
|
||||
from clickhouse_orm import Database, F
|
||||
|
||||
# The wildcard character
|
||||
WILDCARD = '*'
|
||||
WILDCARD = "*"
|
||||
|
||||
|
||||
def prepare_search_terms(text):
|
||||
'''
|
||||
"""
|
||||
Convert the text to search into a list of stemmed words.
|
||||
'''
|
||||
"""
|
||||
stemmer = PorterStemmer()
|
||||
stems = []
|
||||
for word in text.split():
|
||||
|
@ -25,10 +26,10 @@ def prepare_search_terms(text):
|
|||
|
||||
|
||||
def build_query(db, stems):
|
||||
'''
|
||||
"""
|
||||
Returns a queryset instance for finding sequences of Fragment instances
|
||||
that match the list of stemmed words.
|
||||
'''
|
||||
"""
|
||||
# Start by searching for the first stemmed word
|
||||
all_fragments = Fragment.objects_in(db)
|
||||
query = all_fragments.filter(stem=stems[0]).only(Fragment.document, Fragment.idx)
|
||||
|
@ -47,44 +48,44 @@ def build_query(db, stems):
|
|||
|
||||
|
||||
def get_matching_text(db, document, from_idx, to_idx, extra=5):
|
||||
'''
|
||||
"""
|
||||
Reconstructs the document text between the given indexes (inclusive),
|
||||
plus `extra` words before and after the match. The words that are
|
||||
included in the given range are highlighted in green.
|
||||
'''
|
||||
"""
|
||||
text = []
|
||||
conds = (Fragment.document == document) & (Fragment.idx >= from_idx - extra) & (Fragment.idx <= to_idx + extra)
|
||||
for fragment in Fragment.objects_in(db).filter(conds).order_by('document', 'idx'):
|
||||
for fragment in Fragment.objects_in(db).filter(conds).order_by("document", "idx"):
|
||||
word = fragment.word
|
||||
if fragment.idx == from_idx:
|
||||
word = Fore.GREEN + word
|
||||
if fragment.idx == to_idx:
|
||||
word = word + Style.RESET_ALL
|
||||
text.append(word)
|
||||
return ' '.join(text)
|
||||
return " ".join(text)
|
||||
|
||||
|
||||
def find(db, text):
|
||||
'''
|
||||
"""
|
||||
Performs the search for the given text, and prints out the matches.
|
||||
'''
|
||||
"""
|
||||
stems = prepare_search_terms(text)
|
||||
query = build_query(db, stems)
|
||||
print('\n' + Fore.MAGENTA + str(query) + Style.RESET_ALL + '\n')
|
||||
print("\n" + Fore.MAGENTA + str(query) + Style.RESET_ALL + "\n")
|
||||
for match in query:
|
||||
text = get_matching_text(db, match.document, match.idx, match.idx + len(stems) - 1)
|
||||
print(Fore.CYAN + match.document + ':' + Style.RESET_ALL, text)
|
||||
print(Fore.CYAN + match.document + ":" + Style.RESET_ALL, text)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
|
||||
# Initialize colored output
|
||||
init()
|
||||
|
||||
# Initialize database
|
||||
db = Database('default')
|
||||
db = Database("default")
|
||||
|
||||
# Search
|
||||
text = ' '.join(sys.argv[1:])
|
||||
text = " ".join(sys.argv[1:])
|
||||
if text:
|
||||
find(db, text)
|
||||
|
|
|
@ -1,10 +1,5 @@
|
|||
[tool.black]
|
||||
line-length = 120
|
||||
extend-exclude = '''
|
||||
/(
|
||||
| examples
|
||||
)/
|
||||
'''
|
||||
|
||||
[tool.isort]
|
||||
multi_line_output = 3
|
||||
|
|
Loading…
Reference in New Issue
Block a user