Initial commit

This commit is contained in:
Alexander Karpov 2023-06-25 03:26:59 +03:00
commit e35215f22a
114 changed files with 7855 additions and 0 deletions

10
.dockerignore Normal file
View File

@ -0,0 +1,10 @@
.editorconfig
.gitattributes
.github
.gitignore
.gitlab-ci.yml
.idea
.pre-commit-config.yaml
.readthedocs.yml
.travis.yml
venv

27
.editorconfig Normal file
View File

@ -0,0 +1,27 @@
# http://editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[*.{py,rst,ini}]
indent_style = space
indent_size = 4
[*.{html,css,scss,json,yml,xml}]
indent_style = space
indent_size = 2
[*.md]
trim_trailing_whitespace = false
[Makefile]
indent_style = tab
[nginx.conf]
indent_style = space
indent_size = 2

4
.env Normal file
View File

@ -0,0 +1,4 @@
DATABASE_URL=postgres://postgres:postgres@127.0.0.1:5432/dock_checker
CELERY_BROKER_URL=redis://localhost:6379/0
REDIS_URL=redis://localhost:6379/1
USE_DOCKER=no

4
.env.template Normal file
View File

@ -0,0 +1,4 @@
DATABASE_URL=postgres://postgres:postgres@127.0.0.1:5432/dock_checker
CELERY_BROKER_URL=redis://localhost:6379/0
REDIS_URL=redis://localhost:6379/1
USE_DOCKER=no

25
.gitattributes vendored Normal file
View File

@ -0,0 +1,25 @@
# Source files
# ============
*.pxd text diff=python
*.py text diff=python
*.py3 text diff=python
*.pyw text diff=python
*.pyx text diff=python
*.pyz text diff=python
*.pyi text diff=python
# Binary files
# ============
*.db binary
*.p binary
*.pkl binary
*.pickle binary
*.pyc binary export-ignore
*.pyo binary export-ignore
*.pyd binary
# Jupyter notebook
*.ipynb text eol=lf
# ignore static
dock_checker/static/** linguist-vendored

331
.gitignore vendored Normal file
View File

@ -0,0 +1,331 @@
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
staticfiles/
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# Environments
.venv
venv/
ENV/
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
### Node template
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
# nyc test coverage
.nyc_output
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (http://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Typescript v1 declaration files
typings/
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
### Linux template
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### VisualStudioCode template
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/
# Provided default Pycharm Run/Debug Configurations should be tracked by git
# In case of local modifications made by Pycharm, use update-index command
# for each changed file, like this:
# git update-index --assume-unchanged .idea/dock_checker.iml
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
### Windows template
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk
### macOS template
# General
*.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### SublimeText template
# Cache files for Sublime Text
*.tmlanguage.cache
*.tmPreferences.cache
*.stTheme.cache
# Workspace files are user-specific
*.sublime-workspace
# Project files should be checked into the repository, unless a significant
# proportion of contributors will probably not be using Sublime Text
# *.sublime-project
# SFTP configuration file
sftp-config.json
# Package control specific files
Package Control.last-run
Package Control.ca-list
Package Control.ca-bundle
Package Control.system-ca-bundle
Package Control.cache/
Package Control.ca-certs/
Package Control.merged-ca-bundle
Package Control.user-ca-bundle
oscrypto-ca-bundle.crt
bh_unicode_properties.cache
# Sublime-github package stores a github token in this file
# https://packagecontrol.io/packages/sublime-github
GitHub.sublime-settings
### Vim template
# Swap
[._]*.s[a-v][a-z]
[._]*.sw[a-p]
[._]s[a-v][a-z]
[._]sw[a-p]
# Session
Session.vim
# Temporary
.netrwhist
# Auto-generated tag files
tags
### Project template
dock_checker/media/
.pytest_cache/
.ipython/

39
.pre-commit-config.yaml Normal file
View File

@ -0,0 +1,39 @@
exclude: "^docs/|/migrations/"
default_stages: [commit]
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- repo: https://github.com/asottile/pyupgrade
rev: v3.3.1
hooks:
- id: pyupgrade
args: [--py310-plus]
- repo: https://github.com/psf/black
rev: 22.12.0
hooks:
- id: black
- repo: https://github.com/PyCQA/isort
rev: 5.11.4
hooks:
- id: isort
- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
hooks:
- id: flake8
args: ["--config=setup.cfg"]
additional_dependencies: [flake8-isort]
# sets up .pre-commit-ci.yaml to ensure pre-commit dependencies stay up to date
ci:
autoupdate_schedule: weekly
skip: []
submodules: false

14
.pylintrc Normal file
View File

@ -0,0 +1,14 @@
[MASTER]
load-plugins=pylint_django, pylint_celery
django-settings-module=config.settings.local
[FORMAT]
max-line-length=120
[MESSAGES CONTROL]
disable=missing-docstring,invalid-name
[DESIGN]
max-parents=13
[TYPECHECK]
generated-members=REQUEST,acl_users,aq_parent,"[a-zA-Z]+_set{1,2}",save,delete

40
README.md Normal file
View File

@ -0,0 +1,40 @@
# Capital Dock Checker
Detection and comparison with the reference name of the capital construction object in the project documentation
## Basic Commands
### Runserver
$ ./manage.py runserver_plus
### Type checks
Running type checks with mypy:
$ mypy dock_checker
#### Running tests with pytest
$ pytest
### Setting Up Your Users
- To create a **superuser account**, use this command:
$ python manage.py createsuperuser
### Celery
This app comes with Celery.
To run a celery worker:
``` bash
cd dock_checker
celery -A config.celery_app worker -l info
```
Please note: for Celery's import magic to work, it is important *where* the celery commands are run. If you are in the same folder as *manage.py*, you should be all set.
made with [cookiecutter-django](https://github.com/Alexander-D-Karpov/cookiecutter-django)

View File

@ -0,0 +1,75 @@
# Local development image: installs dependencies with Poetry and wires up the
# start scripts for Django, the Celery worker/beat and Flower.
ARG PYTHON_VERSION=3.11-slim
# define an alias for the specific python version used in this file.
FROM python:${PYTHON_VERSION} as python
# Python build stage
FROM python as python-build-stage
ARG BUILD_ENVIRONMENT=local
# Install apt packages
RUN apt-get update && apt-get install --no-install-recommends -y \
  # dependencies for building Python packages
  build-essential \
  # psycopg2 dependencies
  libpq-dev
# NOTE(review): nothing below copies artifacts out of python-build-stage;
# the stage currently appears unused — confirm before removing.
# Python 'run' stage
FROM python as python-run-stage
ARG BUILD_ENVIRONMENT=local
ARG APP_HOME=/app
# Unbuffered stdout/stderr and no .pyc files inside the container.
ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1
ENV BUILD_ENV ${BUILD_ENVIRONMENT}
WORKDIR ${APP_HOME}
# Install required system dependencies
RUN apt-get update && apt-get install --no-install-recommends -y \
  # psycopg2 dependencies
  libpq-dev \
  # Translations dependencies
  gettext \
  # cleaning up unused files
  && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
  && rm -rf /var/lib/apt/lists/*
RUN pip install poetry
# Configuring poetry
RUN poetry config virtualenvs.create false
# NOTE(review): the files land in /, not ${APP_HOME}; `poetry install` below
# only finds them via Poetry's parent-directory lookup from /app — consider
# copying them to ./ instead.
COPY pyproject.toml poetry.lock /
# Installing requirements
RUN poetry install
# Normalise Windows line endings and make each entry/start script executable.
COPY ./compose/production/django/entrypoint /entrypoint
RUN sed -i 's/\r$//g' /entrypoint
RUN chmod +x /entrypoint
COPY ./compose/local/django/start /start
RUN sed -i 's/\r$//g' /start
RUN chmod +x /start
COPY ./compose/local/django/celery/worker/start /start-celeryworker
RUN sed -i 's/\r$//g' /start-celeryworker
RUN chmod +x /start-celeryworker
COPY ./compose/local/django/celery/beat/start /start-celerybeat
RUN sed -i 's/\r$//g' /start-celerybeat
RUN chmod +x /start-celerybeat
COPY ./compose/local/django/celery/flower/start /start-flower
RUN sed -i 's/\r$//g' /start-flower
RUN chmod +x /start-flower
# copy application code to WORKDIR
COPY . ${APP_HOME}
ENTRYPOINT ["/entrypoint"]

View File

@ -0,0 +1,8 @@
#!/bin/bash
# Local Celery beat starter: removes a stale pid file, then runs beat under
# watchfiles so it restarts automatically on code changes.
set -o errexit
set -o nounset
# A leftover pid file from a previous run would prevent beat from starting.
rm -f './celerybeat.pid'
exec watchfiles celery.__main__.main --args '-A config.celery_app beat -l INFO'

View File

@ -0,0 +1,8 @@
#!/bin/bash
# Local Flower (Celery monitoring UI) starter, auto-restarted by watchfiles.
set -o errexit
set -o nounset
# NOTE(review): assumes CELERY_BROKER_URL, CELERY_FLOWER_USER and
# CELERY_FLOWER_PASSWORD are set in the environment — `set -o nounset`
# aborts the script otherwise.
exec watchfiles celery.__main__.main \
--args \
"-A config.celery_app -b \"${CELERY_BROKER_URL}\" flower --basic_auth=\"${CELERY_FLOWER_USER}:${CELERY_FLOWER_PASSWORD}\""

View File

@ -0,0 +1,7 @@
#!/bin/bash
# Local Celery worker starter, auto-restarted on code changes via watchfiles.
set -o errexit
set -o nounset
exec watchfiles celery.__main__.main --args '-A config.celery_app worker -l INFO'

View File

@ -0,0 +1,9 @@
#!/bin/bash
# Local Django starter: apply pending migrations, then run the
# django-extensions development server on all interfaces.
set -o errexit
set -o pipefail
set -o nounset
python manage.py migrate
exec python manage.py runserver_plus 0.0.0.0:8000

View File

@ -0,0 +1,81 @@
# Production image: installs dependencies with Poetry and runs the app as an
# unprivileged 'django' user.
ARG PYTHON_VERSION=3.11-slim
# define an alias for the specific python version used in this file.
FROM python:${PYTHON_VERSION} as python
# Python build stage
FROM python as python-build-stage
ARG BUILD_ENVIRONMENT=production
# Install apt packages
RUN apt-get update && apt-get install --no-install-recommends -y \
  # dependencies for building Python packages
  build-essential \
  # psycopg2 dependencies
  libpq-dev
# NOTE(review): nothing below copies artifacts out of python-build-stage;
# the stage currently appears unused — confirm before removing.
# Python 'run' stage
FROM python as python-run-stage
ARG BUILD_ENVIRONMENT=production
ARG APP_HOME=/app
# Unbuffered stdout/stderr and no .pyc files inside the container.
ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1
ENV BUILD_ENV ${BUILD_ENVIRONMENT}
WORKDIR ${APP_HOME}
# Install required system dependencies
RUN apt-get update && apt-get install --no-install-recommends -y \
  # psycopg2 dependencies
  libpq-dev \
  # Translations dependencies
  gettext \
  # cleaning up unused files
  && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
  && rm -rf /var/lib/apt/lists/*
# Unprivileged user the application will run as.
RUN addgroup --system django \
  && adduser --system --ingroup django django
RUN pip install poetry
# Configuring poetry
RUN poetry config virtualenvs.create false
# NOTE(review): the files land in /, not ${APP_HOME}; `poetry install` below
# only finds them via Poetry's parent-directory lookup from /app.
COPY pyproject.toml poetry.lock /
# Installing requirements
RUN poetry install
# Normalise Windows line endings and make each entry/start script executable.
# NOTE(review): the start scripts are copied from compose/local/... into the
# production image — confirm this is intentional (production variants may exist).
COPY ./compose/production/django/entrypoint /entrypoint
RUN sed -i 's/\r$//g' /entrypoint
RUN chmod +x /entrypoint
COPY ./compose/local/django/start /start
RUN sed -i 's/\r$//g' /start
RUN chmod +x /start
COPY ./compose/local/django/celery/worker/start /start-celeryworker
RUN sed -i 's/\r$//g' /start-celeryworker
RUN chmod +x /start-celeryworker
COPY ./compose/local/django/celery/beat/start /start-celerybeat
RUN sed -i 's/\r$//g' /start-celerybeat
RUN chmod +x /start-celerybeat
COPY ./compose/local/django/celery/flower/start /start-flower
RUN sed -i 's/\r$//g' /start-flower
RUN chmod +x /start-flower
# copy application code to WORKDIR
COPY --chown=django:django . ${APP_HOME}
# make django owner of the WORKDIR directory as well.
RUN chown django:django ${APP_HOME}
USER django
# NOTE(review): no ENTRYPOINT/CMD is declared here although /entrypoint is
# prepared above — presumably supplied by docker-compose; verify.

View File

@ -0,0 +1,8 @@
#!/bin/bash
# Production Celery beat starter.
set -o errexit
set -o pipefail
set -o nounset
exec celery -A config.celery_app beat -l INFO

View File

@ -0,0 +1,11 @@
#!/bin/bash
# Production Flower starter; broker URL and basic-auth credentials come from
# the environment (`set -o nounset` aborts if any is missing).
set -o errexit
set -o nounset
exec celery \
-A config.celery_app \
-b "${CELERY_BROKER_URL}" \
flower \
--basic_auth="${CELERY_FLOWER_USER}:${CELERY_FLOWER_PASSWORD}"

View File

@ -0,0 +1,8 @@
#!/bin/bash
# Production Celery worker starter.
set -o errexit
set -o pipefail
set -o nounset
exec celery -A config.celery_app worker -l INFO

View File

@ -0,0 +1,49 @@
#!/bin/bash
# Container entrypoint: derive CELERY_BROKER_URL / DATABASE_URL from the
# environment, block until PostgreSQL accepts connections, then exec the
# container command.
set -o errexit
set -o pipefail
set -o nounset
# N.B. If only .env files supported variable expansion...
export CELERY_BROKER_URL="${REDIS_URL}"
# Fall back to the postgres image's default user. The ":-" default keeps this
# check working under `set -o nounset` even when POSTGRES_USER is entirely
# unset (the bare ${POSTGRES_USER} would abort with "unbound variable" and the
# fallback would never run).
if [ -z "${POSTGRES_USER:-}" ]; then
    base_postgres_image_default_user='postgres'
    export POSTGRES_USER="${base_postgres_image_default_user}"
fi
export DATABASE_URL="postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}"
# The heredoc delimiter is intentionally unquoted so the shell interpolates
# the POSTGRES_* values into the Python source below.
# NOTE(review): values containing double quotes or backslashes would break the
# generated Python string literals — assumed not to occur in practice.
python << END
import sys
import time

import psycopg2

suggest_unrecoverable_after = 30
start = time.time()
while True:
    try:
        psycopg2.connect(
            dbname="${POSTGRES_DB}",
            user="${POSTGRES_USER}",
            password="${POSTGRES_PASSWORD}",
            host="${POSTGRES_HOST}",
            port="${POSTGRES_PORT}",
        )
        break
    except psycopg2.OperationalError as error:
        sys.stderr.write("Waiting for PostgreSQL to become available...\n")
        if time.time() - start > suggest_unrecoverable_after:
            sys.stderr.write(" This is taking longer than expected. The following exception may be indicative of an unrecoverable error: '{}'\n".format(error))
        time.sleep(1)
END
>&2 echo 'PostgreSQL is available'
exec "$@"

View File

@ -0,0 +1,9 @@
#!/bin/bash
# Production Django starter: collect static files, then serve via gunicorn.
# NOTE(review): migrations are not applied here — assumed to run elsewhere.
set -o errexit
set -o pipefail
set -o nounset
python /app/manage.py collectstatic --noinput
exec /usr/local/bin/gunicorn config.wsgi --bind 0.0.0.0:5000 --chdir=/app

View File

@ -0,0 +1,6 @@
# PostgreSQL image bundling the maintenance scripts (backup/backups/restore)
# as top-level commands on PATH.
FROM postgres:14
COPY ./compose/production/postgres/maintenance /usr/local/bin/maintenance
RUN chmod +x /usr/local/bin/maintenance/*
# Flatten the scripts into /usr/local/bin so they can be invoked by name.
RUN mv /usr/local/bin/maintenance/* /usr/local/bin \
  && rmdir /usr/local/bin/maintenance

View File

@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Shared constants for the postgres maintenance scripts.
# Directory (inside the container) where backup archives are stored.
BACKUP_DIR_PATH='/backups'
# Filename prefix for generated backup archives.
BACKUP_FILE_PREFIX='backup'

View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Countdown display helper used by the maintenance scripts.
countdown() {
    declare desc="A simple countdown. Source: https://superuser.com/a/611582"
    local seconds="${1}"
    # Epoch second at which the countdown ends.
    local d=$(($(date +%s) + seconds))
    # Redraw the remaining time on the same line (\r) roughly 10x per second.
    # Legacy backticks were replaced with the modern $() command substitution
    # (the original mixed both forms) and expansions are now quoted.
    while [ "$d" -ge "$(date +%s)" ]; do
        echo -ne "$(date -u --date @$((d - $(date +%s))) +%H:%M:%S)\r"
        sleep 0.1
    done
}

View File

@ -0,0 +1,41 @@
#!/usr/bin/env bash
# Shared logging helpers for the maintenance scripts. Each helper prefixes its
# arguments with a severity tag; ANSI escape codes provide colour.
# "${*}" (not "${@}") is used so all arguments join into a single word inside
# the message string — mixing "${@}" into a larger string is the ShellCheck
# SC2145 defect and behaves unexpectedly if IFS is changed.
message_newline() {
    echo
}
message_debug() {
    echo -e "DEBUG: ${*}"
}
message_welcome() {
    echo -e "\e[1m${*}\e[0m"
}
message_warning() {
    echo -e "\e[33mWARNING\e[0m: ${*}"
}
message_error() {
    echo -e "\e[31mERROR\e[0m: ${*}"
}
message_info() {
    echo -e "\e[37mINFO\e[0m: ${*}"
}
message_suggestion() {
    echo -e "\e[33mSUGGESTION\e[0m: ${*}"
}
message_success() {
    echo -e "\e[32mSUCCESS\e[0m: ${*}"
}

View File

@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Interactive yes/no confirmation helper.
yes_no() {
    declare desc="Prompt for confirmation. \$\"\{1\}\": confirmation message."
    local arg1="${1}"
    local response=
    read -r -p "${arg1} (y/[n])? " response
    # NOTE(review): this exits the current shell (0 on yes, 1 otherwise)
    # rather than returning a status — callers presumably invoke it in a
    # subshell; verify before reusing elsewhere.
    if [[ "${response}" =~ ^[Yy]$ ]]
    then
        exit 0
    else
        exit 1
    fi
}

View File

@ -0,0 +1,38 @@
#!/usr/bin/env bash
### Create a database backup.
###
### Usage:
### $ docker-compose -f <environment>.yml (exec |run --rm) postgres backup
set -o errexit
set -o pipefail
set -o nounset
# Locate the shared helper scripts relative to this file.
working_dir="$(dirname ${0})"
source "${working_dir}/_sourced/constants.sh"
source "${working_dir}/_sourced/messages.sh"
message_welcome "Backing up the '${POSTGRES_DB}' database..."
# Refuse to run as the default 'postgres' superuser.
if [[ "${POSTGRES_USER}" == "postgres" ]]; then
    message_error "Backing up as 'postgres' user is not supported. Assign 'POSTGRES_USER' env with another one and try again."
    exit 1
fi
# Standard PG* environment variables picked up by pg_dump.
export PGHOST="${POSTGRES_HOST}"
export PGPORT="${POSTGRES_PORT}"
export PGUSER="${POSTGRES_USER}"
export PGPASSWORD="${POSTGRES_PASSWORD}"
export PGDATABASE="${POSTGRES_DB}"
# Timestamped, gzip-compressed plain-SQL dump.
backup_filename="${BACKUP_FILE_PREFIX}_$(date +'%Y_%m_%dT%H_%M_%S').sql.gz"
pg_dump | gzip > "${BACKUP_DIR_PATH}/${backup_filename}"
message_success "'${POSTGRES_DB}' database backup '${backup_filename}' has been created and placed in '${BACKUP_DIR_PATH}'."

View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
### View backups.
###
### Usage:
### $ docker-compose -f <environment>.yml (exec |run --rm) postgres backups
set -o errexit
set -o pipefail
set -o nounset
# Locate the shared helper scripts relative to this file.
working_dir="$(dirname ${0})"
source "${working_dir}/_sourced/constants.sh"
source "${working_dir}/_sourced/messages.sh"
message_welcome "These are the backups you have got:"
# Newest first, human-readable sizes.
ls -lht "${BACKUP_DIR_PATH}"

View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
### Restore database from a backup.
###
### Parameters:
### <1> filename of an existing backup.
###
### Usage:
### $ docker-compose -f <environment>.yml (exec |run --rm) postgres restore <1>
set -o errexit
set -o pipefail
set -o nounset
# Locate the shared helper scripts relative to this file.
working_dir="$(dirname ${0})"
source "${working_dir}/_sourced/constants.sh"
source "${working_dir}/_sourced/messages.sh"
# A backup filename must be supplied as the first positional parameter
# (${1+x} is empty only when $1 is unset, which is nounset-safe).
if [[ -z ${1+x} ]]; then
    message_error "Backup filename is not specified yet it is a required parameter. Make sure you provide one and try again."
    exit 1
fi
backup_filename="${BACKUP_DIR_PATH}/${1}"
if [[ ! -f "${backup_filename}" ]]; then
    message_error "No backup with the specified filename found. Check out the 'backups' maintenance script output to see if there is one and try again."
    exit 1
fi
message_welcome "Restoring the '${POSTGRES_DB}' database from the '${backup_filename}' backup..."
# Refuse to restore as the default 'postgres' superuser.
if [[ "${POSTGRES_USER}" == "postgres" ]]; then
    message_error "Restoring as 'postgres' user is not supported. Assign 'POSTGRES_USER' env with another one and try again."
    exit 1
fi
# Standard PG* environment variables picked up by dropdb/createdb/psql.
export PGHOST="${POSTGRES_HOST}"
export PGPORT="${POSTGRES_PORT}"
export PGUSER="${POSTGRES_USER}"
export PGPASSWORD="${POSTGRES_PASSWORD}"
export PGDATABASE="${POSTGRES_DB}"
message_info "Dropping the database..."
dropdb "${PGDATABASE}"
message_info "Creating a new database..."
# createdb takes the database name from the exported PGDATABASE variable.
createdb --owner="${POSTGRES_USER}"
message_info "Applying the backup to the new database..."
gunzip -c "${backup_filename}" | psql "${POSTGRES_DB}"
message_success "The '${POSTGRES_DB}' database has been restored from the '${backup_filename}' backup."

View File

@ -0,0 +1,5 @@
# Traefik reverse-proxy image with a pre-created ACME certificate store
# (acme.json must be chmod 600 or Traefik refuses to use it).
FROM traefik:v2.2.11
RUN mkdir -p /etc/traefik/acme \
  && touch /etc/traefik/acme/acme.json \
  && chmod 600 /etc/traefik/acme/acme.json
COPY ./compose/production/traefik/traefik.yml /etc/traefik

View File

@ -0,0 +1,75 @@
log:
level: INFO
entryPoints:
web:
# http
address: ":80"
http:
# https://docs.traefik.io/routing/entrypoints/#entrypoint
redirections:
entryPoint:
to: web-secure
web-secure:
# https
address: ":443"
flower:
address: ":5555"
certificatesResolvers:
letsencrypt:
# https://docs.traefik.io/master/https/acme/#lets-encrypt
acme:
email: "sanspie@dev2.akarpov.ru"
storage: /etc/traefik/acme/acme.json
# https://docs.traefik.io/master/https/acme/#httpchallenge
httpChallenge:
entryPoint: web
http:
routers:
web-secure-router:
rule: "Host(`dev2.akarpov.ru`)"
entryPoints:
- web-secure
middlewares:
- csrf
service: django
tls:
# https://docs.traefik.io/master/routing/routers/#certresolver
certResolver: letsencrypt
flower-secure-router:
rule: "Host(`dev2.akarpov.ru`)"
entryPoints:
- flower
service: flower
tls:
# https://docs.traefik.io/master/routing/routers/#certresolver
certResolver: letsencrypt
middlewares:
csrf:
# https://docs.traefik.io/master/middlewares/headers/#hostsproxyheaders
# https://docs.djangoproject.com/en/dev/ref/csrf/#ajax
headers:
hostsProxyHeaders: ["X-CSRFToken"]
services:
django:
loadBalancer:
servers:
- url: http://django:5000
flower:
loadBalancer:
servers:
- url: http://flower:5555
providers:
# https://docs.traefik.io/master/providers/file/
file:
filename: /etc/traefik/traefik.yml
watch: true

5
config/__init__.py Normal file
View File

@ -0,0 +1,5 @@
"""Project package initialisation: expose the Celery app for Django startup."""
# This will make sure the app is always imported when
# Django starts so that shared_task will use this app.
from .celery_app import app as celery_app

__all__ = ("celery_app",)

4
config/api_router.py Normal file
View File

@ -0,0 +1,4 @@
"""Root API URL configuration; delegates to the processor app's API urls."""
from django.urls import path, include

# Namespace used when reversing API urls (e.g. reverse("api:...")).
app_name = "api"
urlpatterns = [path("", include("dock_checker.processor.api.urls"))]

17
config/celery_app.py Normal file
View File

@ -0,0 +1,17 @@
"""Celery application instance for the dock_checker project."""
import os

from celery import Celery

# set the default Django settings module for the 'celery' program.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local")

app = Celery("dock_checker")

# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
#   should have a `CELERY_` prefix.
app.config_from_object("django.conf:settings", namespace="CELERY")

# Load task modules from all registered Django app configs.
app.autodiscover_tasks()

View File

350
config/settings/base.py Normal file
View File

@ -0,0 +1,350 @@
"""
Base settings to build other settings files upon.
"""
from pathlib import Path
import environ
import structlog
ROOT_DIR = Path(__file__).resolve(strict=True).parent.parent.parent
# dock_checker/
APPS_DIR = ROOT_DIR / "dock_checker"
env = environ.Env()
READ_DOT_ENV_FILE = env.bool("DJANGO_READ_DOT_ENV_FILE", default=True)
if READ_DOT_ENV_FILE:
# OS environment variables take precedence over variables from .env
env.read_env(str(ROOT_DIR / ".env"))
# GENERAL
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#debug
DEBUG = env.bool("DJANGO_DEBUG", False)
# Local time zone. Choices are
# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
# though not all of them may be available with every OS.
# In Windows, this must be set to your system time zone.
TIME_ZONE = "Europe/Moscow"
# https://docs.djangoproject.com/en/dev/ref/settings/#language-code
LANGUAGE_CODE = "en-us"
# https://docs.djangoproject.com/en/dev/ref/settings/#site-id
SITE_ID = 1
# https://docs.djangoproject.com/en/dev/ref/settings/#use-i18n
USE_I18N = True
# https://docs.djangoproject.com/en/dev/ref/settings/#use-tz
USE_TZ = True
# https://docs.djangoproject.com/en/dev/ref/settings/#locale-paths
LOCALE_PATHS = [str(ROOT_DIR / "locale")]
# DATABASES
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#databases
DATABASES = {"default": env.db("DATABASE_URL")}
DATABASES["default"]["ATOMIC_REQUESTS"] = True
# https://docs.djangoproject.com/en/stable/ref/settings/#std:setting-DEFAULT_AUTO_FIELD
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
# URLS
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#root-urlconf
ROOT_URLCONF = "config.urls"
# https://docs.djangoproject.com/en/dev/ref/settings/#wsgi-application
WSGI_APPLICATION = "config.wsgi.application"
# APPS
# ------------------------------------------------------------------------------
DJANGO_APPS = [
"django.contrib.auth",
"django.contrib.contenttypes",
"django.contrib.sessions",
"django.contrib.sites",
"django.contrib.messages",
"django.contrib.staticfiles",
# "django.contrib.humanize", # Handy template tags
"django.contrib.admin",
"django.forms",
]
THIRD_PARTY_APPS = [
"django_celery_beat",
"rest_framework",
"rest_framework.authtoken",
"corsheaders",
"drf_spectacular",
]
LOCAL_APPS = [
"dock_checker.users",
"dock_checker.processor",
]
# https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS
# MIGRATIONS
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#migration-modules
MIGRATION_MODULES = {"sites": "dock_checker.contrib.sites.migrations"}
# AUTHENTICATION
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#authentication-backends
AUTHENTICATION_BACKENDS = [
"django.contrib.auth.backends.ModelBackend",
]
# https://docs.djangoproject.com/en/dev/ref/settings/#auth-user-model
AUTH_USER_MODEL = "users.User"
# PASSWORDS
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#password-hashers
PASSWORD_HASHERS = [
# https://docs.djangoproject.com/en/dev/topics/auth/passwords/#using-argon2-with-django
"django.contrib.auth.hashers.Argon2PasswordHasher",
"django.contrib.auth.hashers.PBKDF2PasswordHasher",
"django.contrib.auth.hashers.PBKDF2SHA1PasswordHasher",
"django.contrib.auth.hashers.BCryptSHA256PasswordHasher",
]
# https://docs.djangoproject.com/en/dev/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator"
},
{"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator"},
{"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator"},
{"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator"},
]
# MIDDLEWARE
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#middleware
MIDDLEWARE = [
"django.middleware.security.SecurityMiddleware",
"corsheaders.middleware.CorsMiddleware",
"whitenoise.middleware.WhiteNoiseMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware",
"django.middleware.locale.LocaleMiddleware",
"django.middleware.common.CommonMiddleware",
"django.middleware.csrf.CsrfViewMiddleware",
"django.contrib.auth.middleware.AuthenticationMiddleware",
"django.contrib.messages.middleware.MessageMiddleware",
"django.middleware.common.BrokenLinkEmailsMiddleware",
"django.middleware.clickjacking.XFrameOptionsMiddleware",
]
# STATIC
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#static-root
STATIC_ROOT = str(ROOT_DIR / "staticfiles")
# https://docs.djangoproject.com/en/dev/ref/settings/#static-url
STATIC_URL = "/static/"
# https://docs.djangoproject.com/en/dev/ref/contrib/staticfiles/#std:setting-STATICFILES_DIRS
STATICFILES_DIRS = [str(APPS_DIR / "static")]
# https://docs.djangoproject.com/en/dev/ref/contrib/staticfiles/#staticfiles-finders
STATICFILES_FINDERS = [
"django.contrib.staticfiles.finders.FileSystemFinder",
"django.contrib.staticfiles.finders.AppDirectoriesFinder",
]
# MEDIA
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#media-root
MEDIA_ROOT = str(APPS_DIR / "media")
# https://docs.djangoproject.com/en/dev/ref/settings/#media-url
MEDIA_URL = "/media/"
# TEMPLATES
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#templates
TEMPLATES = [
{
# https://docs.djangoproject.com/en/dev/ref/settings/#std:setting-TEMPLATES-BACKEND
"BACKEND": "django.template.backends.django.DjangoTemplates",
# https://docs.djangoproject.com/en/dev/ref/settings/#dirs
"DIRS": [str(APPS_DIR / "templates")],
# https://docs.djangoproject.com/en/dev/ref/settings/#app-dirs
"APP_DIRS": True,
"OPTIONS": {
# https://docs.djangoproject.com/en/dev/ref/settings/#template-context-processors
"context_processors": [
"django.template.context_processors.debug",
"django.template.context_processors.request",
"django.contrib.auth.context_processors.auth",
"django.template.context_processors.i18n",
"django.template.context_processors.media",
"django.template.context_processors.static",
"django.template.context_processors.tz",
"django.contrib.messages.context_processors.messages",
],
},
}
]
# https://docs.djangoproject.com/en/dev/ref/settings/#form-renderer
FORM_RENDERER = "django.forms.renderers.TemplatesSetting"
# FIXTURES
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#fixture-dirs
FIXTURE_DIRS = (str(APPS_DIR / "fixtures"),)
# SECURITY
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#session-cookie-httponly
SESSION_COOKIE_HTTPONLY = True
# https://docs.djangoproject.com/en/dev/ref/settings/#csrf-cookie-httponly
CSRF_COOKIE_HTTPONLY = True
# https://docs.djangoproject.com/en/dev/ref/settings/#secure-browser-xss-filter
SECURE_BROWSER_XSS_FILTER = True
# https://docs.djangoproject.com/en/dev/ref/settings/#x-frame-options
X_FRAME_OPTIONS = "DENY"
# ADMIN
# ------------------------------------------------------------------------------
# Django Admin URL.
ADMIN_URL = "admin/"
# https://docs.djangoproject.com/en/dev/ref/settings/#admins
ADMINS = [("""sanspie""", "sanspie@dev2.akarpov.ru")]
# https://docs.djangoproject.com/en/dev/ref/settings/#managers
MANAGERS = ADMINS
# LOGGING
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#logging
# See https://docs.djangoproject.com/en/dev/topics/logging for
# more details on how to customize your logging configuration.
LOGGING = {
"version": 1,
"disable_existing_loggers": False,
"formatters": {
"json_formatter": {
"()": structlog.stdlib.ProcessorFormatter,
"processor": structlog.processors.JSONRenderer(),
},
"plain_console": {
"()": structlog.stdlib.ProcessorFormatter,
"processor": structlog.dev.ConsoleRenderer(),
},
"key_value": {
"()": structlog.stdlib.ProcessorFormatter,
"processor": structlog.processors.KeyValueRenderer(
key_order=["timestamp", "level", "event", "logger"]
),
},
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"formatter": "plain_console",
},
"json_file": {
"class": "logging.handlers.WatchedFileHandler",
"filename": "logs/json.log",
"formatter": "json_formatter",
},
"flat_line_file": {
"class": "logging.handlers.WatchedFileHandler",
"filename": "logs/flat_line.log",
"formatter": "key_value",
},
},
"loggers": {
"django_structlog": {
"handlers": ["console", "flat_line_file", "json_file"],
"level": "INFO",
},
# Make sure to replace the following logger's name for yours
"django_structlog_demo_project": {
"handlers": ["console", "flat_line_file", "json_file"],
"level": "INFO",
},
},
}
structlog.configure(
processors=[
structlog.contextvars.merge_contextvars,
structlog.stdlib.filter_by_level,
structlog.processors.TimeStamper(fmt="iso"),
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
],
logger_factory=structlog.stdlib.LoggerFactory(),
cache_logger_on_first_use=True,
)
# Celery
# ------------------------------------------------------------------------------
if USE_TZ:
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-timezone
CELERY_TIMEZONE = TIME_ZONE
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-broker_url
CELERY_BROKER_URL = env("CELERY_BROKER_URL")
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-result_backend
CELERY_RESULT_BACKEND = CELERY_BROKER_URL
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#result-extended
CELERY_RESULT_EXTENDED = True
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#result-backend-always-retry
# https://github.com/celery/celery/pull/6122
CELERY_RESULT_BACKEND_ALWAYS_RETRY = True
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#result-backend-max-retries
CELERY_RESULT_BACKEND_MAX_RETRIES = 10
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-accept_content
CELERY_ACCEPT_CONTENT = ["json"]
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-task_serializer
CELERY_TASK_SERIALIZER = "json"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-result_serializer
CELERY_RESULT_SERIALIZER = "json"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-time-limit
CELERY_TASK_TIME_LIMIT = 20 * 60
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-soft-time-limit
CELERY_TASK_SOFT_TIME_LIMIT = 10 * 60
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-scheduler
CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers:DatabaseScheduler"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#worker-send-task-events
CELERY_WORKER_SEND_TASK_EVENTS = True
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std-setting-task_send_sent_event
CELERY_TASK_SEND_SENT_EVENT = True
# DRF
# -------------------------------------------------------------------------------
# django-rest-framework - https://www.django-rest-framework.org/api-guide/settings/
REST_FRAMEWORK = {
"DEFAULT_AUTHENTICATION_CLASSES": (
"rest_framework.authentication.SessionAuthentication",
"rest_framework.authentication.TokenAuthentication",
),
"DEFAULT_PERMISSION_CLASSES": ("rest_framework.permissions.AllowAny",),
"DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema",
}
# django-cors-headers - https://github.com/adamchainz/django-cors-headers#setup
CORS_URLS_REGEX = r"^/api/.*$"
# By Default swagger ui is available only to admin user(s). You can change permission classes to change that
# See more configuration options at https://drf-spectacular.readthedocs.io/en/latest/settings.html#settings
SPECTACULAR_SETTINGS = {
"TITLE": "Capital Dock Checker API",
"DESCRIPTION": "Documentation of API endpoints of Capital Dock Checker",
"VERSION": "1.0.0",
"SERVE_PERMISSIONS": [],
"SERVERS": [
{"url": "http://127.0.0.1:8000", "description": "Local Development server"},
{"url": "https://dev2.akarpov.ru", "description": "Production server"},
],
}
CACHE_TTL = 60 * 1500
CACHES = {
"default": {
"BACKEND": "django_redis.cache.RedisCache",
"LOCATION": "redis://127.0.0.1:6379/1",
"OPTIONS": {"CLIENT_CLASS": "django_redis.client.DefaultClient"},
"KEY_PREFIX": "dock_checker",
}
}
SESSION_ENGINE = "django.contrib.sessions.backends.cache"
SESSION_CACHE_ALIAS = "default"

51
config/settings/local.py Normal file
View File

@ -0,0 +1,51 @@
from .base import *  # noqa
from .base import env
# GENERAL
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#debug
DEBUG = True
# https://docs.djangoproject.com/en/dev/ref/settings/#secret-key
# Hardcoded fallback is acceptable here: this module is for local development only.
SECRET_KEY = env(
    "DJANGO_SECRET_KEY",
    default="dmvHcXHczWrqiCensgEL5buzNKpjzZk6YvZRUa6ALnxEJUWdiCeBbyYp19E4iEJJ",
)
# https://docs.djangoproject.com/en/dev/ref/settings/#allowed-hosts
ALLOWED_HOSTS = ["localhost", "0.0.0.0", "127.0.0.1", "dev2.akarpov.ru"]
# Allow any origin during development (frontend dev servers on random ports).
CORS_ORIGIN_ALLOW_ALL = True
# WhiteNoise
# ------------------------------------------------------------------------------
# http://whitenoise.evans.io/en/latest/django.html#using-whitenoise-in-development
INSTALLED_APPS = ["whitenoise.runserver_nostatic"] + INSTALLED_APPS  # noqa F405
# django-debug-toolbar
# ------------------------------------------------------------------------------
# https://django-debug-toolbar.readthedocs.io/en/latest/installation.html#prerequisites
INSTALLED_APPS += ["debug_toolbar"]  # noqa F405
# https://django-debug-toolbar.readthedocs.io/en/latest/installation.html#middleware
MIDDLEWARE += ["debug_toolbar.middleware.DebugToolbarMiddleware"]  # noqa F405
# https://django-debug-toolbar.readthedocs.io/en/latest/configuration.html#debug-toolbar-config
DEBUG_TOOLBAR_CONFIG = {
    "DISABLE_PANELS": ["debug_toolbar.panels.redirects.RedirectsPanel"],
    "SHOW_TEMPLATE_CONTEXT": True,
}
# https://django-debug-toolbar.readthedocs.io/en/latest/installation.html#internal-ips
# 10.0.2.2 is the host address as seen from a VirtualBox/Vagrant guest.
INTERNAL_IPS = ["127.0.0.1", "10.0.2.2"]
if env("USE_DOCKER") == "yes":
    import socket
    # Inside docker the host's gateway IP ends in .1 on the container subnet;
    # add it so the debug toolbar renders for requests from the host.
    hostname, _, ips = socket.gethostbyname_ex(socket.gethostname())
    INTERNAL_IPS += [".".join(ip.split(".")[:-1] + ["1"]) for ip in ips]
# django-extensions
# ------------------------------------------------------------------------------
# https://django-extensions.readthedocs.io/en/latest/installation_instructions.html#configuration
INSTALLED_APPS += ["django_extensions"]  # noqa F405
# Celery
# ------------------------------------------------------------------------------
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-eager-propagates
CELERY_TASK_EAGER_PROPAGATES = True
# Your stuff...
# ------------------------------------------------------------------------------

View File

@ -0,0 +1,115 @@
from .base import *  # noqa
from .base import env
# GENERAL
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#secret-key
# No default on purpose: production must fail fast if the secret is missing.
SECRET_KEY = env("DJANGO_SECRET_KEY")
# https://docs.djangoproject.com/en/dev/ref/settings/#allowed-hosts
ALLOWED_HOSTS = env.list("DJANGO_ALLOWED_HOSTS", default=["dev2.akarpov.ru"])
# DATABASES
# ------------------------------------------------------------------------------
# Persistent DB connections (seconds) to avoid per-request connection setup.
DATABASES["default"]["CONN_MAX_AGE"] = env.int("CONN_MAX_AGE", default=60)  # noqa F405
# CACHES
# ------------------------------------------------------------------------------
CACHES = {
    "default": {
        "BACKEND": "django_redis.cache.RedisCache",
        "LOCATION": env("REDIS_URL"),
        "OPTIONS": {
            "CLIENT_CLASS": "django_redis.client.DefaultClient",
            # Mimicing memcache behavior.
            # https://github.com/jazzband/django-redis#memcached-exceptions-behavior
            "IGNORE_EXCEPTIONS": True,
        },
    }
}
# SECURITY
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#secure-proxy-ssl-header
SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")
# https://docs.djangoproject.com/en/dev/ref/settings/#secure-ssl-redirect
SECURE_SSL_REDIRECT = env.bool("DJANGO_SECURE_SSL_REDIRECT", default=True)
# https://docs.djangoproject.com/en/dev/ref/settings/#session-cookie-secure
SESSION_COOKIE_SECURE = True
# https://docs.djangoproject.com/en/dev/ref/settings/#csrf-cookie-secure
CSRF_COOKIE_SECURE = True
# https://docs.djangoproject.com/en/dev/topics/security/#ssl-https
# https://docs.djangoproject.com/en/dev/ref/settings/#secure-hsts-seconds
# 518400 seconds = 6 days of HSTS.
SECURE_HSTS_SECONDS = 518400
# https://docs.djangoproject.com/en/dev/ref/settings/#secure-hsts-include-subdomains
SECURE_HSTS_INCLUDE_SUBDOMAINS = env.bool(
    "DJANGO_SECURE_HSTS_INCLUDE_SUBDOMAINS", default=True
)
# https://docs.djangoproject.com/en/dev/ref/settings/#secure-hsts-preload
SECURE_HSTS_PRELOAD = env.bool("DJANGO_SECURE_HSTS_PRELOAD", default=True)
# https://docs.djangoproject.com/en/dev/ref/middleware/#x-content-type-options-nosniff
SECURE_CONTENT_TYPE_NOSNIFF = env.bool(
    "DJANGO_SECURE_CONTENT_TYPE_NOSNIFF", default=True
)
# STATIC
# ------------------------
# WhiteNoise serves hashed, gzipped static files directly from the app server.
STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"
# MEDIA
# ------------------------------------------------------------------------------
# ADMIN
# ------------------------------------------------------------------------------
# Django Admin URL regex.
# Read from the environment so the admin path is not guessable in production.
ADMIN_URL = env("DJANGO_ADMIN_URL")
# LOGGING
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#logging
# See https://docs.djangoproject.com/en/dev/topics/logging for
# more details on how to customize your logging configuration.
# A sample logging configuration. The only tangible logging
# performed by this configuration is to send an email to
# the site admins on every HTTP 500 error when DEBUG=False.
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "filters": {"require_debug_false": {"()": "django.utils.log.RequireDebugFalse"}},
    "formatters": {
        "verbose": {
            "format": "%(levelname)s %(asctime)s %(module)s "
            "%(process)d %(thread)d %(message)s"
        }
    },
    "handlers": {
        "mail_admins": {
            "level": "ERROR",
            "filters": ["require_debug_false"],
            "class": "django.utils.log.AdminEmailHandler",
        },
        "console": {
            "level": "DEBUG",
            "class": "logging.StreamHandler",
            "formatter": "verbose",
        },
    },
    "root": {"level": "INFO", "handlers": ["console"]},
    "loggers": {
        "django.request": {
            "handlers": ["mail_admins"],
            "level": "ERROR",
            "propagate": True,
        },
        "django.security.DisallowedHost": {
            "level": "ERROR",
            "handlers": ["console", "mail_admins"],
            "propagate": True,
        },
    },
}
# django-rest-framework
# -------------------------------------------------------------------------------
# Tools that generate code samples can use SERVERS to point to the correct domain
SPECTACULAR_SETTINGS["SERVERS"] = [  # noqa F405
    {"url": "https://dev2.akarpov.ru", "description": "Production server"}
]

33
config/settings/test.py Normal file
View File

@ -0,0 +1,33 @@
"""
With these settings, tests run faster.
"""
from .base import * # noqa
from .base import env
# GENERAL
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#secret-key
SECRET_KEY = env(
"DJANGO_SECRET_KEY",
default="NxhpmQEDiN98ffqCJXUzEImtr0vUoxPYMOVinwbD7Yk7HyzZ4k4LssOuSlcrO5mW",
)
# https://docs.djangoproject.com/en/dev/ref/settings/#test-runner
TEST_RUNNER = "django.test.runner.DiscoverRunner"
# PASSWORDS
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#password-hashers
PASSWORD_HASHERS = ["django.contrib.auth.hashers.MD5PasswordHasher"]
# EMAIL
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#email-backend
EMAIL_BACKEND = "django.core.mail.backends.locmem.EmailBackend"
# DEBUGGING FOR TEMPLATES
# ------------------------------------------------------------------------------
TEMPLATES[0]["OPTIONS"]["debug"] = True # type: ignore # noqa F405
# Your stuff...
# ------------------------------------------------------------------------------

36
config/urls.py Normal file
View File

@ -0,0 +1,36 @@
from django.conf import settings
from django.conf.urls.static import static
from django.contrib import admin
from django.urls import include, path
# NOTE(review): `default_views` and `TemplateView` are imported but unused in
# this module — likely leftovers from the cookiecutter error-page routes.
from django.views import defaults as default_views
from django.views.generic import TemplateView
from drf_spectacular.views import SpectacularAPIView, SpectacularSwaggerView
from rest_framework.authtoken.views import obtain_auth_token
urlpatterns = [
    # Django Admin, use {% url 'admin:index' %}
    path(settings.ADMIN_URL, admin.site.urls),
    # User management
    # Your stuff: custom urls includes go here
] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
# API URLS
urlpatterns += [
    # API base url
    path("api/", include("config.api_router")),
    # DRF auth token
    path("api/auth/token/", obtain_auth_token),
    # OpenAPI schema and interactive docs (drf-spectacular).
    path("api/schema/", SpectacularAPIView.as_view(), name="api-schema"),
    path(
        "api/docs/",
        SpectacularSwaggerView.as_view(url_name="api-schema"),
        name="api-docs",
    ),
]
if settings.DEBUG:
    # This allows the error pages to be debugged during development, just visit
    # these url in browser to see how these error pages look like.
    if "debug_toolbar" in settings.INSTALLED_APPS:
        import debug_toolbar
        urlpatterns = [path("__debug__/", include(debug_toolbar.urls))] + urlpatterns

38
config/wsgi.py Normal file
View File

@ -0,0 +1,38 @@
"""
WSGI config for Capital Dock Checker project.
This module contains the WSGI application used by Django's development server
and any production WSGI deployments. It should expose a module-level variable
named ``application``. Django's ``runserver`` and ``runfcgi`` commands discover
this application via the ``WSGI_APPLICATION`` setting.
Usually you will have the standard Django WSGI application here, but it also
might make sense to replace the whole Django WSGI application with a custom one
that later delegates to the Django one. For example, you could introduce WSGI
middleware here, or combine a Django application with an application of another
framework.
"""
import os
import sys
from pathlib import Path
from django.core.wsgi import get_wsgi_application
# This allows easy placement of apps within the interior
# dock_checker directory.
ROOT_DIR = Path(__file__).resolve(strict=True).parent.parent
sys.path.append(str(ROOT_DIR / "dock_checker"))
# We defer to a DJANGO_SETTINGS_MODULE already in the environment. This breaks
# if running multiple sites in the same mod_wsgi process. To fix this, use
# mod_wsgi daemon mode with each site in its own daemon process, or use
# os.environ["DJANGO_SETTINGS_MODULE"] = "config.settings.production"
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.production")
# This application object is used by any WSGI server configured to use this
# file. This includes Django's development server, if the WSGI_APPLICATION
# setting points here.
application = get_wsgi_application()
# Apply WSGI middleware here.
# from helloworld.wsgi import HelloWorldApplication
# application = HelloWorldApplication(application)

5
dock_checker/__init__.py Normal file
View File

@ -0,0 +1,5 @@
__version__ = "0.1.0"
__version_info__ = tuple(
int(num) if num.isdigit() else num
for num in __version__.replace("-", ".", 1).split(".")
)

View File

View File

@ -0,0 +1,19 @@
from rest_framework.pagination import PageNumberPagination
class SmallResultsSetPagination(PageNumberPagination):
    """10 items per page by default; clients may raise it to 100 via ?page_size=."""

    page_size = 10
    page_size_query_param = "page_size"
    max_page_size = 100
class StandardResultsSetPagination(PageNumberPagination):
    """50 items per page by default; clients may raise it to 200 via ?page_size=."""

    page_size = 50
    page_size_query_param = "page_size"
    max_page_size = 200
class BigResultsSetPagination(PageNumberPagination):
    """100 items per page by default; clients may raise it to 1000 via ?page_size=."""

    page_size = 100
    page_size_query_param = "page_size"
    max_page_size = 1000

View File

@ -0,0 +1,25 @@
from django.core.cache import cache
def incr_key(key, value, timeout=None):
    """Atomically increment ``key`` by ``value``; returns the new value.

    NOTE(review): ``timeout`` is accepted but never used — ``cache.incr`` takes
    no timeout argument; consider removing the parameter after auditing callers.
    """
    return cache.incr(key, delta=value)
def set_key(key, value, timeout=None):
    """Store ``value`` under ``key``.

    NOTE(review): with Django's cache API, ``timeout=None`` means "cache
    forever", unlike omitting the argument (which uses the backend's default
    TTL) — confirm the forever-by-default behaviour is intended.
    """
    return cache.set(key, value, timeout=timeout)
def add_key(key, value, timeout=None):
    """Store ``value`` under ``key`` only if the key is not already set.

    Returns True if the value was stored. Same ``timeout=None`` caveat as
    ``set_key`` above.
    """
    return cache.add(key, value, timeout=timeout)
def check_if_key_exists(key):
    """Return True if ``key`` holds a non-None cached value."""
    return cache.get(key) is not None
def get_key(key):
    """Return the cached value for ``key`` (None if absent)."""
    return cache.get(key)
def delete_key(key):
    """Remove ``key`` from the cache."""
    return cache.delete(key)

View File

@ -0,0 +1,18 @@
from celery import shared_task
from django.apps import apps
from django.core.files import File
from dock_checker.utils.files import crop_image
@shared_task()
def crop_model_image(pk: int, app_label: str, model_name: str):
    """Celery task: generate a 250px cropped PNG for a model's ``image`` field.

    Looks up the model dynamically so any app's model with ``image`` and
    ``image_cropped`` fields can reuse this task. Returns the instance pk.
    """
    model = apps.get_model(app_label=app_label, model_name=model_name)
    instance = model.objects.get(pk=pk)
    # Reuse the source file's base name, but always save the crop as PNG.
    instance.image_cropped.save(
        instance.image.path.split(".")[0].split("/")[-1] + ".png",
        File(crop_image(instance.image.path, length=250)),
        save=False,
    )
    # save=False above + update_fields here: write only the cropped field.
    instance.save(update_fields=["image_cropped"])
    return pk

14
dock_checker/conftest.py Normal file
View File

@ -0,0 +1,14 @@
import pytest
from dock_checker.users.models import User
from dock_checker.users.tests.factories import UserFactory
@pytest.fixture(autouse=True)
def media_storage(settings, tmpdir):
    """Redirect MEDIA_ROOT to a temp dir for every test (autouse)."""
    settings.MEDIA_ROOT = tmpdir.strpath
@pytest.fixture
def user(db) -> User:
    """A freshly created factory user (requires DB access via the db fixture)."""
    return UserFactory()

View File

View File

View File

@ -0,0 +1,42 @@
import django.contrib.sites.models
from django.contrib.sites.models import _simple_domain_name_validator
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial schema for the Site model (in-project copy of contrib.sites migrations)."""

    dependencies = []
    operations = [
        migrations.CreateModel(
            name="Site",
            fields=[
                (
                    "id",
                    models.AutoField(
                        verbose_name="ID",
                        serialize=False,
                        auto_created=True,
                        primary_key=True,
                    ),
                ),
                (
                    "domain",
                    models.CharField(
                        max_length=100,
                        verbose_name="domain name",
                        validators=[_simple_domain_name_validator],
                    ),
                ),
                ("name", models.CharField(max_length=50, verbose_name="display name")),
            ],
            options={
                "ordering": ("domain",),
                "db_table": "django_site",
                "verbose_name": "site",
                "verbose_name_plural": "sites",
            },
            bases=(models.Model,),
            managers=[("objects", django.contrib.sites.models.SiteManager())],
        )
    ]

View File

@ -0,0 +1,20 @@
import django.contrib.sites.models
from django.db import migrations, models
class Migration(migrations.Migration):
    """Make Site.domain unique (mirrors contrib.sites 0002_alter_domain_unique)."""

    dependencies = [("sites", "0001_initial")]
    operations = [
        migrations.AlterField(
            model_name="site",
            name="domain",
            field=models.CharField(
                max_length=100,
                unique=True,
                validators=[django.contrib.sites.models._simple_domain_name_validator],
                verbose_name="domain name",
            ),
        )
    ]

View File

@ -0,0 +1,63 @@
"""
To understand why this file is here, please read:
http://cookiecutter-django.readthedocs.io/en/latest/faq.html#why-is-there-a-django-contrib-sites-directory-in-cookiecutter-django
"""
from django.conf import settings
from django.db import migrations
def _update_or_create_site_with_sequence(site_model, connection, domain, name):
    """Update or create the site with default ID and keep the DB sequence in sync.

    :param site_model: historical Site model from apps.get_model
    :param connection: schema_editor.connection of the running migration
    :param domain: domain to store on the SITE_ID row
    :param name: display name to store on the SITE_ID row
    """
    site, created = site_model.objects.update_or_create(
        id=settings.SITE_ID,
        defaults={
            "domain": domain,
            "name": name,
        },
    )
    if created:
        # We provided the ID explicitly when creating the Site entry, therefore the DB
        # sequence to auto-generate them wasn't used and is now out of sync. If we
        # don't do anything, we'll get a unique constraint violation the next time a
        # site is created.
        # To avoid this, we need to manually update DB sequence and make sure it's
        # greater than the maximum value.
        max_id = site_model.objects.order_by('-id').first().id
        with connection.cursor() as cursor:
            # NOTE(review): django_site_id_seq / ALTER SEQUENCE is
            # PostgreSQL-specific — this migration assumes a postgres backend.
            cursor.execute("SELECT last_value from django_site_id_seq")
            (current_id,) = cursor.fetchone()
            if current_id <= max_id:
                cursor.execute(
                    "alter sequence django_site_id_seq restart with %s",
                    [max_id + 1],
                )
def update_site_forward(apps, schema_editor):
    """Set site domain and name (forward direction of the data migration)."""
    Site = apps.get_model("sites", "Site")
    _update_or_create_site_with_sequence(
        Site,
        schema_editor.connection,
        "dock_checker",
        "Detection and comparison with the reference name",
    )
def update_site_backward(apps, schema_editor):
    """Revert site domain and name to Django's default example.com values."""
    Site = apps.get_model("sites", "Site")
    _update_or_create_site_with_sequence(
        Site,
        schema_editor.connection,
        "example.com",
        "example.com",
    )
class Migration(migrations.Migration):
    """Data migration: set the project's site row; reversible via update_site_backward."""

    dependencies = [("sites", "0002_alter_domain_unique")]
    operations = [migrations.RunPython(update_site_forward, update_site_backward)]

View File

@ -0,0 +1,21 @@
# Generated by Django 3.1.7 on 2021-02-04 14:49
from django.db import migrations
class Migration(migrations.Migration):
    """Switch Site ordering option from tuple to list (mirrors contrib.sites 0004)."""

    dependencies = [
        ("sites", "0003_set_site_domain_and_name"),
    ]
    operations = [
        migrations.AlterModelOptions(
            name="site",
            options={
                "ordering": ["domain"],
                "verbose_name": "site",
                "verbose_name_plural": "sites",
            },
        ),
    ]

View File

View File

@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

View File

View File

@ -0,0 +1,68 @@
from django.urls import reverse
from drf_spectacular.utils import extend_schema_field
from rest_framework import serializers
from dock_checker.processor.models import File, FileImage
class TaskSerializer(serializers.Serializer):
    """Read-only shape of a processing task's status (values come from the cache)."""

    processed = serializers.IntegerField()
    total = serializers.IntegerField()
    features_loaded = serializers.BooleanField()
    error = serializers.BooleanField()
    error_description = serializers.CharField()
class FileImageSerializer(serializers.ModelSerializer):
    """One rendered PDF page: its order within the document and the image URL."""

    class Meta:
        model = FileImage
        fields = ["order", "image"]
class FileSerializer(serializers.ModelSerializer):
    """List/upload serializer for PDF files.

    On write, only ``file`` is accepted; ``name`` is derived from the upload.
    On read, adds relative URLs for the status and detail endpoints plus the
    first page image as a preview.
    """

    status = serializers.SerializerMethodField(method_name="get_status")
    file_url = serializers.SerializerMethodField(method_name="get_file_url")
    preview = serializers.SerializerMethodField(method_name="get_preview")

    @extend_schema_field(serializers.URLField)
    def get_status(self, obj):
        """Relative URL of the task-status endpoint for this file."""
        return reverse("api:status", kwargs={"pk": obj.id})

    @extend_schema_field(serializers.FileField)
    def get_preview(self, obj):
        """URL of the first page image, or "" while no page has been rendered.

        Uses a single ``first()`` query instead of ``exists()`` + ``first()``,
        halving the DB round-trips per serialized file.
        """
        first_image = obj.images.first()
        if first_image is not None:
            return first_image.image.url
        return ""

    @extend_schema_field(serializers.URLField)
    def get_file_url(self, obj):
        """Relative URL of the file detail endpoint."""
        return reverse("api:file", kwargs={"pk": obj.id})

    class Meta:
        model = File
        fields = ["name", "ideal_title", "file", "file_url", "preview", "status"]
        extra_kwargs = {
            "ideal_title": {"read_only": True},
            "status": {"read_only": True},
            "name": {"read_only": True},
            "preview": {"read_only": True},
            "file_url": {"read_only": True},
        }

    def create(self, validated_data):
        """Create the File, deriving ``name`` from the uploaded file's name."""
        obj = File.objects.create(
            file=validated_data["file"], name=validated_data["file"].name
        )
        return obj
class FullFileSerializer(FileSerializer):
    """Detail serializer: adds page images and the matched text locations.

    Inherited declared fields (status/preview/file_url) are deliberately not
    listed in ``fields`` — DRF permits subclasses to narrow a parent's field
    set, so they are simply omitted from the output.
    """

    images = FileImageSerializer(many=True)

    class Meta:
        model = File
        fields = ["name", "ideal_title", "file", "images", "text_locations"]
class UpdateFileTitleSerializer(serializers.Serializer):
    """Request body for re-running feature extraction with a user-chosen title."""

    title = serializers.CharField()

View File

@ -0,0 +1,17 @@
from django.urls import path
from dock_checker.processor.api.views import (
CreateFileApiView,
RetrieveTaskApiView,
ListFileApiView,
RetrieveFileApiView,
UpdateFileTitleApiView,
)
# NOTE(review): trailing-slash usage is inconsistent ("list" vs "upload/") —
# left as-is because changing paths would break existing clients.
urlpatterns = [
    path("list", ListFileApiView.as_view()),
    path("upload/", CreateFileApiView.as_view()),
    # pk is the File UUID in both routes below.
    path("status/<str:pk>", RetrieveTaskApiView.as_view(), name="status"),
    path("file/<str:pk>", RetrieveFileApiView.as_view(), name="file"),
    path("file/<str:pk>/update/", UpdateFileTitleApiView.as_view()),
]

View File

@ -0,0 +1,56 @@
from rest_framework import status
from rest_framework.parsers import FormParser, MultiPartParser
from rest_framework.generics import (
GenericAPIView,
CreateAPIView,
ListAPIView,
RetrieveAPIView,
get_object_or_404,
)
from rest_framework.response import Response
from dock_checker.processor.api.serializers import (
TaskSerializer,
FileSerializer,
FullFileSerializer,
UpdateFileTitleSerializer,
)
from dock_checker.processor.models import File
from dock_checker.processor.services import get_task_status
from dock_checker.processor.tasks import update_pdf_features
class RetrieveTaskApiView(GenericAPIView):
    """GET the processing progress for a file (read from the cache, 404 if unknown)."""

    serializer_class = TaskSerializer

    def get(self, request, pk):
        # get_task_status raises NotFound when no counters exist for this pk.
        data = get_task_status(pk)
        return Response(data=data, status=status.HTTP_200_OK)
class UpdateFileTitleApiView(GenericAPIView):
    """POST a new title for a file and re-run feature extraction with it.

    Returns the (not yet re-processed) file representation; progress can be
    polled via the status endpoint.
    """

    serializer_class = UpdateFileTitleSerializer

    def post(self, request, pk):
        # Validate the body through the declared serializer so a missing or
        # blank "title" yields a 400 response instead of an unhandled KeyError.
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        file = get_object_or_404(File, pk=pk)
        update_pdf_features.apply_async(
            kwargs={"pk": file.pk, "target": serializer.validated_data["title"]},
            countdown=1,
        )
        data = FileSerializer().to_representation(file)
        return Response(data=data, status=status.HTTP_200_OK)
class RetrieveFileApiView(RetrieveAPIView):
    """GET full file details (pages + text locations) by UUID."""

    queryset = File.objects.all()
    serializer_class = FullFileSerializer
class CreateFileApiView(CreateAPIView):
    """POST a PDF upload (multipart/form-data); processing starts via signals."""

    parser_classes = [FormParser, MultiPartParser]
    serializer_class = FileSerializer
class ListFileApiView(ListAPIView):
    """GET all uploaded files, newest first (model Meta ordering)."""

    serializer_class = FileSerializer
    queryset = File.objects.all()

View File

@ -0,0 +1,9 @@
from django.apps import AppConfig
class ProcessorConfig(AppConfig):
    """App config for the PDF processor app."""

    default_auto_field = "django.db.models.BigAutoField"
    name = "dock_checker.processor"

    def ready(self):
        # Import for side effects: registers the pre_save signal handlers.
        import dock_checker.processor.signals  # noqa: F401

View File

@ -0,0 +1,50 @@
# Generated by Django 4.2.2 on 2023-06-24 08:39
from django.db import migrations, models
import uuid
class Migration(migrations.Migration):
    """Initial processor schema: File (UUID pk) and the since-removed Task model."""

    initial = True
    dependencies = []
    operations = [
        migrations.CreateModel(
            name="File",
            fields=[
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("uploaded", models.DateTimeField(auto_now_add=True)),
                ("file", models.FileField(upload_to="uploads/")),
            ],
            options={
                "ordering": ("uploaded",),
            },
        ),
        migrations.CreateModel(
            name="Task",
            fields=[
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("processed", models.IntegerField(default=0)),
                ("total", models.IntegerField(default=0)),
                ("next_url", models.URLField(blank=True, null=True)),
            ],
        ),
    ]

View File

@ -0,0 +1,23 @@
# Generated by Django 4.2.2 on 2023-06-24 13:47
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add File.name and a preview image field (preview is dropped again in 0005)."""

    dependencies = [
        ("processor", "0001_initial"),
    ]
    operations = [
        migrations.AddField(
            model_name="file",
            name="name",
            field=models.CharField(blank=True, max_length=500, null=True),
        ),
        migrations.AddField(
            model_name="file",
            name="preview",
            field=models.ImageField(blank=True, null=True, upload_to="preview/"),
        ),
    ]

View File

@ -0,0 +1,37 @@
# Generated by Django 4.2.2 on 2023-06-24 14:40
from django.db import migrations, models
class Migration(migrations.Migration):
    """Order files newest-first and introduce the FileImage page model."""

    dependencies = [
        ("processor", "0002_file_name_file_preview"),
    ]
    operations = [
        migrations.AlterModelOptions(
            name="file",
            options={"ordering": ("-uploaded",)},
        ),
        migrations.CreateModel(
            name="FileImage",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("order", models.IntegerField()),
                ("image", models.ImageField(upload_to="pages/")),
            ],
            options={
                "ordering": ("order",),
                "unique_together": {("order", "image")},
            },
        ),
    ]

View File

@ -0,0 +1,33 @@
# Generated by Django 4.2.2 on 2023-06-24 14:41
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Link FileImage to File and make (order, file) the uniqueness constraint."""

    dependencies = [
        ("processor", "0003_alter_file_options_fileimage"),
    ]
    operations = [
        migrations.AlterUniqueTogether(
            name="fileimage",
            unique_together=set(),
        ),
        migrations.AddField(
            model_name="fileimage",
            name="file",
            field=models.ForeignKey(
                # default=1 only satisfies existing rows during the migration;
                # preserve_default=False removes it afterwards.
                default=1,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="images",
                to="processor.file",
            ),
            preserve_default=False,
        ),
        migrations.AlterUniqueTogether(
            name="fileimage",
            unique_together={("order", "file")},
        ),
    ]

View File

@ -0,0 +1,17 @@
# Generated by Django 4.2.2 on 2023-06-24 14:57
from django.db import migrations
class Migration(migrations.Migration):
    """Drop File.preview — previews are now served from the first FileImage."""

    dependencies = [
        ("processor", "0004_alter_fileimage_unique_together_fileimage_file_and_more"),
    ]
    operations = [
        migrations.RemoveField(
            model_name="file",
            name="preview",
        ),
    ]

View File

@ -0,0 +1,31 @@
# Generated by Django 4.2.2 on 2023-06-24 17:57
import django.core.validators
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add per-page text JSON (removed again in 0007) and restrict uploads to PDFs."""

    dependencies = [
        ("processor", "0005_remove_file_preview"),
    ]
    operations = [
        migrations.AddField(
            model_name="fileimage",
            name="text",
            field=models.JSONField(default=dict),
        ),
        migrations.AlterField(
            model_name="file",
            name="file",
            field=models.FileField(
                upload_to="uploads/",
                validators=[
                    django.core.validators.FileExtensionValidator(
                        allowed_extensions=["pdf"]
                    )
                ],
            ),
        ),
    ]

View File

@ -0,0 +1,30 @@
# Generated by Django 4.2.2 on 2023-06-24 22:10
from django.db import migrations, models
class Migration(migrations.Migration):
    """Drop the Task model and per-page text; store title/matches on File instead."""

    dependencies = [
        ("processor", "0006_fileimage_text_alter_file_file"),
    ]
    operations = [
        migrations.DeleteModel(
            name="Task",
        ),
        migrations.RemoveField(
            model_name="fileimage",
            name="text",
        ),
        migrations.AddField(
            model_name="file",
            name="ideal_title",
            field=models.CharField(blank=True, max_length=500, null=True),
        ),
        migrations.AddField(
            model_name="file",
            name="text_locations",
            field=models.JSONField(default=dict),
        ),
    ]

View File

@ -0,0 +1,29 @@
import uuid
from django.core.validators import FileExtensionValidator
from django.db import models
class File(models.Model):
    """An uploaded PDF plus the title/match metadata extracted from it."""

    # UUID primary key, so file URLs are not enumerable.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Original name of the uploaded file.
    name = models.CharField(null=True, blank=True, max_length=500)
    # Title inferred by the ML pipeline (or supplied by the user on update).
    ideal_title = models.CharField(null=True, blank=True, max_length=500)
    # Match locations of the title within the document
    # (structure produced by ml.main.get_matches — see tasks.py).
    text_locations = models.JSONField(default=dict)
    uploaded = models.DateTimeField(auto_now_add=True)
    # Only PDF uploads are accepted.
    file = models.FileField(
        upload_to="uploads/",
        validators=[FileExtensionValidator(allowed_extensions=["pdf"])],
    )

    class Meta:
        ordering = ("-uploaded",)
class FileImage(models.Model):
    """A single rendered page of a File; ``order`` is the 1-based page number."""

    file = models.ForeignKey("File", related_name="images", on_delete=models.CASCADE)
    order = models.IntegerField()
    image = models.ImageField(upload_to="pages/")

    class Meta:
        # One image per page per file, served in page order.
        unique_together = ("order", "file")
        ordering = ("order",)

View File

@ -0,0 +1,19 @@
from django.core.cache import cache
from rest_framework.exceptions import NotFound
def get_task_status(pk: str) -> dict:
    """Assemble the processing status of file ``pk`` from its cache counters.

    Raises NotFound when no "<pk>-processed" counter exists, i.e. processing
    was never started for this id. Missing individual counters are lazily
    initialised to their zero values via get_or_set.
    """
    if cache.get(f"{pk}-processed") is None:
        raise NotFound("given task does not exist")
    return {
        "processed": cache.get_or_set(f"{pk}-processed", 0),
        "total": cache.get_or_set(f"{pk}-total", 0),
        "features_loaded": cache.get_or_set(f"{pk}-features_loaded", False),
        "error": cache.get_or_set(f"{pk}-error", False),
        "error_description": cache.get_or_set(f"{pk}-error_description", ""),
    }

View File

@ -0,0 +1,17 @@
from django.db.models.signals import pre_save
from django.dispatch import receiver
from django.core.cache import cache
from dock_checker.processor.models import File
from .tasks import process_pdf
@receiver(pre_save, sender=File)
def file_on_create(sender, instance: File, **kwargs):
    """Schedule PDF processing when a File is about to be saved without results.

    NOTE(review): fired on pre_save — ``instance.id`` is already populated by
    the UUIDField default, so the condition also matches re-saves where
    ``text_locations`` is still empty; confirm that double-scheduling on such
    saves is acceptable.
    """
    if instance.id and not instance.text_locations:
        # Seed the progress counters read by get_task_status().
        cache.set(f"{instance.id}-processed", 0)
        cache.set(f"{instance.id}-total", 1)
        # countdown=1 — presumably to let the surrounding save() commit before
        # the worker fetches the row; verify against the transaction settings.
        process_pdf.apply_async(
            kwargs={"pk": instance.pk},
            countdown=1,
        )

View File

@ -0,0 +1,122 @@
import os
import shutil
from time import sleep
from celery import shared_task
from django.core.files import File
from pdf2image import convert_from_path
from django.core.cache import cache
from pypdf import PdfReader
from dock_checker.processor.models import File as FileModel, FileImage
from ml.main import (
extract_test_features,
inference_models,
create_test_features,
get_matches,
)
@shared_task
def process_pdf(pk: str):
    """Entry task: record page count, then fan out feature extraction,
    page rendering and page import as separate tasks. Returns ``pk``."""
    file = FileModel.objects.get(pk=pk)
    reader = PdfReader(file.file.path)
    # Progress counters consumed by services.get_task_status().
    cache.set(f"{pk}-total", len(reader.pages))
    cache.set(f"{pk}-features_loaded", False)
    cache.set(f"{pk}-processed", 1)
    extract_pdf_features.apply_async(kwargs={"pk": pk})
    split_pdf_into_images.apply_async(kwargs={"pk": pk})
    load_pdf.apply_async(kwargs={"pk": pk})
    return pk
@shared_task
def extract_pdf_features(pk: str):
    """Run the ML pipeline on the PDF: infer the ideal title and its locations.

    On failure, stores the error in the cache for the status endpoint instead
    of raising. Returns ``pk``.
    """
    file = FileModel.objects.get(pk=pk)
    data, status = extract_test_features(file.file.path)
    if not status:
        # On failure `data` carries the error description, not features.
        print(data)
        cache.set(f"{pk}-error", True)
        cache.set(f"{pk}-error_description", data)
    else:
        # TODO: create new file for download
        data = create_test_features(data)
        _, target = inference_models("ml/checkpoints/models.pkl", data)
        text_locations = get_matches(file.file.path, target)
        file.ideal_title = target
        file.text_locations = text_locations
        file.save()
        cache.set(f"{pk}-features_loaded", True)
    return pk
@shared_task
def update_pdf_features(pk: str, target: str):
    """Re-run title matching with a user-supplied ``target`` title.

    Unlike extract_pdf_features, the title is given, so no model inference is
    performed — only the match locations are recomputed. Returns ``pk``.
    """
    file = FileModel.objects.get(pk=pk)
    cache.set(f"{pk}-features_loaded", False)
    # NOTE(review): features are extracted but only used as a validity check
    # here (the data itself is discarded) — confirm that's intended.
    data, status = extract_test_features(file.file.path)
    if not status:
        print(data)
        cache.set(f"{pk}-error", True)
        cache.set(f"{pk}-error_description", data)
    else:
        # TODO: create new file for download
        text_locations = get_matches(file.file.path, target)
        file.ideal_title = target
        file.text_locations = text_locations
        file.save()
        cache.set(f"{pk}-features_loaded", True)
    return pk
@shared_task
def split_pdf_into_images(pk: str):
    """Render every page of the PDF into PNG files inside a ./<pk>/ work dir.

    load_pdf polls that directory and imports the images as FileImage rows.
    Returns ``pk``.
    """
    file = FileModel.objects.get(pk=pk)
    # exist_ok guards against FileExistsError when the task is retried or
    # re-queued after the directory was already created.
    os.makedirs(str(pk), exist_ok=True)
    convert_from_path(file.file.path, output_folder=str(pk), paths_only=True, fmt="png")
    return pk
def get_file(pk: str, number: int):
    """Return the rendered image filename for page ``number`` in directory ``pk``.

    Filenames look like ``<prefix>-<page>.<ext>``; the page number is parsed
    from the segment after the last "-". A filename is returned either when it
    is the last expected page (after a 1s pause — presumably to let the writer
    finish, TODO confirm) or when the following page already exists. Returns
    False when the page is not ready yet.
    """
    pages = {
        int(filename.split("-")[-1].split(".")[0]): filename
        for filename in os.listdir(str(pk))
    }
    if number == len(os.listdir(str(pk))):
        sleep(1)
        return pages[number]
    if number + 1 in pages:
        return pages[number]
    return False
@shared_task
def load_pdf(pk: str):
    """Import rendered page PNGs from ./<pk>/ into FileImage rows, in order.

    Polls by re-queueing itself with a 1s countdown while the work directory
    or the next page image is not ready yet; cleans up the directory once all
    pages are imported. Progress is tracked via the "<pk>-processed" counter.
    """
    file = FileModel.objects.get(pk=pk)
    if not os.path.isdir(str(pk)):
        # split_pdf_into_images hasn't created the work dir yet — retry later.
        load_pdf.apply_async(
            kwargs={"pk": pk},
            countdown=1,
        )
        return
    # Resume from the last processed page so re-queued runs don't start over.
    for i in range(cache.get(f"{pk}-processed"), cache.get(f"{pk}-total") + 1):
        cache.set(f"{pk}-processed", i)
        f_path = get_file(pk, i)
        if f_path:
            with open(str(pk) + "/" + f_path, "rb") as f:
                FileImage.objects.create(
                    image=File(f, name=f"{pk}-{i}.png"), file=file, order=i
                )
                print(i)
        else:
            # Page i not rendered yet — re-queue and resume from it later.
            load_pdf.apply_async(
                kwargs={"pk": pk},
                countdown=1,
            )
            return
    # All pages imported — remove the temporary render directory.
    shutil.rmtree(str(pk))
    return pk

0
dock_checker/static/.gitkeep vendored Normal file
View File

View File

View File

View File

@ -0,0 +1,14 @@
from django.contrib.auth import get_user_model
from rest_framework import serializers
User = get_user_model()
class UserSerializer(serializers.ModelSerializer):
    """Public user representation with a hyperlink to the user detail endpoint."""

    class Meta:
        model = User
        fields = ["username", "name", "url"]
        extra_kwargs = {
            # Detail URLs are keyed by username, not pk.
            "url": {"view_name": "api:user-detail", "lookup_field": "username"}
        }

View File

@ -0,0 +1,25 @@
from django.contrib.auth import get_user_model
from rest_framework import status
from rest_framework.decorators import action
from rest_framework.mixins import ListModelMixin, RetrieveModelMixin, UpdateModelMixin
from rest_framework.response import Response
from rest_framework.viewsets import GenericViewSet
from .serializers import UserSerializer
User = get_user_model()
class UserViewSet(RetrieveModelMixin, ListModelMixin, UpdateModelMixin, GenericViewSet):
    """Retrieve/list/update endpoints restricted to the requesting user's own record."""

    serializer_class = UserSerializer
    queryset = User.objects.all()
    lookup_field = "username"

    def get_queryset(self, *args, **kwargs):
        # NOTE(review): anonymous users have id=None, so this assert turns an
        # unauthenticated request into a 500 — confirm auth is always required.
        assert isinstance(self.request.user.id, int)
        # Users can only ever see/update themselves.
        return self.queryset.filter(id=self.request.user.id)

    @action(detail=False)
    def me(self, request):
        """Return the serialized current user at /users/me/."""
        serializer = UserSerializer(request.user, context={"request": request})
        return Response(status=status.HTTP_200_OK, data=serializer.data)

View File

@ -0,0 +1,13 @@
from django.apps import AppConfig
from django.utils.translation import gettext_lazy as _
class UsersConfig(AppConfig):
    """App config for the users app."""

    name = "dock_checker.users"
    verbose_name = _("Users")

    def ready(self):
        try:
            # Import for side effects: registers user signal handlers if present.
            import dock_checker.users.signals  # noqa F401
        except ImportError:
            pass

View File

@ -0,0 +1,118 @@
import django.contrib.auth.models
import django.contrib.auth.validators
from django.db import migrations, models
import django.utils.timezone
class Migration(migrations.Migration):
    """Auto-generated initial migration for the custom ``users.User`` model.

    Mirrors Django's stock auth user fields, minus ``first_name`` and
    ``last_name`` (removed on the model). Do not hand-edit the generated
    field definitions; create follow-up migrations instead.
    """

    initial = True

    dependencies = [
        ("auth", "0012_alter_user_first_name_max_length"),
    ]

    operations = [
        migrations.CreateModel(
            name="User",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("password", models.CharField(max_length=128, verbose_name="password")),
                (
                    "last_login",
                    models.DateTimeField(
                        blank=True, null=True, verbose_name="last login"
                    ),
                ),
                (
                    "is_superuser",
                    models.BooleanField(
                        default=False,
                        help_text="Designates that this user has all permissions without explicitly assigning them.",
                        verbose_name="superuser status",
                    ),
                ),
                (
                    "username",
                    models.CharField(
                        error_messages={
                            "unique": "A user with that username already exists."
                        },
                        help_text="Required. 150 characters or fewer. Letters, digits and @/./+/-/_ only.",
                        max_length=150,
                        unique=True,
                        validators=[
                            django.contrib.auth.validators.UnicodeUsernameValidator()
                        ],
                        verbose_name="username",
                    ),
                ),
                (
                    "email",
                    models.EmailField(
                        blank=True, max_length=254, verbose_name="email address"
                    ),
                ),
                (
                    "is_staff",
                    models.BooleanField(
                        default=False,
                        help_text="Designates whether the user can log into this admin site.",
                        verbose_name="staff status",
                    ),
                ),
                (
                    "is_active",
                    models.BooleanField(
                        default=True,
                        help_text="Designates whether this user should be treated as active. Unselect this instead of deleting accounts.",
                        verbose_name="active",
                    ),
                ),
                (
                    "date_joined",
                    models.DateTimeField(
                        default=django.utils.timezone.now, verbose_name="date joined"
                    ),
                ),
                (
                    "groups",
                    models.ManyToManyField(
                        blank=True,
                        help_text="The groups this user belongs to. A user will get all permissions granted to each of their groups.",
                        related_name="user_set",
                        related_query_name="user",
                        to="auth.Group",
                        verbose_name="groups",
                    ),
                ),
                (
                    "user_permissions",
                    models.ManyToManyField(
                        blank=True,
                        help_text="Specific permissions for this user.",
                        related_name="user_set",
                        related_query_name="user",
                        to="auth.Permission",
                        verbose_name="user permissions",
                    ),
                ),
            ],
            options={
                "verbose_name": "user",
                "verbose_name_plural": "users",
                "abstract": False,
            },
            managers=[
                ("objects", django.contrib.auth.models.UserManager()),
            ],
        ),
    ]

View File

@ -0,0 +1,13 @@
from django.contrib.auth.models import AbstractUser
class User(AbstractUser):
    """
    Default custom user model for Capital Dock Checker.
    If adding fields that need to be filled at user signup,
    check forms.SignupForm and forms.SocialSignupForms accordingly.
    """

    #: First and last name do not cover name patterns around the globe
    first_name = None  # type: ignore
    last_name = None  # type: ignore
    # NOTE(review): the users API serializer exposes a "name" field that is
    # not declared here — confirm it exists (cookiecutter-django normally adds
    # ``name = models.CharField(...)``), otherwise the serializer will break.

View File

@ -0,0 +1 @@

View File

View File

@ -0,0 +1,19 @@
from django.contrib.contenttypes.models import ContentType
def all_subclasses(cls):
    """Return every direct and transitive subclass of ``cls`` as a set."""
    found = set()
    pending = list(cls.__subclasses__())
    while pending:
        candidate = pending.pop()
        if candidate not in found:
            found.add(candidate)
            pending.extend(candidate.__subclasses__())
    return found
class SubclassesMixin:
    """Lets a base model enumerate its installed concrete subclasses via the
    ContentType registry."""

    @classmethod
    def get_subclasses(cls):
        # Every registered model of this app, resolved back to model classes.
        registered = ContentType.objects.filter(app_label=cls._meta.app_label)
        subclasses = []
        for content_type in registered:
            model = content_type.model_class()
            # Skip stale content types (None) and the base class itself.
            if model is None or model is cls:
                continue
            if issubclass(model, cls):
                subclasses.append(model)
        return subclasses

View File

@ -0,0 +1,24 @@
import asyncio
import functools

from channels.generic.websocket import AsyncJsonWebsocketConsumer, JsonWebsocketConsumer
def login_required(func):
    """Guard a consumer handler: unauthenticated users get an error frame.

    Supports both sync handlers (``SyncBaseConsumer``) and async handlers
    (``BaseConsumer``). Bug fix: the previous sync-only wrapper broke async
    handlers — the unauthenticated branch called the async ``send_error``
    without awaiting it (coroutine never ran) and returned ``None`` to a
    caller that awaits the handler's result.
    """

    def _is_anonymous(self):
        # scope["user"] may be absent entirely when no auth middleware ran.
        user = self.scope.get("user", False)
        return not user or not user.is_authenticated

    if asyncio.iscoroutinefunction(func):

        @functools.wraps(func)
        async def async_wrapper(self, *args, **kwargs):
            if _is_anonymous(self):
                await self.send_error("Login is required")
            else:
                return await func(self, *args, **kwargs)

        return async_wrapper

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        if _is_anonymous(self):
            self.send_error("Login is required")
        else:
            return func(self, *args, **kwargs)

    return wrapper
class BaseConsumer(AsyncJsonWebsocketConsumer):
    """Async JSON websocket consumer base with a uniform error frame."""

    async def send_error(self, msg):
        # Error payload contract: {"type": "error", "data": {"msg": ...}}.
        await self.send_json({"type": "error", "data": {"msg": msg}})
class SyncBaseConsumer(JsonWebsocketConsumer):
    """Sync JSON websocket consumer base; mirrors BaseConsumer's error frame."""

    def send_error(self, msg):
        # Same payload shape as BaseConsumer.send_error, but synchronous.
        self.send_json({"type": "error", "data": {"msg": msg}})

View File

@ -0,0 +1,9 @@
from collections.abc import Iterable
from django.db.models.enums import ChoicesMeta
def count_max_length(choices: Iterable | ChoicesMeta):
    """Return the length of the longest choice value.

    Accepts either a Django ``Choices`` class or a plain iterable of
    ``(value, label)`` pairs. Raises ValueError on an empty input (as before).
    """
    if isinstance(choices, ChoicesMeta):
        values = choices.values
    else:
        values = [value for value, _ in choices]
    return max(len(value) for value in values)

View File

@ -0,0 +1,75 @@
import os
from io import BytesIO
from PIL import Image
from dock_checker.users.models import User
def crop_image(image_path: str, length: int = 500):
    """Make a centered square thumbnail of the image. Crops to 500x500 by default.

    Returns the PNG-encoded result as a BytesIO blob; if Pillow raises an
    OSError while processing, the blob is returned empty (best-effort).
    """
    image = Image.open(image_path)
    blob = BytesIO()
    try:
        if image.size[0] < image.size[1]:
            # The image is in portrait mode. Height is bigger than width.
            # This makes the width fit LENGTH pixels while conserving the ratio.
            resized_image = image.resize(
                (length, int(image.size[1] * (length / image.size[0])))
            )
            # Amount of pixels to lose in total on the height of the image.
            required_loss = resized_image.size[1] - length
            # Crop the height of the image so as to keep the center part.
            resized_image = resized_image.crop(
                box=(
                    0,
                    int(required_loss / 2),
                    length,
                    int(resized_image.size[1] - required_loss / 2),
                )
            )
        else:
            # This image is in landscape mode or already squared. The width is
            # bigger than (or equal to) the height.
            # This makes the height fit LENGTH pixels while conserving the ratio.
            resized_image = image.resize(
                (int(image.size[0] * (length / image.size[1])), length)
            )
            # Amount of pixels to lose in total on the width of the image.
            required_loss = resized_image.size[0] - length
            # Crop the width of the image so as to keep the center part.
            resized_image = resized_image.crop(
                box=(
                    int(required_loss / 2),
                    0,
                    int(resized_image.size[0] - required_loss / 2),
                    length,
                )
            )
        resized_image.save(blob, "PNG")
    except OSError:
        # Swallow decode/encode failures; caller receives an empty blob.
        print("Can't crop")
    return blob
def user_file_upload_mixin(instance, filename):
    """Build the media upload path ``uploads/<username>/<filename>``.

    The owner is taken from the instance itself (a User), or from its
    ``user``/``creator`` attribute; otherwise the username part is empty.
    """
    if isinstance(instance, User):
        owner = instance.username
    elif hasattr(instance, "user"):
        owner = instance.user.username
    elif hasattr(instance, "creator"):
        owner = instance.creator.username
    else:
        owner = ""
    return os.path.join(f"uploads/{owner}/", filename)
def get_filename(filename, request):
    """Normalize an uploaded file's name to upper-case.

    The ``request`` argument is part of the upload-handler signature and is
    intentionally unused.
    """
    return filename.upper()

View File

@ -0,0 +1,15 @@
import random
import string
def generate_charset(length: int) -> str:
    """Generate a random ASCII-letter string of the given length."""
    return "".join(random.choices(string.ascii_letters, k=length))
def _rand255():
return random.randint(0, 255)
def generate_hex_color() -> str:
return f"#{_rand255():02X}{_rand255():02X}{_rand255():02X}"

View File

@ -0,0 +1,27 @@
def number_to_base(n: int, b: int) -> list[int]:
    """Decompose non-negative ``n`` into base-``b`` digits, most significant first.

    ``0`` yields ``[0]``.
    """
    if n == 0:
        return [0]
    digits = []
    while n:
        n, remainder = divmod(n, b)
        digits.append(remainder)
    digits.reverse()
    return digits
def to_base(n: int, base: list) -> str:
    """Render ``n`` using the symbols in ``base``; returns "" for ``n == 0``.

    ``base`` is any indexable sequence of symbols; its length is the radix.
    """
    if n == 0:
        return ""
    radix = len(base)
    symbols = []
    while n:
        n, index = divmod(n, radix)
        symbols.append(base[index])
    return "".join(reversed(symbols))
def from_base(expr: str, base: int) -> int:
    """Evaluate ``expr`` as a positional number of decimal digit characters
    in the given base (Horner's scheme). An empty string yields 0.

    Note: each character is parsed with ``int(...)``, so only characters
    '0'-'9' are accepted, exactly as before.
    """
    total = 0
    for character in expr:
        total = total * base + int(character)
    return total

View File

@ -0,0 +1,8 @@
import re
# Matches HTML tags and character entities (named, decimal, lowercase hex).
CLEANR = re.compile("<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});")


def cleanhtml(raw_html):
    """Strip HTML tags and character entities from ``raw_html``."""
    return CLEANR.sub("", raw_html)

15
dock_checker/utils/zip.py Normal file
View File

@ -0,0 +1,15 @@
import zipfile
from django import forms
from .validators import validate_zip
class ZipfileField(forms.FileField):
    """Form field that validates an uploaded file and opens it as a ZIP archive."""

    file_validators = [validate_zip]

    def to_python(self, value):
        # Let FileField do its normal coercion first, then run our
        # zip-specific validators before handing back an open archive.
        uploaded = super().to_python(value)
        for validate in self.file_validators:
            validate(uploaded)
        return zipfile.ZipFile(uploaded)

68
local.yml Normal file
View File

@ -0,0 +1,68 @@
version: '3'
volumes:
dock_checker_local_postgres_data: {}
dock_checker_local_postgres_data_backups: {}
services:
django: &django
build:
context: .
dockerfile: ./compose/local/django/Dockerfile
image: dock_checker_local_django
container_name: dock_checker_local_django
depends_on:
- postgres
- redis
volumes:
- .:/app:z
env_file:
- ./.envs/.local/.django
- ./.envs/.local/.postgres
ports:
- "8000:8000"
command: /start
postgres:
build:
context: .
dockerfile: ./compose/production/postgres/Dockerfile
image: dock_checker_production_postgres
container_name: dock_checker_local_postgres
volumes:
- dock_checker_local_postgres_data:/var/lib/postgresql/data
- dock_checker_local_postgres_data_backups:/backups
env_file:
- ./.envs/.local/.postgres
redis:
image: redis:6
container_name: dock_checker_local_redis
celeryworker:
<<: *django
image: dock_checker_local_celeryworker
container_name: dock_checker_local_celeryworker
depends_on:
- redis
- postgres
ports: []
command: /start-celeryworker
celerybeat:
<<: *django
image: dock_checker_local_celerybeat
container_name: dock_checker_local_celerybeat
depends_on:
- redis
- postgres
ports: []
command: /start-celerybeat
flower:
<<: *django
image: dock_checker_local_flower
container_name: dock_checker_local_flower
ports:
- "5555:5555"
command: /start-flower

6
locale/README.rst Normal file
View File

@ -0,0 +1,6 @@
Translations
============
Translations will be placed in this folder when running::
python manage.py makemessages

31
manage.py Executable file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env python
import os
import sys
from pathlib import Path
if __name__ == "__main__":
    # Django's command-line utility entry point; defaults to local settings.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local")

    try:
        from django.core.management import execute_from_command_line
    except ImportError:
        # The above import may fail for some other reason. Ensure that the
        # issue is really that Django is missing to avoid masking other
        # exceptions on Python 2.
        try:
            import django  # noqa
        except ImportError:
            raise ImportError(
                "Couldn't import Django. Are you sure it's installed and "
                "available on your PYTHONPATH environment variable? Did you "
                "forget to activate a virtual environment?"
            )

        raise

    # This allows easy placement of apps within the interior
    # dock_checker directory.
    current_path = Path(__file__).parent.resolve()
    sys.path.append(str(current_path / "dock_checker"))

    execute_from_command_line(sys.argv)

0
ml/__init__.py Normal file
View File

BIN
ml/checkpoints/models.pkl Normal file

Binary file not shown.

226
ml/main.py Normal file
View File

@ -0,0 +1,226 @@
import re
import pickle
import warnings
import numpy as np
import pandas as pd
import Levenshtein as lev
from catboost import Pool
from pdfminer.high_level import extract_pages
from tqdm import tqdm
from pdfminer.layout import LTTextContainer, LTChar
warnings.filterwarnings("ignore")
def extract_test_features(file):
    """Extract per-text-box features from the first page of a PDF.

    Returns ``(DataFrame, True)`` on success, or ``(error_message, False)``
    when the page is unusable: undecodable fonts ("(cid:" markers), scanned
    pages (no text), too few (< 3) or too many (> 25) text boxes. The error
    messages are user-facing Russian strings. Only the first page is
    analyzed (note the ``break`` after the page loop body).
    """
    texts = []
    fonts = []
    squares = []
    ids = []
    coords = []
    relative_coords = []
    for page_layout in extract_pages(file):
        # Page bounding box, used to normalize element coordinates.
        _x1, _y1, _x2, _y2 = page_layout.bbox
        for i, element in enumerate(page_layout):
            if isinstance(element, LTTextContainer):
                text = element.get_text().replace("\n", "")
                # "(cid:" means the font has no usable text encoding.
                if "(cid:" in text:
                    return "Неправильная кодировка файла", False
                # Keep only non-empty boxes longer than 4 characters.
                if text.split() != [] and len(text) > 4:
                    texts.append(text)
                    end = False
                    # The first character's font style stands in for the whole
                    # box: 1 = bold, 2 = italic, 0 = regular.
                    for text_line in element:
                        if end:
                            break
                        for character in text_line:
                            if isinstance(character, LTChar):
                                if "bold" in character.fontname.lower():
                                    fonts.append(1)
                                elif "italic" in character.fontname.lower():
                                    fonts.append(2)
                                else:
                                    fonts.append(0)
                                end = True
                                break
                    x1, y1, x2, y2 = element.bbox
                    coords.append([x1, y1, x2, y2])
                    relative_coords.append(
                        [x1 / _x2, y1 / _y2, (x2 - x1) / _x2, (y2 - y1) / _y2]
                    )
                    # Box area in whole pixels.
                    squares.append((int(x2) - int(x1)) * (int(y2) - int(y1)))
                    # Recover the element's sequential id from its repr.
                    match = re.search(r"LTTextBoxHorizontal\((\d+)\)", str(element))
                    if match:
                        id = int(match.group(1))
                        ids.append(id)
        # Only the first (title) page is processed.
        break
    if not texts:
        return "Файл состоит из сканов", False
    if len(texts) < 3:
        return "Главная страница состоит из сканов", False
    if len(texts) > 25:
        return "Произошла ошибка", False
    test_df = pd.DataFrame(
        {
            "text": texts,
            "font": fonts,
            "file": file,
            "squares": squares,
            "ids": ids,
            "coords": coords,
            "relative_coords": relative_coords,
        }
    )
    return test_df, True
def create_test_features(df):
    """Attach per-file ranking and percentage features used by the classifier.

    Adds columns in place and returns the same DataFrame.
    """
    df["len_of_text"] = df["text"].str.len()
    # Rank 1 == longest text / largest box within a file (ties share min rank).
    df["rank"] = (
        df.groupby("file")["len_of_text"]
        .rank(ascending=False, method="min")
        .astype(int)
    )
    df["rank_squares"] = (
        df.groupby("file")["squares"].rank(ascending=False, method="min").astype(int)
    )
    df["font"] = df["font"].astype(object)
    df["bold"] = (df["font"] == 1).astype(int)
    # NOTE(review): this averages the raw font code (italic == 2 counts double),
    # not the "bold" flag above — presumably the trained checkpoint expects the
    # feature computed this way, so the behavior is preserved; confirm.
    df["bold_percentage"] = (
        df.groupby("file")["font"].transform(lambda s: s.mean() * 100).astype(int)
    )
    df["id_percentage"] = (
        df.groupby("file")["ids"].transform(lambda s: (s / s.max()) * 100).astype(int)
    )
    return df
def inference_models(checkpoint_name, test_df):
    """Score ``test_df`` with the pickled CatBoost ensemble.

    Returns ``(test_df, best_text)`` where ``pred`` holds the mean positive
    class probability across models and ``best_text`` is the stripped text of
    the highest-scoring row.
    """
    feature_columns = [
        "font",
        "rank",
        "rank_squares",
        "bold_percentage",
        "id_percentage",
    ]
    # NOTE(review): pickle.load executes arbitrary code — only load trusted
    # checkpoint files.
    with open(checkpoint_name, "rb") as checkpoint:
        models = pickle.load(checkpoint)
    pool = Pool(data=test_df[feature_columns])
    probabilities = [model.predict_proba(pool)[:, 1] for model in models]
    test_df["pred"] = np.mean(probabilities, axis=0)
    best_text = test_df.loc[test_df["pred"].idxmax(), "text"].strip()
    return test_df, best_text
def calculate_distances(target, list_of_strings):
    """Word-aligned sliding-window Levenshtein distances of ``target``.

    Returns ``{window_text: distance_as_percentage_of_len(target)}``. Strings
    with at least as many words as ``target`` are scanned with a window of the
    same word count; shorter strings are compared whole.
    NOTE(review): identical window texts overwrite each other in the dict —
    the last occurrence wins, as in the original.
    """
    window_size = len(target.split())
    distances = {}
    for candidate in list_of_strings:
        words = candidate.split()
        if len(words) < window_size:
            # Too short for a full window: compare the whole string.
            distances[candidate] = (lev.distance(target, candidate) / len(target)) * 100
            continue
        for start in range(len(words) - window_size + 1):
            window = " ".join(words[start : start + window_size])
            distances[window] = (lev.distance(target, window) / len(target)) * 100
    return distances
def replace_multiple_spaces(text):
    """Collapse runs of ASCII spaces (not tabs or newlines) to a single space."""
    return re.sub(" {2,}", " ", text)
def get_matches(file, target):
    """Locate near-matches of ``target`` on every page of a PDF.

    Returns a list of dicts with the 1-based page number, the matched window
    text, the matching element's page-relative bbox and the (re-)normalized
    distance.

    Bug fix: ``rel_coord`` was previously left unbound on the first window
    whose text did not literally appear in any element (NameError), and stale
    (the previous window's coordinates) on later such windows. It is now
    reset per window and windows with no literal element match are skipped.
    """
    result = []
    for page_number, page_layout in enumerate(tqdm(extract_pages(file))):
        # Page bounding box, used to normalize element coordinates.
        _x1, _y1, _x2, _y2 = page_layout.bbox
        texts = []
        relative_coords = []
        for element in page_layout:
            if isinstance(element, LTTextContainer):
                x1, y1, x2, y2 = element.bbox
                relative_coords.append(
                    [x1 / _x2, y1 / _y2, (x2 - x1) / _x2, (y2 - y1) / _y2]
                )
                texts.append(
                    replace_multiple_spaces(element.get_text().replace("\n", ""))
                )
        distances = calculate_distances(target, texts)
        # NOTE(review): calculate_distances already normalizes by len(target);
        # dividing by len(target) again below preserves the original behavior
        # but looks double-normalized — confirm the intended threshold.
        for window, distance in distances.items():
            if distance / len(target) < 0.2:
                rel_coord = None
                for index, text in enumerate(texts):
                    if window in text:
                        rel_coord = relative_coords[index]
                        break
                if rel_coord is None:
                    # Window text not literally present in any element
                    # (whitespace differences); no coordinates to report.
                    continue
                result.append(
                    {
                        "page": page_number + 1,
                        "window": window,
                        "coordinates": rel_coord,
                        "distance": distance / len(target),
                    }
                )
    return result
# if __name__ == "__main__":
# file = "some.pdf"
# columns_to_use = [
# "font",
# "rank",
# "rank_squares",
# "bold_percentage",
# "id_percentage",
# ]
# checkpoint_name = "checkpoints/models.pkl"
#
# test_df, result = extract_test_features(file)
#
# if isinstance(test_df, pd.DataFrame):
# test_df = create_test_features(test_df)
# else:
# print(result)
#
# _, target = inference_models(checkpoint_name, test_df, columns_to_use)
#
# result = []
# for page_layout in tqdm(extract_pages(file)):
# texts = []
# for element in page_layout:
# if isinstance(element, LTTextContainer):
# texts.append(element.get_text().replace("\n", ""))
# distances = calculate_distances(target, texts)
#
# for window, distance in distances.items():
# if distance < 20:
# result.append(window)

5
ml/requirements.txt Normal file
View File

@ -0,0 +1,5 @@
catboost
Levenshtein
pdfminer.six
numpy
pandas

4156
poetry.lock generated Normal file

File diff suppressed because it is too large Load Diff

63
production.yml Normal file
View File

@ -0,0 +1,63 @@
version: '3'
volumes:
production_postgres_data: {}
production_postgres_data_backups: {}
production_traefik: {}
services:
django: &django
build:
context: .
dockerfile: ./compose/production/django/Dockerfile
image: dock_checker_production_django
depends_on:
- postgres
- redis
env_file:
- ./.envs/.production/.django
- ./.envs/.production/.postgres
command: /start
postgres:
build:
context: .
dockerfile: ./compose/production/postgres/Dockerfile
image: dock_checker_production_postgres
volumes:
- production_postgres_data:/var/lib/postgresql/data
- production_postgres_data_backups:/backups
env_file:
- ./.envs/.production/.postgres
traefik:
build:
context: .
dockerfile: ./compose/production/traefik/Dockerfile
image: dock_checker_production_traefik
depends_on:
- django
volumes:
- production_traefik:/etc/traefik/acme
ports:
- "0.0.0.0:80:80"
- "0.0.0.0:443:443"
- "0.0.0.0:5555:5555"
redis:
image: redis:6
celeryworker:
<<: *django
image: dock_checker_production_celeryworker
command: /start-celeryworker
celerybeat:
<<: *django
image: dock_checker_production_celerybeat
command: /start-celerybeat
flower:
<<: *django
image: dock_checker_production_flower
command: /start-flower

Some files were not shown because too many files have changed in this diff Show More