Compare commits

..

1 Commits

Author SHA1 Message Date
dependabot[bot]
8c7d2cded6
Bump postgres from 14 to 16 in /compose/production/postgres
Bumps postgres from 14 to 16.

---
updated-dependencies:
- dependency-name: postgres
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-09-25 17:22:47 +00:00
14 changed files with 342 additions and 243 deletions

View File

@ -1,51 +0,0 @@
import pycld2 as cld2
import spacy
import torch
from transformers import AutoModel, AutoTokenizer
# Lazily-initialised ML state: the heavy objects below start as None and are
# loaded on the first request instead of at import time.
# TODO: move to outer server/service
nlp = None  # never assigned in the visible code
ru_nlp = None  # spaCy pipeline for Russian; populated by get_text_embedding
ru_model = None  # RuBERT model; populated by get_text_embedding
ru_tokenizer = None  # tokenizer for ru_model; populated by get_text_embedding
# NOTE(review): `device` is computed here, but the visible code never moves the
# model or its inputs onto it - confirm whether GPU inference was intended.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def get_text_embedding(text: str):
    """Return RuBERT embeddings for Russian ``text``.

    The language is detected with cld2. Russian text is lemmatised with the
    spaCy ``ru_core_news_md`` pipeline and fed to
    ``DeepPavlov/rubert-base-cased``. The spaCy pipeline, the model and the
    tokenizer are loaded on first use and cached in module-level globals.

    Args:
        text: The raw text to embed.

    Returns:
        The first element of the model output for the lemmatised text (for
        BERT-style models this is the per-token hidden state, not a pooled
        vector), or ``None`` when language detection is unreliable, the
        language is neither Russian nor English, or the text is English
        (no English model is wired up yet).
    """
    global ru_nlp, ru_model, ru_tokenizer

    is_reliable, _, details = cld2.detect(text)
    if not is_reliable:
        return None

    # details[0] is the top detection; index 1 is its language code.
    lang = details[0][1]
    if lang != "ru":
        # "en" is recognised but has no embedding model yet; every other
        # language is unsupported. Both cases yield None.
        return None

    # Compare against None explicitly: truthiness of these objects may be
    # defined by __len__ and is not a reliable "already loaded" signal.
    if ru_nlp is None:
        ru_nlp = spacy.load("ru_core_news_md", disable=["parser", "ner"])
    lemmas = " ".join(token.lemma_ for token in ru_nlp(text))

    if ru_model is None:
        ru_model = AutoModel.from_pretrained("DeepPavlov/rubert-base-cased")
    if ru_tokenizer is None:
        ru_tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")

    encodings = ru_tokenizer(
        lemmas,  # the text to be tokenized
        padding=True,  # pad to the longest sequence in the batch
        # Without truncation, inputs longer than the model's position
        # embeddings make the forward pass fail.
        truncation=True,
        max_length=ru_model.config.max_position_embeddings,
        return_tensors="pt",  # return tensors (not lists)
    )
    with torch.no_grad():
        outputs = ru_model(**encodings)
    return outputs[0]

View File

@ -1,10 +0,0 @@
from django.db import migrations
from pgvector.django import VectorExtension
class Migration(migrations.Migration):
    """Apply pgvector's ``VectorExtension`` operation for the ``files`` app."""

    dependencies = [
        ("files", "0024_alter_file_options_alter_filereport_options_and_more"),
    ]

    operations = [
        VectorExtension(),
    ]

View File

@ -1,18 +0,0 @@
# Generated by Django 4.2.5 on 2023-09-16 18:33
from django.db import migrations
import pgvector.django
class Migration(migrations.Migration):
    """Add the nullable 768-dimensional ``embeddings`` vector field to ``file``."""

    dependencies = [("files", "0025_create_vector_ps")]

    operations = [
        migrations.AddField(
            model_name="file",
            name="embeddings",
            field=pgvector.django.VectorField(
                dimensions=768,
                null=True,
            ),
        ),
    ]

View File

@ -1,12 +0,0 @@
# Generated by Django 4.2.5 on 2023-09-25 17:23
from django.db import migrations
class Migration(migrations.Migration):
    """Merge migration joining the two ``files`` branches; it has no operations."""

    dependencies = [
        # Branch that added notify-on-view / changed the parent relation.
        ("files", "0025_file_notify_user_on_view_alter_basefileitem_parent"),
        # Branch that added the embeddings vector field.
        ("files", "0026_file_embeddings"),
    ]

    operations = []

View File

@ -1,26 +0,0 @@
# Generated by Django 4.2.5 on 2023-09-26 09:04
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``content`` and ``lang`` columns to the ``file`` model.

    Both fields are added with a one-off default used only to back-fill
    existing rows (``preserve_default=False``).
    """

    dependencies = [("files", "0027_merge_20230925_2023")]

    operations = [
        # Text content for the file; existing rows are back-filled with "".
        migrations.AddField(
            model_name="file",
            name="content",
            field=models.TextField(default="", max_length=10000),
            preserve_default=False,
        ),
        # Language code of the content; existing rows are back-filled with "en".
        migrations.AddField(
            model_name="file",
            name="lang",
            field=models.CharField(
                choices=[("ru", "ru"), ("en", "en")],
                default="en",
                max_length=2,
            ),
            preserve_default=False,
        ),
    ]

View File

@ -17,7 +17,6 @@
from django.urls import reverse
from model_utils.fields import AutoCreatedField, AutoLastModifiedField
from model_utils.models import TimeStampedModel
from pgvector.django import VectorField
from polymorphic.models import PolymorphicModel
from akarpov.files.services.files import trash_file_upload, user_unique_file_upload
@ -70,9 +69,6 @@ class File(BaseFileItem, TimeStampedModel, ShortLinkModel, UserHistoryModel):
preview = FileField(blank=True, upload_to="file/previews/")
file_obj = FileField(blank=False, upload_to=user_unique_file_upload)
embeddings = VectorField(dimensions=768, null=True)
content = TextField(max_length=10000)
lang = CharField(max_length=2, choices=[("ru", "ru"), ("en", "en")])
# meta
name = CharField(max_length=255, null=True, blank=True)

View File

@ -1,7 +0,0 @@
import textract
def extract_file_text(file: str) -> str:
    """Extract the textual content of the file at path ``file``.

    Args:
        file: Path of the file to read; passed straight to ``textract.process``.

    Returns:
        The extracted text as ``str``. ``textract.process`` yields encoded
        bytes (UTF-8 by default), so the result is decoded here to honour the
        declared return type; undecodable bytes are replaced rather than
        raising.
    """
    raw = textract.process(file)
    if isinstance(raw, bytes):
        return raw.decode("utf-8", errors="replace")
    return raw

View File

@ -132,10 +132,6 @@
<footer class="row bg-light py-1 mt-auto text-center">
<div class="col"> Written by <a href="/about">sanspie</a>, find source code <a href="https://github.com/Alexander-D-Karpov/akarpov">here</a> </div>
</footer>
<div id="toastContainer" class="toast-container position-fixed bottom-0 end-0 p-3">
</div>
</div>
</div>
</div>
@ -146,63 +142,18 @@
{% endblock inline_javascript %}
{% if request.user.is_authenticated %}
<script>
{% if request.is_secure %}
let socket = new WebSocket(`wss://{{ request.get_host }}/ws/notifications/`);
{% else %}
let socket = new WebSocket(`ws://{{ request.get_host }}/ws/notifications/`);
{% endif %}
{# TODO: add automatic socket host retrieve #}
let socket = new WebSocket(`ws://127.0.0.1:8000/ws/notifications/`);
// Return a promise that resolves (with no value) after `ms` milliseconds.
function sleep(ms) {
  return new Promise(function (done) {
    setTimeout(done, ms);
  });
}
// Describe how long ago `date` was, using the largest whole unit that fits
// (e.g. "3 days", "1 hour", "42 seconds").
//
// `date` may be a Date or a millisecond timestamp (it is only used in
// `new Date() - date`). Units use fixed lengths: a month is 30 days and a
// year is 365 days. The unit name is singular when the count is exactly 1,
// and a timestamp in the future (e.g. clock skew between server and browser)
// is reported as "0 seconds" instead of a negative value.
function timeSince(date) {
  const UNITS = [
    ["year", 31536000],
    ["month", 2592000],
    ["day", 86400],
    ["hour", 3600],
    ["minute", 60],
  ];

  const seconds = Math.max(0, Math.floor((new Date() - date) / 1000));

  for (const [unit, size] of UNITS) {
    const count = Math.floor(seconds / size);
    // `>= 1` so that exactly one unit is reported as "1 minute", not "60 seconds".
    if (count >= 1) {
      return count + " " + unit + (count === 1 ? "" : "s");
    }
  }
  return seconds + " " + (seconds === 1 ? "second" : "seconds");
}
const toastContainer = document.getElementById('toastContainer')
// WebSocket message handler: parses the incoming notification and shows it to the user.
let fn = async function(event) {
// The payload is JSON; the code below reads its `title`, `created` and `body` fields.
let data = JSON.parse(event.data)
// Build a Bootstrap toast element for this notification.
// NOTE(review): every toast is given the same id "liveToast", so several
// notifications produce duplicate ids in the document.
const toast = document.createElement("div")
toast.id = "liveToast"
toast.className = "toast mb-4 ml-2"
toast.setAttribute("role", "alert")
toast.setAttribute("aria-live", "assertive")
toast.setAttribute("aria-atomic", "true")
// NOTE(review): data.title and data.body are interpolated into innerHTML
// without escaping - confirm the server sanitises them, otherwise this is an
// HTML-injection risk.
toast.innerHTML = `<div class="toast-header">
<strong class="me-auto">${data.title}</strong>
<small>${timeSince(Date.parse(data.created))} ago</small>
<button type="button" class="btn-close" data-bs-dismiss="toast" aria-label="Close"></button>
</div>
<div class="toast-body">
${data.body}
</div>`
// Attach the toast to the page container and display it via Bootstrap.
toastContainer.appendChild(toast)
const toastBootstrap = bootstrap.Toast.getOrCreateInstance(toast)
toastBootstrap.show()
// NOTE(review): the toast above and the console.log/alert below report the
// same message; they look like two revisions of this handler - confirm which
// one is intended.
console.log(data)
alert(data.body)
{# TODO add pretty pop up #}
}
socket.onmessage = fn

View File

@ -28,8 +28,6 @@ RUN apt-get update && \
apt-get install -y build-essential libpq-dev gettext libmagic-dev libjpeg-dev zlib1g-dev && \
# Dependencies for file preview generation
apt-get install -y webp libimage-exiftool-perl libmagickwand-dev ffmpeg libgdal-dev && \
# ML dependencies \
# none for now
apt-get purge -y --auto-remove -o APT:AutoRemove:RecommendsImportant=false && \
rm -rf /var/lib/apt/lists/*

View File

@ -1,10 +1,5 @@
FROM postgres:16
# Install pgvector for the PostgreSQL major version shipped in the base image.
# PG_MAJOR is set by the official postgres image, so the package always matches
# the server; a hard-coded postgresql-14-pgvector does not provide the
# extension to a postgres:16 server.
RUN apt-get update && \
    apt-get install -y postgresql-${PG_MAJOR}-pgvector && \
    apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false && \
    rm -rf /var/lib/apt/lists/*
COPY ./compose/production/postgres/maintenance /usr/local/bin/maintenance
RUN chmod +x /usr/local/bin/maintenance/*
RUN mv /usr/local/bin/maintenance/* /usr/local/bin \

View File

@ -61,5 +61,5 @@
# SHORTENER
# ------------------------------------------------------------------------------
SHORTENER_REDIRECT_TO = "https://dev2.akarpov.ru"
SHORTENER_HOST = "https://dev.akarpov.ru"
SHORTENER_REDIRECT_TO = "http://127.0.0.1:8000"
SHORTENER_HOST = "http://127.0.0.1:3000"

383
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -107,8 +107,6 @@ pytest-xdist = "^3.3.1"
pytest-mock = "^3.11.1"
pytest-asyncio = "^0.21.1"
pytest-lambda = "^2.2.0"
pgvector = "^0.2.2"
pycld2 = "^0.41"
[build-system]