mirror of
https://github.com/Alexander-D-Karpov/akarpov
synced 2024-11-24 03:13:43 +03:00
Compare commits
1 Commits
db88ec65e1
...
8c7d2cded6
Author | SHA1 | Date | |
---|---|---|---|
|
8c7d2cded6 |
|
@ -1,51 +0,0 @@
|
||||||
import pycld2 as cld2
|
|
||||||
import spacy
|
|
||||||
import torch
|
|
||||||
from transformers import AutoModel, AutoTokenizer
|
|
||||||
|
|
||||||
# Heavy ML objects are not created at import time; they are filled in lazily
# by the first request that needs them.
# TODO: move to outer server/service

# Language pipelines. NOTE(review): ``nlp`` is never assigned in the visible
# code -- presumably reserved for an English pipeline; confirm before removing.
nlp = ru_nlp = None

# Russian transformer model and its tokenizer.
ru_model = ru_tokenizer = None

# Prefer the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
|
|
||||||
|
|
||||||
|
|
||||||
def get_text_embedding(text: str):
    """Return transformer embeddings for Russian ``text``, or ``None``.

    The language is detected with ``pycld2``. Only a reliable detection of
    Russian produces an embedding: English is recognised but has no model
    wired up yet, and any other or unreliable detection yields ``None``.
    The spaCy pipeline, the model and the tokenizer are loaded on first use
    and cached in module-level globals.

    Args:
        text: Raw text to embed.

    Returns:
        The first element of the model output for the lemmatised text, moved
        to the CPU, or ``None`` when no embedding can be produced.
        NOTE(review): for this model the first output is presumably the
        per-token hidden states rather than one pooled vector -- confirm
        before storing it in a fixed-size vector column.
    """
    global ru_nlp, ru_model, ru_tokenizer

    is_reliable, _bytes_found, details = cld2.detect(text)
    if not is_reliable:
        return None

    # The first detection entry carries the language code at index 1.
    lang = details[0][1]
    if lang != "ru":
        # "en" is recognised but has no embedding model yet; every other
        # language is unsupported. Neither case produces an embedding.
        return None

    if ru_nlp is None:
        ru_nlp = spacy.load("ru_core_news_md", disable=["parser", "ner"])
    lemmas = " ".join(token.lemma_ for token in ru_nlp(text))

    if ru_model is None:
        # Place the model on the module-level device once, at load time.
        ru_model = AutoModel.from_pretrained("DeepPavlov/rubert-base-cased").to(device)
    if ru_tokenizer is None:
        ru_tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")

    encodings = ru_tokenizer(
        lemmas,
        padding=True,
        # Without truncation, input longer than the model's position limit
        # makes the forward pass fail; cap it at what the model accepts.
        truncation=True,
        max_length=ru_model.config.max_position_embeddings,
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        outputs = ru_model(**encodings)

    # Hand back a CPU tensor so callers see the same kind of result whether
    # or not a GPU was used for inference.
    return outputs[0].cpu()
|
|
|
@ -1,10 +0,0 @@
|
||||||
from django.db import migrations
|
|
||||||
from pgvector.django import VectorExtension
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
    """Run pgvector's ``VectorExtension`` operation for the ``files`` app."""

    dependencies = [
        ("files", "0024_alter_file_options_alter_filereport_options_and_more"),
    ]

    operations = [
        VectorExtension(),
    ]
|
|
|
@ -1,18 +0,0 @@
|
||||||
# Generated by Django 4.2.5 on 2023-09-16 18:33
|
|
||||||
|
|
||||||
from django.db import migrations
|
|
||||||
import pgvector.django
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
    """Add the nullable 768-dimensional ``embeddings`` vector field to ``file``."""

    dependencies = [("files", "0025_create_vector_ps")]

    operations = [
        migrations.AddField(
            model_name="file",
            name="embeddings",
            field=pgvector.django.VectorField(dimensions=768, null=True),
        )
    ]
|
|
|
@ -1,12 +0,0 @@
|
||||||
# Generated by Django 4.2.5 on 2023-09-25 17:23
|
|
||||||
|
|
||||||
from django.db import migrations
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
    """Merge migration: depends on two ``files`` migrations and runs no operations."""

    dependencies = [
        ("files", "0025_file_notify_user_on_view_alter_basefileitem_parent"),
        ("files", "0026_file_embeddings"),
    ]

    # Nothing to apply; this migration only joins the two dependencies above.
    operations = []
|
|
|
@ -1,26 +0,0 @@
|
||||||
# Generated by Django 4.2.5 on 2023-09-26 09:04
|
|
||||||
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
    """Add the ``content`` text field and the ``lang`` choice field to ``file``.

    Both fields are added with ``preserve_default=False``, so the defaults
    given here ("" and "en") are supplied for this migration only.
    """

    dependencies = [("files", "0027_merge_20230925_2023")]

    operations = [
        # Text content stored alongside the file.
        migrations.AddField(
            model_name="file",
            name="content",
            field=models.TextField(default="", max_length=10000),
            preserve_default=False,
        ),
        # Two-letter language code, restricted to "ru" or "en".
        migrations.AddField(
            model_name="file",
            name="lang",
            field=models.CharField(
                choices=[("ru", "ru"), ("en", "en")],
                default="en",
                max_length=2,
            ),
            preserve_default=False,
        ),
    ]
|
|
|
@ -17,7 +17,6 @@
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
from model_utils.fields import AutoCreatedField, AutoLastModifiedField
|
from model_utils.fields import AutoCreatedField, AutoLastModifiedField
|
||||||
from model_utils.models import TimeStampedModel
|
from model_utils.models import TimeStampedModel
|
||||||
from pgvector.django import VectorField
|
|
||||||
from polymorphic.models import PolymorphicModel
|
from polymorphic.models import PolymorphicModel
|
||||||
|
|
||||||
from akarpov.files.services.files import trash_file_upload, user_unique_file_upload
|
from akarpov.files.services.files import trash_file_upload, user_unique_file_upload
|
||||||
|
@ -70,9 +69,6 @@ class File(BaseFileItem, TimeStampedModel, ShortLinkModel, UserHistoryModel):
|
||||||
|
|
||||||
preview = FileField(blank=True, upload_to="file/previews/")
|
preview = FileField(blank=True, upload_to="file/previews/")
|
||||||
file_obj = FileField(blank=False, upload_to=user_unique_file_upload)
|
file_obj = FileField(blank=False, upload_to=user_unique_file_upload)
|
||||||
embeddings = VectorField(dimensions=768, null=True)
|
|
||||||
content = TextField(max_length=10000)
|
|
||||||
lang = CharField(max_length=2, choices=[("ru", "ru"), ("en", "en")])
|
|
||||||
|
|
||||||
# meta
|
# meta
|
||||||
name = CharField(max_length=255, null=True, blank=True)
|
name = CharField(max_length=255, null=True, blank=True)
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
import textract
|
|
||||||
|
|
||||||
|
|
||||||
def extract_file_text(file: str) -> str:
    """Extract the textual content of the document at path ``file``.

    Args:
        file: Path of the document passed to ``textract.process``.

    Returns:
        The extracted text as ``str``.
    """
    raw = textract.process(file)

    # textract returns encoded bytes (UTF-8 unless another output encoding is
    # requested); decode so the declared ``str`` return type actually holds.
    if isinstance(raw, bytes):
        return raw.decode("utf-8", errors="replace")
    return raw
|
|
|
@ -132,10 +132,6 @@
|
||||||
<footer class="row bg-light py-1 mt-auto text-center">
|
<footer class="row bg-light py-1 mt-auto text-center">
|
||||||
<div class="col"> Writen by <a href="/about">sanspie</a>, find source code <a href="https://github.com/Alexander-D-Karpov/akarpov">here</a> </div>
|
<div class="col"> Writen by <a href="/about">sanspie</a>, find source code <a href="https://github.com/Alexander-D-Karpov/akarpov">here</a> </div>
|
||||||
</footer>
|
</footer>
|
||||||
<div id="toastContainer" class="toast-container position-fixed bottom-0 end-0 p-3">
|
|
||||||
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -146,63 +142,18 @@
|
||||||
{% endblock inline_javascript %}
|
{% endblock inline_javascript %}
|
||||||
{% if request.user.is_authenticated %}
|
{% if request.user.is_authenticated %}
|
||||||
<script>
|
<script>
|
||||||
{% if request.is_secure %}
|
{# TODO: add automatic socket host retrieve #}
|
||||||
let socket = new WebSocket(`wss://{{ request.get_host }}/ws/notifications/`);
|
let socket = new WebSocket(`ws://127.0.0.1:8000/ws/notifications/`);
|
||||||
{% else %}
|
|
||||||
let socket = new WebSocket(`ws://{{ request.get_host }}/ws/notifications/`);
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
function sleep(ms) {
|
function sleep(ms) {
|
||||||
return new Promise(resolve => setTimeout(resolve, ms));
|
return new Promise(resolve => setTimeout(resolve, ms));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Format the time elapsed since `date` (a Date or a millisecond timestamp)
// as a short phrase such as "3 days" or "1 minute", using the largest unit
// that fits at least once.
function timeSince(date) {
    // Whole seconds elapsed; clamp at zero so a timestamp slightly in the
    // future (e.g. clock skew) does not render as a negative age.
    const seconds = Math.max(0, Math.floor((new Date() - date) / 1000));

    // Largest to smallest, with the same fixed unit lengths as before
    // (365-day year, 30-day month).
    const units = [
        ["year", 31536000],
        ["month", 2592000],
        ["day", 86400],
        ["hour", 3600],
        ["minute", 60],
    ];

    for (const [name, length] of units) {
        const count = Math.floor(seconds / length);
        // ">= 1" so that exactly one unit reads "1 hour", not "60 minutes".
        if (count >= 1) {
            return count + " " + name + (count === 1 ? "" : "s");
        }
    }
    return seconds + " " + (seconds === 1 ? "second" : "seconds");
}
|
|
||||||
|
|
||||||
const toastContainer = document.getElementById('toastContainer')
|
|
||||||
|
|
||||||
|
|
||||||
let fn = async function(event) {
|
let fn = async function(event) {
|
||||||
let data = JSON.parse(event.data)
|
let data = JSON.parse(event.data)
|
||||||
const toast = document.createElement("div")
|
console.log(data)
|
||||||
toast.id = "liveToast"
|
alert(data.body)
|
||||||
toast.className = "toast mb-4 ml-2"
|
{# TODO add pretty pop up #}
|
||||||
toast.setAttribute("role", "alert")
|
|
||||||
toast.setAttribute("aria-live", "assertive")
|
|
||||||
toast.setAttribute("aria-atomic", "true")
|
|
||||||
toast.innerHTML = `<div class="toast-header">
|
|
||||||
<strong class="me-auto">${data.title}</strong>
|
|
||||||
<small>${timeSince(Date.parse(data.created))} ago</small>
|
|
||||||
<button type="button" class="btn-close" data-bs-dismiss="toast" aria-label="Close"></button>
|
|
||||||
</div>
|
|
||||||
<div class="toast-body">
|
|
||||||
${data.body}
|
|
||||||
</div>`
|
|
||||||
toastContainer.appendChild(toast)
|
|
||||||
const toastBootstrap = bootstrap.Toast.getOrCreateInstance(toast)
|
|
||||||
toastBootstrap.show()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
socket.onmessage = fn
|
socket.onmessage = fn
|
||||||
|
|
|
@ -28,8 +28,6 @@ RUN apt-get update && \
|
||||||
apt-get install -y build-essential libpq-dev gettext libmagic-dev libjpeg-dev zlib1g-dev && \
|
apt-get install -y build-essential libpq-dev gettext libmagic-dev libjpeg-dev zlib1g-dev && \
|
||||||
# Dependencies for file preview generation
|
# Dependencies for file preview generation
|
||||||
apt-get install -y webp libimage-exiftool-perl libmagickwand-dev ffmpeg libgdal-dev && \
|
apt-get install -y webp libimage-exiftool-perl libmagickwand-dev ffmpeg libgdal-dev && \
|
||||||
# ML dependencies \
|
|
||||||
# none for now
|
|
||||||
apt-get purge -y --auto-remove -o APT:AutoRemove:RecommendsImportant=false && \
|
apt-get purge -y --auto-remove -o APT:AutoRemove:RecommendsImportant=false && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,5 @@
|
||||||
FROM postgres:16
|
FROM postgres:16
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y postgresql-14-pgvector && \
|
|
||||||
apt-get purge -y --auto-remove -o APT:AutoRemove:RecommendsImportant=false && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
COPY ./compose/production/postgres/maintenance /usr/local/bin/maintenance
|
COPY ./compose/production/postgres/maintenance /usr/local/bin/maintenance
|
||||||
RUN chmod +x /usr/local/bin/maintenance/*
|
RUN chmod +x /usr/local/bin/maintenance/*
|
||||||
RUN mv /usr/local/bin/maintenance/* /usr/local/bin \
|
RUN mv /usr/local/bin/maintenance/* /usr/local/bin \
|
||||||
|
|
|
@ -61,5 +61,5 @@
|
||||||
|
|
||||||
# SHORTENER
|
# SHORTENER
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
SHORTENER_REDIRECT_TO = "https://dev2.akarpov.ru"
|
SHORTENER_REDIRECT_TO = "http://127.0.0.1:8000"
|
||||||
SHORTENER_HOST = "https://dev.akarpov.ru"
|
SHORTENER_HOST = "http://127.0.0.1:3000"
|
||||||
|
|
383
poetry.lock
generated
383
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
|
@ -107,8 +107,6 @@ pytest-xdist = "^3.3.1"
|
||||||
pytest-mock = "^3.11.1"
|
pytest-mock = "^3.11.1"
|
||||||
pytest-asyncio = "^0.21.1"
|
pytest-asyncio = "^0.21.1"
|
||||||
pytest-lambda = "^2.2.0"
|
pytest-lambda = "^2.2.0"
|
||||||
pgvector = "^0.2.2"
|
|
||||||
pycld2 = "^0.41"
|
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user