Compare commits

..

1 Commits

Author SHA1 Message Date
dependabot[bot]
b5c0ceb340
Merge 7d359c56dc into 2b2c16db2d 2024-03-31 02:46:13 +05:00
41 changed files with 5884 additions and 3923 deletions

View File

@ -15,5 +15,3 @@ LAST_FM_SECRET=
SPOTIFY_ID= SPOTIFY_ID=
SPOTIFY_SECRET= SPOTIFY_SECRET=
YANDEX_TOKEN= YANDEX_TOKEN=
PREVIEW_SERVICE_API_KEY=
PREVIEW_SERVICE_URL=

View File

@ -24,10 +24,10 @@ jobs:
steps: steps:
- name: Checkout Code Repository - name: Checkout Code Repository
uses: actions/checkout@v4.2.1 uses: actions/checkout@v3
- name: Cache packages - name: Cache packages
uses: actions/cache@v4.1.1 uses: actions/cache@v3
id: cache-packages id: cache-packages
with: with:
path: "~/packages/" path: "~/packages/"
@ -45,11 +45,11 @@ jobs:
sudo dpkg -L libimage-exiftool-perl libmagickwand-dev | while IFS= read -r f; do if test -f $f; then echo $f; fi; done | xargs cp --parents --target-directory ~/packages/ sudo dpkg -L libimage-exiftool-perl libmagickwand-dev | while IFS= read -r f; do if test -f $f; then echo $f; fi; done | xargs cp --parents --target-directory ~/packages/
fi fi
- uses: actions/checkout@v4.2.1 - uses: actions/checkout@v3
- name: Install poetry - name: Install poetry
run: pipx install poetry run: pipx install poetry
- uses: actions/setup-python@v5.2.0 - uses: actions/setup-python@v5
with: with:
python-version: '3.11' python-version: '3.11'
cache: 'poetry' cache: 'poetry'
@ -64,10 +64,7 @@ jobs:
steps: steps:
- name: Checkout Code Repository - name: Checkout Code Repository
uses: actions/checkout@v4.2.1 uses: actions/checkout@v3
- name: Install Docker Compose
run: sudo apt-get update && sudo apt-get install -y docker-compose
- name: Build the Stack - name: Build the Stack
run: docker-compose -f local.yml build run: docker-compose -f local.yml build

View File

@ -28,6 +28,10 @@ $ uvicorn redirect.app:app --reload
```shell ```shell
$ docker-compose -f local.yml up $ docker-compose -f local.yml up
``` ```
Install the file preview dependencies:
```shell
$ docker-compose -f local.yml exec django /install_preview_dependencies
```
- server - http://127.0.0.1:8000 - server - http://127.0.0.1:8000
- mail - http://127.0.0.1:8025 - mail - http://127.0.0.1:8025

View File

51
akarpov/common/ml/text.py Normal file
View File

@ -0,0 +1,51 @@
import pycld2 as cld2
import spacy
import torch
from transformers import AutoModel, AutoTokenizer
# Lazily-initialised ML resources: each name stays None until
# get_text_embedding loads it on the first request that needs it.
# TODO: move to outer server/service
nlp = None  # listed in get_text_embedding's global statement, never assigned there
ru_nlp = None  # spaCy "ru_core_news_md" pipeline, loaded with parser and ner disabled
ru_model = None  # "DeepPavlov/rubert-base-cased" model
ru_tokenizer = None  # tokenizer loaded from the same checkpoint as ru_model
# CUDA when torch reports it as available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def get_text_embedding(text: str):
    """Return RuBERT embeddings for Russian *text*, or ``None``.

    The language is detected with pycld2. Only a reliable "ru" detection
    produces embeddings: English text, any other language and unreliable
    detections all yield ``None``.

    The spaCy pipeline, the model and the tokenizer are loaded by the first
    call that needs them and cached in module globals.

    Args:
        text: Raw text to embed.

    Returns:
        The first element of the model output for the lemmatised text, or
        ``None`` when no embedding is produced.
    """
    global nlp, ru_nlp, ru_model, ru_tokenizer

    is_reliable, _bytes_found, details = cld2.detect(text)
    if not is_reliable:
        return None

    # details[0] is the first detection entry; its second item is compared
    # against the language codes below.
    lang = details[0][1]
    if lang != "ru":
        # "en" is recognised but has no embedding model wired up yet, so it
        # gives the same result as an unsupported language.
        return None

    if ru_nlp is None:
        ru_nlp = spacy.load("ru_core_news_md", disable=["parser", "ner"])
    lemmatised = " ".join(token.lemma_ for token in ru_nlp(text))

    if ru_model is None:
        ru_model = AutoModel.from_pretrained("DeepPavlov/rubert-base-cased")
    if ru_tokenizer is None:
        ru_tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")

    encodings = ru_tokenizer(
        lemmatised,  # the text to be tokenized
        padding=True,
        # Without truncation a long document exceeds the model's position
        # limit and the forward pass fails.
        # NOTE(review): 512 is the usual BERT-base limit; confirm against the
        # checkpoint's max_position_embeddings.
        truncation=True,
        max_length=512,
        return_tensors="pt",  # return tensors (not lists)
    )
    with torch.no_grad():
        # inference only: no gradients are needed for the embeddings
        outputs = ru_model(**encodings)
    return outputs[0]

View File

@ -1,10 +1,16 @@
import textract
from akarpov.files.models import File from akarpov.files.models import File
def view(file: File): def view(file: File):
static = "" static = ""
content = "" content = ""
text = file.content.replace("\t", " ") text = (
textract.process(file.file.path, extension="doc", output_encoding="utf8")
.decode("utf8")
.replace("\t", " ")
)
for line in text.split("\n"): for line in text.split("\n"):
content += f"<p class='mt-1'>{line}</p>" content += f"<p class='mt-1'>{line}</p>"
return static, content return static, content

View File

@ -1,10 +1,16 @@
import textract
from akarpov.files.models import File from akarpov.files.models import File
def view(file: File): def view(file: File):
static = "" static = ""
content = "" content = ""
text = file.content.replace("\t", " ") text = (
textract.process(file.file.path, extension="docx", output_encoding="utf8")
.decode("utf8")
.replace("\t", " ")
)
for line in text.split("\n"): for line in text.split("\n"):
content += f"<p class='mt-1'>{line}</p>" content += f"<p class='mt-1'>{line}</p>"
return static, content return static, content

View File

@ -1,10 +1,16 @@
import textract
from akarpov.files.models import File from akarpov.files.models import File
def view(file: File): def view(file: File):
static = "" static = ""
content = "" content = ""
text = file.content.replace("\t", " ") text = (
textract.process(file.file.path, extension="odt", output_encoding="utf8")
.decode("utf8")
.replace("\t", " ")
)
for line in text.split("\n"): for line in text.split("\n"):
content += f"<p class='mt-1'>{line}</p>" content += f"<p class='mt-1'>{line}</p>"
return static, content return static, content

View File

@ -1,3 +1,5 @@
import textract
from akarpov.files.models import File from akarpov.files.models import File
@ -5,7 +7,11 @@ def view(file: File) -> (str, str):
static = f""" static = f"""
<meta property="og:title" content="{file.name}" /> <meta property="og:title" content="{file.name}" />
""" """
text = file.content.replace("\t", " ") text = (
textract.process(file.file.path, extension="ogg", output_encoding="utf8")
.decode("utf8")
.replace("\t", " ")
)
content = ( content = (
""" """
<div id="waveform"> <div id="waveform">

View File

@ -0,0 +1,42 @@
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from pymorphy3 import MorphAnalyzer
# Stop-word sets, built once at import time from the NLTK stopwords corpus.
english_stopwords = set(stopwords.words("english"))
russian_stopwords = set(stopwords.words("russian"))
# Lemmatizers start as None; lemmatize_and_remove_stopwords creates each one
# the first time its language is requested.
english_lemmatizer = None
russian_lemmatizer = None
def lemmatize_and_remove_stopwords(text, language="english"):
    """Lemmatize *text* and drop English and Russian stop words.

    Args:
        text: Text to normalise.
        language: "russian" selects the pymorphy3 analyzer; any other value
            falls back to the English WordNet lemmatizer.

    Returns:
        The remaining lemmas joined by single spaces. The result is an empty
        string when every token is a stop word.
    """
    global english_lemmatizer, russian_lemmatizer

    # Pick (and lazily create) the lemmatizer once instead of re-checking it
    # for every token.
    if language == "russian":
        if russian_lemmatizer is None:
            russian_lemmatizer = MorphAnalyzer()
        analyzer = russian_lemmatizer

        def lemmatize(token):
            return analyzer.parse(token)[0].normal_form

    else:  # Default to English
        if english_lemmatizer is None:
            english_lemmatizer = WordNetLemmatizer()
        lemmatize = english_lemmatizer.lemmatize

    kept = []
    for token in word_tokenize(text):
        lemma = lemmatize(token)
        # The stop-word lists are lowercase, so compare case-insensitively;
        # otherwise capitalised stop words such as "The" slip through.
        lowered = lemma.lower()
        if lowered in english_stopwords or lowered in russian_stopwords:
            continue
        kept.append(lemma)

    return " ".join(kept)

View File

@ -1,4 +1,8 @@
from math import ceil
import magic import magic
from PIL import Image, ImageDraw, ImageFont
from preview_generator.manager import PreviewManager
from akarpov.files.models import File from akarpov.files.models import File
@ -15,11 +19,90 @@
manager = None manager = None
def textfile_to_image(textfile_path) -> Image.Image:
    """Render the contents of a text file as a grayscale image.

    Args:
        textfile_path: Path of the text file whose content is drawn.

    Returns:
        A ``PIL_GRAYSCALE`` image with a white background and the file's
        lines drawn in black, one below the other, inside a 20 px margin.
    """
    # Keep every line, stripped of whitespace on the right side. An empty
    # file is rendered as one blank line so the max() calls below always
    # have something to measure instead of raising ValueError.
    with open(textfile_path) as f:
        lines = tuple(line.rstrip() for line in f) or ("",)

    font = None
    large_font = 20  # get better resolution with larger size
    for font_filename in COMMON_MONO_FONT_FILENAMES:
        try:
            font = ImageFont.truetype(font_filename, size=large_font)
        except OSError:
            print(f'Could not load font "{font_filename}".')
        else:
            print(f'Using font "{font_filename}".')
            break
    if font is None:
        font = ImageFont.load_default()
        print("Using default font.")

    def _font_points_to_pixels(pt):
        return round(pt * 96.0 / 72)

    margin_pixels = 20

    # NOTE(review): ImageFont.getsize was removed in Pillow 10; confirm the
    # pinned Pillow version before upgrading.
    # height of the background image
    max_line_height = _font_points_to_pixels(
        max(font.getsize(line)[PIL_HEIGHT_INDEX] for line in lines)
    )
    # lines are packed slightly tighter than the tallest measured line
    realistic_line_height = max_line_height * 0.8
    image_height = int(ceil(realistic_line_height * len(lines) + 2 * margin_pixels))

    # width of the background image
    max_line_width = _font_points_to_pixels(
        max(font.getsize(line)[PIL_WIDTH_INDEX] for line in lines)
    )
    image_width = int(ceil(max_line_width + (2 * margin_pixels)))

    # draw the background
    background_color = 255  # white
    image = Image.new(
        PIL_GRAYSCALE, (image_width, image_height), color=background_color
    )
    draw = ImageDraw.Draw(image)

    # draw each line of text
    font_color = 0  # black
    horizontal_position = margin_pixels
    for i, line in enumerate(lines):
        vertical_position = int(round(margin_pixels + (i * realistic_line_height)))
        draw.text(
            (horizontal_position, vertical_position), line, fill=font_color, font=font
        )
    return image
def create_preview(file_path: str) -> str:
    """Return what the preview manager yields for a 500 px high JPEG preview.

    The shared PreviewManager is created on first use. An empty string is
    returned when the manager reports no JPEG preview for *file_path*.
    """
    global manager
    # TODO: add text image generation/code image
    if not manager:
        manager = PreviewManager(cache_path, create_folder=True)

    if not manager.has_jpeg_preview(file_path):
        return ""
    return manager.get_jpeg_preview(file_path, height=500)
def get_file_mimetype(file_path: str) -> str: def get_file_mimetype(file_path: str) -> str:
mime = magic.Magic(mime=True) mime = magic.Magic(mime=True)
return mime.from_file(file_path) return mime.from_file(file_path)
def get_description(file_path: str) -> str:
    """Return the preview manager's text preview for *file_path*.

    The shared PreviewManager is created on first use. An empty string is
    returned when the manager reports no text preview for the file.
    """
    global manager
    if not manager:
        manager = PreviewManager(cache_path, create_folder=True)

    if not manager.has_text_preview(file_path):
        return ""
    return manager.get_text_preview(file_path)
def get_base_meta(file: File): def get_base_meta(file: File):
preview = file.preview.url if file.preview else "" preview = file.preview.url if file.preview else ""
description = file.description if file.description else "" description = file.description if file.description else ""

View File

@ -11,6 +11,12 @@
from akarpov.files.models import File from akarpov.files.models import File
from ..documents import FileDocument from ..documents import FileDocument
from .lema import lemmatize_and_remove_stopwords
"""
Calculus on types of searches:
https://new.akarpov.ru/files/FZUTFBIyfbdlDHVzxUNU
"""
class BaseSearch: class BaseSearch:
@ -134,20 +140,23 @@ class SimilaritySearch(BaseSearch):
def search(self, query: str) -> QuerySet[File]: def search(self, query: str) -> QuerySet[File]:
if self.queryset is None: if self.queryset is None:
raise ValueError("Queryset cannot be None for similarity search") raise ValueError("Queryset cannot be None for similarity search")
language = "russian" if re.search("[а-яА-Я]", query) else "english"
filtered_query = lemmatize_and_remove_stopwords(query, language=language)
queryset = ( queryset = (
self.queryset.annotate( self.queryset.annotate(
name_similarity=Coalesce( name_similarity=Coalesce(
TrigramSimilarity(UnaccentLower("name"), query), TrigramSimilarity(UnaccentLower("name"), filtered_query),
Value(0), Value(0),
output_field=FloatField(), output_field=FloatField(),
), ),
description_similarity=Coalesce( description_similarity=Coalesce(
TrigramSimilarity(UnaccentLower("description"), query), TrigramSimilarity(UnaccentLower("description"), filtered_query),
Value(0), Value(0),
output_field=FloatField(), output_field=FloatField(),
), ),
content_similarity=Coalesce( content_similarity=Coalesce(
TrigramSimilarity(UnaccentLower("content"), query), TrigramSimilarity(UnaccentLower("content"), filtered_query),
Value(0), Value(0),
output_field=FloatField(), output_field=FloatField(),
), ),

View File

@ -0,0 +1,18 @@
import chardet
import textract
from textract.exceptions import ExtensionNotSupported
def extract_file_text(file: str) -> str:
    """Extract the textual content of the file at path *file*.

    textract is tried first. When it does not support the file's extension,
    the file is read as plain text using the encoding guessed by chardet.

    Args:
        file: Filesystem path of the file to read.

    Returns:
        The extracted text, or "" when the plain-text fallback cannot read
        or decode the file.
    """
    try:
        # textract.process returns encoded bytes; decode them so the function
        # returns str as annotated.
        return textract.process(file, output_encoding="utf8").decode("utf8")
    except ExtensionNotSupported:
        pass

    try:
        # Read the raw bytes once for encoding detection, closing the handle
        # deterministically.
        with open(file, "rb") as raw:
            guess = chardet.detect(raw.read())
        with open(file, encoding=guess["encoding"]) as f:
            return f.read()
    except (OSError, ValueError, LookupError):
        # Unreadable file, undecodable content or an unknown codec name:
        # extraction is best-effort, so fall back to no text.
        return ""

View File

@ -1,69 +1,40 @@
import base64 import os
import time import time
from urllib.parse import urljoin
import requests
import structlog import structlog
from celery import shared_task from celery import shared_task
from django.conf import settings
from django.core import management from django.core import management
from django.core.files.base import ContentFile from django.core.files import File
from akarpov.files.models import File as FileModel from akarpov.files.models import File as FileModel
from akarpov.files.services.preview import create_preview, get_file_mimetype
from akarpov.files.services.text import extract_file_text
logger = structlog.get_logger(__name__) logger = structlog.get_logger(__name__)
def sanitize_content(content):
    """Strip NUL (0x00) characters from str or bytes; return anything else as is."""
    replacements = (
        (str, "\x00", ""),
        (bytes, b"\x00", b""),
    )
    for kind, nul, empty in replacements:
        if isinstance(content, kind):
            return content.replace(nul, empty)
    return content
@shared_task() @shared_task()
def process_file(pk: int): def process_file(pk: int):
pth = None
file = FileModel.objects.get(pk=pk) file = FileModel.objects.get(pk=pk)
if not file.name: if not file.name:
file.name = file.file.name.split("/")[-1] file.name = file.file.name.split("/")[-1]
try: try:
api_url = urljoin(settings.PREVIEW_SERVICE_URL, "/process_file/") pth = create_preview(file.file.path)
if pth:
files = {"file": (file.name, file.file.open("rb"))} with open(pth, "rb") as f:
headers = { file.preview.save(
"X-API-Key": settings.PREVIEW_SERVICE_API_KEY, pth.split("/")[-1],
"Accept": "application/json", File(f),
} save=False,
)
response = requests.post(api_url, files=files, headers=headers)
if response.status_code != 200:
logger.error(f"Failed to process file {pk}: {response.text}")
return
result = response.json()
file.file_type = result["file_type"]
file.content = sanitize_content(result["content"])
if result["preview"]:
image_data = base64.b64decode(result["preview"])
file.preview.save(
f"{file.name}_preview.jpg", ContentFile(image_data), save=False
)
file.save()
logger.info(f"File {pk} processed successfully")
except Exception as e: except Exception as e:
logger.error(f"Error processing file {pk}: {str(e)}") logger.error(e)
finally: file.file_type = get_file_mimetype(file.file.path)
file.file.close() file.content = extract_file_text(file.file.path)
file.save(update_fields=["preview", "name", "file_type", "content"])
if pth and os.path.isfile(pth):
os.remove(pth)
return pk return pk

View File

@ -7,8 +7,6 @@
Album, Album,
AnonMusicUser, AnonMusicUser,
Author, Author,
MusicDraft,
MusicDraftFile,
Playlist, Playlist,
PlaylistSong, PlaylistSong,
Song, Song,
@ -380,61 +378,3 @@ class Meta:
"link": {"read_only": True}, "link": {"read_only": True},
"image": {"read_only": True}, "image": {"read_only": True},
} }
# Aggregated search result: up to ten songs, authors and albums matching the
# "query" key of the serialized object.
class AllSearchSerializer(serializers.Serializer):
    songs = serializers.SerializerMethodField(method_name="get_songs")
    authors = serializers.SerializerMethodField(method_name="get_authors")
    albums = serializers.SerializerMethodField(method_name="get_albums")

    @extend_schema_field(ListSongSerializer(many=True))
    def get_songs(self, obj):
        # Songs are the only section serialized with this serializer's context.
        matches = Song.objects.cache().search(obj["query"]).to_queryset()
        serializer = ListSongSerializer(
            matches[:10],
            many=True,
            context=self.context,
        )
        return serializer.data

    @extend_schema_field(ListAuthorSerializer(many=True))
    def get_authors(self, obj):
        matches = Author.objects.cache().search(obj["query"]).to_queryset()
        serializer = ListAuthorSerializer(matches[:10], many=True)
        return serializer.data

    @extend_schema_field(ListAlbumSerializer(many=True))
    def get_albums(self, obj):
        matches = Album.objects.cache().search(obj["query"]).to_queryset()
        serializer = ListAlbumSerializer(matches[:10], many=True)
        return serializer.data
# Serializes one MusicDraftFile, exposing only the stored file, its original
# name and its MIME type.
class MusicDraftFileSerializer(serializers.ModelSerializer):
class Meta:
model = MusicDraftFile
fields = ["file", "original_name", "mime_type"]
# Serializes a MusicDraft together with its attached files.
class MusicDraftSerializer(serializers.ModelSerializer):
# Nested, read-only list of the draft's files.
files = MusicDraftFileSerializer(many=True, read_only=True)
class Meta:
model = MusicDraft
fields = [
"id",
"status",
"provider",
"original_url",
"meta_data",
"file_token",
"created",
"updated",
"error_message",
"files",
]
# Identifiers and timestamps cannot be set through this serializer.
read_only_fields = ["id", "file_token", "created", "updated"]
# Validates a draft callback payload: a required status limited to
# MusicDraft.STATUS_CHOICES, plus optional metadata and error message.
class MusicDraftCallbackSerializer(serializers.Serializer):
status = serializers.ChoiceField(choices=MusicDraft.STATUS_CHOICES)
meta_data = serializers.JSONField(required=False)
error_message = serializers.CharField(required=False)

View File

@ -16,14 +16,11 @@
ListSongPlaylistsAPIView, ListSongPlaylistsAPIView,
ListSongSlugsAPIView, ListSongSlugsAPIView,
ListUserListenedSongsAPIView, ListUserListenedSongsAPIView,
MusicDraftCallbackView,
MusicDraftDetailView,
RemoveSongFromPlaylistAPIView, RemoveSongFromPlaylistAPIView,
RetrieveUpdateDestroyAlbumAPIView, RetrieveUpdateDestroyAlbumAPIView,
RetrieveUpdateDestroyAuthorAPIView, RetrieveUpdateDestroyAuthorAPIView,
RetrieveUpdateDestroyPlaylistAPIView, RetrieveUpdateDestroyPlaylistAPIView,
RetrieveUpdateDestroySongAPIView, RetrieveUpdateDestroySongAPIView,
SearchAllAPIView,
) )
app_name = "music" app_name = "music"
@ -83,13 +80,4 @@
name="retrieve_update_delete_author", name="retrieve_update_delete_author",
), ),
path("anon/create/", CreateAnonMusicUserAPIView.as_view(), name="create-anon"), path("anon/create/", CreateAnonMusicUserAPIView.as_view(), name="create-anon"),
path("search/", SearchAllAPIView.as_view(), name="search_all"),
path(
"drafts/callback/<uuid:token>/",
MusicDraftCallbackView.as_view(),
name="draft-callback",
),
path(
"drafts/<str:file_token>/", MusicDraftDetailView.as_view(), name="draft-detail"
),
] ]

View File

@ -1,13 +1,11 @@
from drf_spectacular.utils import OpenApiExample, OpenApiParameter, extend_schema from drf_spectacular.utils import OpenApiExample, OpenApiParameter, extend_schema
from rest_framework import generics, permissions, status from rest_framework import generics, permissions
from rest_framework.generics import get_object_or_404
from rest_framework.response import Response from rest_framework.response import Response
from akarpov.common.api.pagination import StandardResultsSetPagination from akarpov.common.api.pagination import StandardResultsSetPagination
from akarpov.common.api.permissions import IsAdminOrReadOnly, IsCreatorOrReadOnly from akarpov.common.api.permissions import IsAdminOrReadOnly, IsCreatorOrReadOnly
from akarpov.music.api.serializers import ( from akarpov.music.api.serializers import (
AddSongToPlaylistSerializer, AddSongToPlaylistSerializer,
AllSearchSerializer,
AnonMusicUserSerializer, AnonMusicUserSerializer,
FullAlbumSerializer, FullAlbumSerializer,
FullAuthorSerializer, FullAuthorSerializer,
@ -19,23 +17,19 @@
ListPlaylistSerializer, ListPlaylistSerializer,
ListSongSerializer, ListSongSerializer,
ListSongSlugsSerializer, ListSongSlugsSerializer,
MusicDraftCallbackSerializer,
MusicDraftSerializer,
PlaylistSerializer, PlaylistSerializer,
SongSerializer, SongSerializer,
) )
from akarpov.music.models import ( from akarpov.music.models import (
Album, Album,
Author, Author,
MusicDraft,
Playlist, Playlist,
Song, Song,
SongUserRating, SongUserRating,
UserListenHistory, UserListenHistory,
) )
from akarpov.music.services.search import search_album, search_author, search_song from akarpov.music.services.search import search_song
from akarpov.music.tasks import listen_to_song, process_draft_callback from akarpov.music.tasks import listen_to_song
from akarpov.users.models import User
class LikedSongsContextMixin(generics.GenericAPIView): class LikedSongsContextMixin(generics.GenericAPIView):
@ -357,25 +351,7 @@ class ListAlbumsAPIView(generics.ListAPIView):
serializer_class = ListAlbumSerializer serializer_class = ListAlbumSerializer
pagination_class = StandardResultsSetPagination pagination_class = StandardResultsSetPagination
permission_classes = [permissions.AllowAny] permission_classes = [permissions.AllowAny]
queryset = Album.objects.cache().all()
def get_queryset(self):
search = self.request.query_params.get("search", None)
if search:
return search_album(search)
return Album.objects.cache().all()
@extend_schema(
parameters=[
OpenApiParameter(
name="search",
description="Search query for albums",
required=False,
type=str,
),
]
)
def get(self, request, *args, **kwargs):
return super().get(request, *args, **kwargs)
class RetrieveUpdateDestroyAlbumAPIView( class RetrieveUpdateDestroyAlbumAPIView(
@ -392,25 +368,7 @@ class ListAuthorsAPIView(generics.ListAPIView):
serializer_class = ListAuthorSerializer serializer_class = ListAuthorSerializer
pagination_class = StandardResultsSetPagination pagination_class = StandardResultsSetPagination
permission_classes = [permissions.AllowAny] permission_classes = [permissions.AllowAny]
queryset = Author.objects.cache().all()
def get_queryset(self):
search = self.request.query_params.get("search", None)
if search:
return search_author(search)
return Author.objects.cache().all()
@extend_schema(
parameters=[
OpenApiParameter(
name="search",
description="Search query for authors",
required=False,
type=str,
),
]
)
def get(self, request, *args, **kwargs):
return super().get(request, *args, **kwargs)
class RetrieveUpdateDestroyAuthorAPIView( class RetrieveUpdateDestroyAuthorAPIView(
@ -433,27 +391,12 @@ def get_queryset(self):
def post(self, request, *args, **kwargs): def post(self, request, *args, **kwargs):
serializer = self.get_serializer(data=request.data) serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=False) serializer.is_valid(raise_exception=False)
data = serializer.validated_data
try: try:
song = Song.objects.cache().get(slug=self.request.data.get("song", "")) song = Song.objects.cache().get(slug=data["song"])
except Song.DoesNotExist: except Song.DoesNotExist:
return Response(status=404) return Response(status=404)
try:
user_id = self.request.data.get("user_id", None)
if user_id:
user_id_int = None
try:
user_id_int = int(user_id)
except ValueError:
...
if user_id_int:
user = User.objects.cache().get(id=user_id_int)
if user != self.request.user:
return Response(status=403)
except User.DoesNotExist:
...
if self.request.user.is_authenticated: if self.request.user.is_authenticated:
listen_to_song.apply_async( listen_to_song.apply_async(
kwargs={ kwargs={
@ -463,11 +406,11 @@ def post(self, request, *args, **kwargs):
}, },
countdown=2, countdown=2,
) )
elif "user_id" in self.request.data: elif "user_id" in data:
listen_to_song.apply_async( listen_to_song.apply_async(
kwargs={ kwargs={
"song_id": song.id, "song_id": song.id,
"user_id": self.request.data.get("user_id", None), "user_id": data["user_id"],
"anon": True, "anon": True,
}, },
countdown=2, countdown=2,
@ -496,88 +439,3 @@ def get_queryset(self):
class CreateAnonMusicUserAPIView(generics.CreateAPIView): class CreateAnonMusicUserAPIView(generics.CreateAPIView):
serializer_class = AnonMusicUserSerializer serializer_class = AnonMusicUserSerializer
permission_classes = [permissions.AllowAny] permission_classes = [permissions.AllowAny]
# Combined search endpoint, open to any caller: answers GET ?query=... with
# the top matching songs, albums and authors.
class SearchAllAPIView(LikedSongsContextMixin, generics.GenericAPIView):
    permission_classes = [permissions.AllowAny]
    serializer_class = AllSearchSerializer

    def get_serializer_context(self):
        # Start from the inherited context and set the current request on it.
        ctx = super().get_serializer_context()
        ctx["request"] = self.request
        return ctx

    @extend_schema(
        parameters=[
            OpenApiParameter(
                name="query",
                description="Search query",
                required=True,
                type=str,
            ),
        ],
        responses={
            200: AllSearchSerializer,
        },
    )
    def get(self, request, *args, **kwargs):
        query = request.query_params.get("query", "").strip()
        if not query:
            # A blank query short-circuits to an empty result set.
            return Response({"songs": [], "albums": [], "authors": []})

        top_songs = search_song(query)[:10]  # Top 10 songs
        top_albums = search_album(query)[:5]  # Top 5 albums
        top_authors = search_author(query)[:5]  # Top 5 authors

        sections = (
            ("songs", ListSongSerializer, top_songs),
            ("albums", ListAlbumSerializer, top_albums),
            ("authors", ListAuthorSerializer, top_authors),
        )
        # Build every serializer first, each with its own freshly built
        # context, then read their data in the same order.
        built = [
            (key, serializer_cls(rows, many=True, context=self.get_serializer_context()))
            for key, serializer_cls, rows in sections
        ]
        return Response({key: serializer.data for key, serializer in built})
# Receives status callbacks for a draft identified by its callback token and
# hands the validated payload to the process_draft_callback task.
class MusicDraftCallbackView(generics.GenericAPIView):
    serializer_class = MusicDraftCallbackSerializer

    @extend_schema(
        description="Callback endpoint for external music service",
        parameters=[
            OpenApiParameter(
                name="token",
                type=str,
                location=OpenApiParameter.PATH,
                description="Draft callback token",
            ),
        ],
    )
    def post(self, request, token):
        # 404 when no draft carries this callback token.
        draft = get_object_or_404(MusicDraft, callback_token=token)

        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        payload = serializer.validated_data

        # Queue the update and answer 202 right away.
        process_draft_callback.delay(
            draft_id=str(draft.id),
            status=payload["status"],
            meta_data=payload.get("meta_data"),
            error_message=payload.get("error_message"),
        )
        return Response(status=status.HTTP_202_ACCEPTED)
# Read-only detail endpoint for a MusicDraft, looked up by its file_token
# instead of the primary key.
class MusicDraftDetailView(generics.RetrieveAPIView):
queryset = MusicDraft.objects.all()
serializer_class = MusicDraftSerializer
lookup_field = "file_token"

View File

@ -1,7 +1,7 @@
from django_elasticsearch_dsl import Document, fields from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry from django_elasticsearch_dsl.registries import registry
from akarpov.music.models import Album, Author, Song from akarpov.music.models import Song
@registry.register_document @registry.register_document
@ -14,12 +14,6 @@ class SongDocument(Document):
"raw": fields.KeywordField(normalizer="lowercase"), "raw": fields.KeywordField(normalizer="lowercase"),
}, },
), ),
"name_transliterated": fields.TextField(
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
),
"link": fields.TextField(), "link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True), "meta": fields.ObjectField(dynamic=True),
}, },
@ -33,12 +27,6 @@ class SongDocument(Document):
"raw": fields.KeywordField(normalizer="lowercase"), "raw": fields.KeywordField(normalizer="lowercase"),
}, },
), ),
"name_transliterated": fields.TextField(
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
),
"link": fields.TextField(), "link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True), "meta": fields.ObjectField(dynamic=True),
}, },
@ -47,18 +35,9 @@ class SongDocument(Document):
name = fields.TextField( name = fields.TextField(
attr="name", attr="name",
fields={ fields={
"raw": fields.KeywordField(), "raw": fields.KeywordField(normalizer="lowercase"),
"exact": fields.KeywordField(normalizer="lowercase"),
}, },
) )
name_transliterated = fields.TextField(
attr="name",
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
)
suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True) meta = fields.ObjectField(dynamic=True)
@ -69,17 +48,13 @@ class Index:
"number_of_replicas": 0, "number_of_replicas": 0,
"analysis": { "analysis": {
"filter": { "filter": {
"my_transliterator": {
"type": "icu_transform",
"id": "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC",
},
"russian_stop": { "russian_stop": {
"type": "stop", "type": "stop",
"stopwords": "_russian_", "stopwords": "_russian_",
}, },
"russian_keywords": { "russian_keywords": {
"type": "keyword_marker", "type": "keyword_marker",
"keywords": ["песня", "музыка", "певец", "альбом"], "keywords": ["пример"],
}, },
"russian_stemmer": { "russian_stemmer": {
"type": "stemmer", "type": "stemmer",
@ -105,13 +80,6 @@ class Index:
}, },
}, },
"analyzer": { "analyzer": {
"transliterate": {
"tokenizer": "standard",
"filter": [
"lowercase",
"my_transliterator",
],
},
"russian": { "russian": {
"tokenizer": "standard", "tokenizer": "standard",
"filter": [ "filter": [
@ -169,74 +137,3 @@ def get_instances_from_related(self, related_instance):
if isinstance(related_instance, Song): if isinstance(related_instance, Song):
return related_instance.album return related_instance.album
return related_instance.songs.all() return related_instance.songs.all()
# Search document for the Author model, stored in the "authors" index.
@registry.register_document
class AuthorDocument(Document):
# Author name with two keyword sub-fields: "raw" as is and "exact"
# lowercase-normalized.
name = fields.TextField(
fields={
"raw": fields.KeywordField(),
"exact": fields.KeywordField(normalizer="lowercase"),
},
)
# Same source attribute ("name"), indexed through the "transliterate" analyzer.
name_transliterated = fields.TextField(
attr="name",
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
)
suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True)
class Index:
name = "authors"
settings = SongDocument.Index.settings # Reuse settings
class Django:
model = Author
# Search document for the Album model, stored in the "albums" index, with the
# album's authors embedded as nested objects.
@registry.register_document
class AlbumDocument(Document):
# Album name with two keyword sub-fields: "raw" as is and "exact"
# lowercase-normalized.
name = fields.TextField(
fields={
"raw": fields.KeywordField(),
"exact": fields.KeywordField(normalizer="lowercase"),
},
)
# Same source attribute ("name"), indexed through the "transliterate" analyzer.
name_transliterated = fields.TextField(
attr="name",
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
)
suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True)
# Nested author objects read from the "authors" attribute.
authors = fields.NestedField(
attr="authors",
properties={
"name": fields.TextField(
fields={
"raw": fields.KeywordField(normalizer="lowercase"),
},
),
"name_transliterated": fields.TextField(
attr="name",
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
),
"link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True),
},
)
class Index:
name = "albums"
settings = SongDocument.Index.settings # Reuse settings
class Django:
model = Album

View File

@ -1,89 +0,0 @@
# Generated by Django 4.2.16 on 2024-10-26 10:37
from django.db import migrations, models
import django.db.models.deletion
import uuid
# Schema migration for the "music" app: creates the MusicDraft and
# MusicDraftFile models.
class Migration(migrations.Migration):
# Must run after migration 0016 of the same app.
dependencies = [
("music", "0016_anonmusicuser_song_created_song_volume_and_more"),
]
operations = [
# MusicDraft: UUID primary key, status/provider choices, source URL,
# metadata, tokens and timestamps.
migrations.CreateModel(
name="MusicDraft",
fields=[
(
"id",
models.UUIDField(
default=uuid.uuid4,
editable=False,
primary_key=True,
serialize=False,
),
),
(
"status",
models.CharField(
choices=[
("pending", "Pending"),
("processing", "Processing"),
("failed", "Failed"),
("complete", "Complete"),
],
default="pending",
max_length=20,
),
),
(
"provider",
models.CharField(
choices=[
("spotify", "Spotify"),
("yandex", "Yandex"),
("youtube", "YouTube"),
],
max_length=20,
),
),
("original_url", models.URLField()),
("meta_data", models.JSONField(blank=True, null=True)),
("file_token", models.CharField(max_length=100, unique=True)),
("created", models.DateTimeField(auto_now_add=True)),
("updated", models.DateTimeField(auto_now=True)),
("error_message", models.TextField(blank=True, null=True)),
("user_id", models.IntegerField(null=True)),
(
"callback_token",
models.UUIDField(default=uuid.uuid4, editable=False),
),
],
),
# MusicDraftFile: a file uploaded under "music_drafts/", linked to its
# draft with cascading delete.
migrations.CreateModel(
name="MusicDraftFile",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("file", models.FileField(upload_to="music_drafts/")),
("original_name", models.CharField(max_length=255)),
("mime_type", models.CharField(max_length=100)),
("created", models.DateTimeField(auto_now_add=True)),
(
"draft",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="files",
to="music.musicdraft",
),
),
],
),
]

View File

@ -1,7 +1,6 @@
import uuid import uuid
from django.contrib.postgres.fields import ArrayField from django.contrib.postgres.fields import ArrayField
from django.contrib.sites.models import Site
from django.db import models from django.db import models
from django.urls import reverse from django.urls import reverse
@ -87,11 +86,6 @@ def album_name(self):
def artists_names(self): def artists_names(self):
return cache_model_property(self, "_authors_names") return cache_model_property(self, "_authors_names")
def get_first_author_name(self):
if self.authors:
return self.authors.first().name
return ""
def __str__(self): def __str__(self):
return self.name return self.name
@ -99,50 +93,6 @@ class SlugMeta:
slug_length = 10 slug_length = 10
class MusicDraft(models.Model):
    """Record of a music download request and its processing state.

    Stores the source URL and provider, a status, optional metadata and error
    text, and the tokens used to identify the draft's file and callback.
    """

    STATUS_CHOICES = (
        ("pending", "Pending"),
        ("processing", "Processing"),
        ("failed", "Failed"),
        ("complete", "Complete"),
    )
    PROVIDER_CHOICES = (
        ("spotify", "Spotify"),
        ("yandex", "Yandex"),
        ("youtube", "YouTube"),
    )

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default="pending",
    )
    provider = models.CharField(max_length=20, choices=PROVIDER_CHOICES)
    original_url = models.URLField()
    meta_data = models.JSONField(null=True, blank=True)
    # NOTE(review): unique but without a default, so rows created without an
    # explicit file_token would all share the empty string - confirm that
    # every creator of a draft supplies one.
    file_token = models.CharField(max_length=100, unique=True)
    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(auto_now=True)
    error_message = models.TextField(null=True, blank=True)
    # Plain integer rather than a foreign key; may be NULL.
    user_id = models.IntegerField(null=True)
    callback_token = models.UUIDField(default=uuid.uuid4, editable=False)

    def get_callback_url(self):
        """Return the absolute https URL of this draft's callback endpoint.

        The host is the current ``Site`` domain; the path is the reversed
        ``music:api:draft-callback`` route for ``callback_token``.
        """
        current_site = Site.objects.get_current()
        callback_path = reverse(
            "music:api:draft-callback",
            kwargs={"token": self.callback_token},
        )
        return f"https://{current_site.domain}{callback_path}"
class MusicDraftFile(models.Model):
    """A file uploaded for a MusicDraft, stored under ``music_drafts/``."""

    # Rows are removed together with their draft (CASCADE); reachable from
    # the draft as ``draft.files``.
    draft = models.ForeignKey(
        MusicDraft,
        related_name="files",
        on_delete=models.CASCADE,
    )
    file = models.FileField(upload_to="music_drafts/")
    # Name and MIME type as recorded alongside the stored file.
    original_name = models.CharField(max_length=255)
    mime_type = models.CharField(max_length=100)
    created = models.DateTimeField(auto_now_add=True)
class Playlist(ShortLinkModel, UserHistoryModel): class Playlist(ShortLinkModel, UserHistoryModel):
name = models.CharField(max_length=200) name = models.CharField(max_length=200)
private = models.BooleanField(default=True) private = models.BooleanField(default=True)

View File

@ -2,11 +2,7 @@
import re import re
import requests import requests
from deep_translator import GoogleTranslator
try:
from deep_translator import GoogleTranslator # TODO: move to another service
except requests.exceptions.JSONDecodeError:
print("Failed to initialize GoogleTranslator due to external API issues.")
from django.core.files import File from django.core.files import File
from django.db import transaction from django.db import transaction
from django.utils.text import slugify from django.utils.text import slugify
@ -126,15 +122,12 @@ def load_track(
album=album, album=album,
): ):
return sng.first() return sng.first()
try:
if not path.endswith(".mp3"): if not path.endswith(".mp3"):
mp3_path = path.replace(path.split(".")[-1], "mp3") mp3_path = path.replace(path.split(".")[-1], "mp3")
AudioSegment.from_file(path).export(mp3_path) AudioSegment.from_file(path).export(mp3_path)
os.remove(path) os.remove(path)
path = mp3_path path = mp3_path
except Exception as e:
print(e)
return Song.objects.none()
tag = MP3(path, ID3=ID3) tag = MP3(path, ID3=ID3)

View File

@ -1,84 +0,0 @@
import os
from django.core.files import File
from akarpov.music.models import Album, Author, MusicDraft, Song
def _fail_draft(draft: MusicDraft, message: str) -> None:
    """Mark *draft* as failed, record *message* as its error and save it."""
    draft.status = "failed"
    draft.error_message = message
    draft.save()


def save_song_from_draft(draft: MusicDraft) -> Song | None:
    """
    Create a Song instance from a completed MusicDraft.

    The first draft file whose MIME type starts with ``audio/`` becomes the
    song file; the first ``image/`` file, if any, becomes its image. Album and
    authors are looked up or created by name from ``draft.meta_data``.

    All database writes happen in one transaction, so a failure part-way
    through does not leave a half-built Song behind. On success the draft is
    deleted and the Song returned; on any failure the draft is marked
    ``failed`` with the error text and ``None`` is returned.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level imports.
    from django.db import transaction

    try:
        if not draft.files.exists():
            _fail_draft(draft, "No files associated with draft")
            return None

        # Get the music file
        draft_file = draft.files.filter(mime_type__startswith="audio/").first()
        if not draft_file:
            _fail_draft(draft, "No audio file found in draft")
            return None

        # Get metadata from draft
        meta_data = draft.meta_data or {}

        with transaction.atomic():
            song = Song(
                name=meta_data.get("title", ""),
                length=meta_data.get("length"),
                link=draft.original_url,
                meta=meta_data,
            )

            # Handle album
            if "album" in meta_data:
                album, _ = Album.objects.get_or_create(
                    name=meta_data["album"].get("name", ""),
                    defaults={"meta": meta_data["album"]},
                )
                song.album = album

            # Saving the file also saves the song, which gives it the ID the
            # many-to-many author assignment below needs.
            with open(draft_file.file.path, "rb") as f:
                song.file.save(
                    os.path.basename(draft_file.original_name), File(f), save=True
                )

            # Handle authors
            if "artists" in meta_data:
                authors = []
                for artist_data in meta_data["artists"]:
                    author, _ = Author.objects.get_or_create(
                        name=artist_data.get("name", ""),
                        defaults={"meta": artist_data},
                    )
                    authors.append(author)
                song.authors.set(authors)

            # Handle image if present
            image_file = draft.files.filter(mime_type__startswith="image/").first()
            if image_file:
                with open(image_file.file.path, "rb") as f:
                    song.image.save(
                        os.path.basename(image_file.original_name),
                        File(f),
                        save=True,
                    )

            # Add user if specified
            if draft.user_id:
                song.creator_id = draft.user_id
            song.save()

            # Remove the draft now that the song exists.
            # NOTE(review): this deletes database rows; whether the stored
            # draft files are removed from storage is not visible here -
            # confirm cleanup happens elsewhere.
            draft.delete()

        return song

    except Exception as e:
        # Deliberate catch-all: any failure is recorded on the draft instead
        # of propagating. The transaction above has been rolled back by now.
        _fail_draft(draft, str(e))
        return None

View File

@ -3,15 +3,10 @@
import requests import requests
import spotipy import spotipy
from deep_translator import GoogleTranslator
try:
from deep_translator import GoogleTranslator
except requests.exceptions.JSONDecodeError:
print("Failed to initialize GoogleTranslator due to external API issues.")
from django.conf import settings from django.conf import settings
from django.core.files import File from django.core.files import File
from django.db import transaction from django.db import transaction
from django.db.models import Model
from django.utils.text import slugify from django.utils.text import slugify
from spotipy import SpotifyClientCredentials from spotipy import SpotifyClientCredentials
from yandex_music import Client, Cover from yandex_music import Client, Cover
@ -22,33 +17,30 @@
from akarpov.utils.text import is_similar_artist, normalize_text from akarpov.utils.text import is_similar_artist, normalize_text
def generate_readable_slug(name: str, model: Model) -> str: def generate_readable_slug(name: str, model) -> str:
# Translate and slugify the name # Translate and slugify the name
slug = safe_translate(name) slug = str(
slugify(
GoogleTranslator(source="auto", target="en").translate(
name,
target_language="en",
)
)
)
# Truncate slug if it's too long
if len(slug) > 20: if len(slug) > 20:
slug = slug[:20] slug = slug[:20]
last_dash = slug.rfind("-") last_dash = slug.rfind("-")
if last_dash != -1: if last_dash != -1:
slug = slug[:last_dash] slug = slug[:last_dash]
original_slug = slug
# Ensure uniqueness
counter = 1
while model.objects.filter(slug=slug).exists(): while model.objects.filter(slug=slug).exists():
if len(original_slug) > 14: if len(slug) > 14:
truncated_slug = original_slug[:14] slug = slug[:14]
last_dash = truncated_slug.rfind("-") last_dash = slug.rfind("-")
if last_dash != -1: if last_dash != -1:
truncated_slug = truncated_slug[:last_dash] slug = slug[:last_dash]
else: slug = slug + "_" + generate_charset(5)
truncated_slug = original_slug
suffix = f"_{generate_charset(5)}" if counter == 1 else f"_{counter}"
slug = f"{truncated_slug}{suffix}"
counter += 1
return slug return slug
@ -221,12 +213,16 @@ def update_album_info(album: AlbumModel, author_name: str = None) -> None:
client = yandex_login() client = yandex_login()
spotify_session = create_spotify_session() spotify_session = create_spotify_session()
search_term = f"{album.name} - {author_name}" if author_name else album.name if author_name:
yandex_album_info = get_yandex_album_info(
yandex_album_info = get_api_info(get_yandex_album_info, search_term, client) album.name + " - " + author_name, client
spotify_album_info = get_api_info( )
get_spotify_album_info, search_term, spotify_session spotify_album_info = get_spotify_album_info(
) album.name + " - " + author_name, spotify_session
)
else:
yandex_album_info = get_yandex_album_info(album.name, client)
spotify_album_info = get_spotify_album_info(album.name, spotify_session)
# Combine and prioritize Spotify data # Combine and prioritize Spotify data
album_data = {} album_data = {}
@ -236,14 +232,14 @@ def update_album_info(album: AlbumModel, author_name: str = None) -> None:
"name": spotify_album_info.get("name", album.name), "name": spotify_album_info.get("name", album.name),
"release_date": spotify_album_info.get("release_date", ""), "release_date": spotify_album_info.get("release_date", ""),
"total_tracks": spotify_album_info.get("total_tracks", ""), "total_tracks": spotify_album_info.get("total_tracks", ""),
"link": spotify_album_info.get("external_urls", {}).get("spotify", ""), "link": spotify_album_info["external_urls"]["spotify"],
"genre": spotify_album_info.get("genres", []), "genre": spotify_album_info.get("genres", []),
} }
if yandex_album_info: if yandex_album_info:
album_data.update( album_data.update(
{ {
"name": album_data.get("name") or yandex_album_info.title, "name": album_data.get("name", yandex_album_info.title),
"genre": album_data.get("genre") or yandex_album_info.genre, "genre": album_data.get("genre", yandex_album_info.genre),
"description": yandex_album_info.description, "description": yandex_album_info.description,
"type": yandex_album_info.type, "type": yandex_album_info.type,
} }
@ -253,120 +249,102 @@ def update_album_info(album: AlbumModel, author_name: str = None) -> None:
album.save() album.save()
# Handle Album Image - Prefer Spotify, fallback to Yandex # Handle Album Image - Prefer Spotify, fallback to Yandex
image_path = get_album_image(spotify_album_info, yandex_album_info) image_path = None
if (
spotify_album_info
and "images" in spotify_album_info
and spotify_album_info["images"]
):
image_path = download_image(
spotify_album_info["images"][0]["url"], settings.MEDIA_ROOT
)
elif yandex_album_info and yandex_album_info.cover_uri:
image_path = download_image(
"https://" + yandex_album_info.cover_uri, settings.MEDIA_ROOT
)
generated_name = slugify(
GoogleTranslator(source="auto", target="en").translate(
album.name,
target_language="en",
)
)
if image_path: if image_path:
save_album_image(album, image_path)
# Update Album Authors from Spotify data if available
if spotify_album_info and "artists" in spotify_album_info:
update_album_authors(album, spotify_album_info["artists"])
album.slug = generate_readable_slug(album.name, AlbumModel)
album.save()
def get_album_image(spotify_info, yandex_info):
if spotify_info and "images" in spotify_info and spotify_info["images"]:
return download_image(spotify_info["images"][0]["url"], settings.MEDIA_ROOT)
elif yandex_info and yandex_info.cover_uri:
return download_image("https://" + yandex_info.cover_uri, settings.MEDIA_ROOT)
return None
def save_album_image(album, image_path):
if not image_path:
return
try:
generated_name = safe_translate(album.name)
with open(image_path, "rb") as f: with open(image_path, "rb") as f:
album.image.save( album.image.save(
generated_name + ".png", generated_name + ".png",
File(f, name=generated_name + ".png"), File(
f,
name=generated_name + ".png",
),
save=True, save=True,
) )
os.remove(image_path) os.remove(image_path)
album.save() album.save()
except Exception as e:
print(f"Error saving album image: {str(e)}")
# Update Album Authors from Spotify data if available
if spotify_album_info and "artists" in spotify_album_info:
album_authors = []
for artist in spotify_album_info["artists"]:
author, created = Author.objects.get_or_create(name=artist["name"])
album_authors.append(author)
album.authors.set(album_authors)
def update_album_authors(album, artists): album.slug = generate_readable_slug(album.name, AlbumModel)
album_authors = [] album.save()
for artist in artists:
author, created = Author.objects.get_or_create(name=artist["name"])
album_authors.append(author)
album.authors.set(album_authors)
def update_author_info(author: Author) -> None: def update_author_info(author: Author) -> None:
client = yandex_login() client = yandex_login()
spotify_session = create_spotify_session() spotify_session = create_spotify_session()
yandex_artist_info = get_api_info(get_yandex_artist_info, author.name, client) # Retrieve info from both services
spotify_artist_info = get_api_info( yandex_artist_info = get_yandex_artist_info(author.name, client)
get_spotify_artist_info, author.name, spotify_session spotify_artist_info = get_spotify_artist_info(author.name, spotify_session)
)
author_data = combine_artist_data(author, spotify_artist_info, yandex_artist_info) # Combine and prioritize Spotify data
with transaction.atomic():
author.meta = author_data
author.save()
image_path = get_author_image(spotify_artist_info, yandex_artist_info)
if image_path:
save_author_image(author, image_path)
author.slug = generate_readable_slug(author.name, Author)
with transaction.atomic():
author.save()
def get_api_info(api_func, search_term, session):
try:
return api_func(search_term, session)
except Exception as e:
print(f"Error fetching info from {api_func.__name__}: {str(e)}")
return None
def combine_artist_data(author, spotify_info, yandex_info):
author_data = {} author_data = {}
if spotify_info: if spotify_artist_info:
author_data = { author_data = {
"name": spotify_info.get("name", author.name), "name": spotify_artist_info.get("name", author.name),
"genres": spotify_info.get("genres", []), "genres": spotify_artist_info.get("genres", []),
"popularity": spotify_info.get("popularity", 0), "popularity": spotify_artist_info.get("popularity", 0),
"link": spotify_info.get("external_urls", {}).get("spotify", ""), "link": spotify_artist_info["external_urls"]["spotify"],
} }
if yandex_info: if yandex_artist_info:
author_data.update( author_data.update(
{ {
"name": author_data.get("name") or yandex_info.name, "name": author_data.get("name", yandex_artist_info.name),
"genres": author_data.get("genres") or yandex_info.genres, "genres": author_data.get("genres", yandex_artist_info.genres),
"description": yandex_info.description, "description": yandex_artist_info.description,
} }
) )
return author_data
author.meta = author_data
with transaction.atomic():
author.save()
def get_author_image(spotify_info, yandex_info): # Handle Author Image - Prefer Spotify, fallback to Yandex
if spotify_info and "images" in spotify_info and spotify_info["images"]: image_path = None
return download_image(spotify_info["images"][0]["url"], settings.MEDIA_ROOT) if (
elif yandex_info and yandex_info.cover: spotify_artist_info
return download_image(yandex_info.cover, settings.MEDIA_ROOT) and "images" in spotify_artist_info
return None and spotify_artist_info["images"]
):
image_path = download_image(
spotify_artist_info["images"][0]["url"], settings.MEDIA_ROOT
)
elif yandex_artist_info and yandex_artist_info.cover:
image_path = download_image(yandex_artist_info.cover, settings.MEDIA_ROOT)
generated_name = slugify(
def save_author_image(author, image_path): GoogleTranslator(source="auto", target="en").translate(
if not image_path: author.name,
return target_language="en",
)
try: )
generated_name = safe_translate(author.name) if image_path:
with open(image_path, "rb") as f: with open(image_path, "rb") as f:
author.image.save( author.image.save(
generated_name + ".png", generated_name + ".png",
@ -375,29 +353,21 @@ def save_author_image(author, image_path):
) )
os.remove(image_path) os.remove(image_path)
author.save() author.save()
except Exception as e:
print(f"Error saving author image: {str(e)}")
author.slug = generate_readable_slug(author.name, Author)
def safe_translate(text): with transaction.atomic():
try: author.save()
translated = GoogleTranslator(source="auto", target="en").translate(text)
return slugify(translated)
except Exception as e:
print(f"Error translating text: {str(e)}")
return slugify(text)
def search_all_platforms(track_name: str) -> dict: def search_all_platforms(track_name: str) -> dict:
print(track_name) print(track_name)
# session = spotipy.Spotify( session = spotipy.Spotify(
# auth_manager=spotipy.SpotifyClientCredentials( auth_manager=spotipy.SpotifyClientCredentials(
# client_id=settings.MUSIC_SPOTIFY_ID, client_id=settings.MUSIC_SPOTIFY_ID,
# client_secret=settings.MUSIC_SPOTIFY_SECRET, client_secret=settings.MUSIC_SPOTIFY_SECRET,
# ) )
# ) )
# spotify_info = get_spotify_info(track_name, session) spotify_info = get_spotify_info(track_name, session)
spotify_info = {} # TODO: add proxy for info retrieve
yandex_info = search_yandex(track_name) yandex_info = search_yandex(track_name)
if "album_image_path" in spotify_info and "album_image_path" in yandex_info: if "album_image_path" in spotify_info and "album_image_path" in yandex_info:
os.remove(yandex_info["album_image_path"]) os.remove(yandex_info["album_image_path"])

View File

@ -1,156 +1,48 @@
from django.core.cache import cache
from django.db.models import Case, When from django.db.models import Case, When
from django_elasticsearch_dsl.registries import registry
from elasticsearch_dsl import Q as ES_Q from elasticsearch_dsl import Q as ES_Q
from akarpov.music.documents import AlbumDocument, AuthorDocument, SongDocument from akarpov.music.documents import SongDocument
from akarpov.music.models import Album, Author, Song from akarpov.music.models import Song
def search_song(query): def search_song(query):
search = SongDocument.search() search = SongDocument.search()
search_query = ES_Q(
should_queries = [ "bool",
ES_Q("match_phrase", name={"query": query, "boost": 5}), should=[
ES_Q( ES_Q(
"nested", "multi_match",
path="authors", query=query,
query=ES_Q("match_phrase", name={"query": query, "boost": 4}), fields=["name^5", "authors.name^3", "album.name^3"],
), fuzziness="AUTO",
ES_Q(
"nested",
path="album",
query=ES_Q("match_phrase", name={"query": query, "boost": 4}),
),
ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
ES_Q(
"nested",
path="authors",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 2}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 2}),
),
ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 1}),
ES_Q(
"nested",
path="authors",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 0.8}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 0.8}),
),
ES_Q(
"match",
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
),
ES_Q(
"nested",
path="authors",
query=ES_Q(
"match",
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 0.8},
), ),
), ES_Q("wildcard", name__raw=f"*{query.lower()}*"),
ES_Q( ES_Q(
"nested", "nested",
path="album", path="authors",
query=ES_Q( query=ES_Q("wildcard", authors__name__raw=f"*{query.lower()}*"),
"match",
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 0.8},
), ),
), ES_Q(
] "nested",
path="album",
query=ES_Q("wildcard", album__name__raw=f"*{query.lower()}*"),
),
ES_Q("wildcard", meta__raw=f"*{query.lower()}*"),
],
minimum_should_match=1,
)
search_query = ES_Q("bool", should=should_queries, minimum_should_match=1) search = search.query(search_query)
search = search.query(search_query).extra(size=20)
response = search.execute() response = search.execute()
# Check for hits and get song instances
if response.hits: if response.hits:
hit_ids = [hit.meta.id for hit in response.hits] hit_ids = [hit.meta.id for hit in response.hits]
songs = Song.objects.filter(id__in=hit_ids).order_by( songs = Song.objects.filter(id__in=hit_ids).order_by(
Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)]) Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
) )
return songs return songs
return Song.objects.none() return Song.objects.none()
def autocomplete_search(query):
    """Return the text of every completion option suggested for *query*.

    Issues a completion suggester named ``song_suggest`` on the ``suggest``
    field of ``SongDocument`` and reads the options of its first entry.
    """
    suggest_request = SongDocument.search().suggest(
        "song_suggest", query, completion={"field": "suggest"}
    )
    first_entry = suggest_request.execute().suggest.song_suggest[0]
    return [option.text for option in first_entry.options]
def get_popular_songs():
    """Return up to ten songs played more than 300 times, most played first.

    The result is cached under ``popular_songs`` for one hour. The cache is
    read with a single ``get`` so that an entry expiring between a membership
    check and the read cannot make this function return ``None``.
    """
    songs = cache.get("popular_songs")
    if songs is None:
        songs = Song.objects.filter(played__gt=300).order_by("-played")[:10]
        cache.set("popular_songs", songs, timeout=3600)
    return songs
def bulk_update_index(model_class):
    """Re-index every row of *model_class* into its registered documents.

    NOTE(review): the previous implementation passed the queryset to
    ``registry.update``, which appears to dispatch on the class of a single
    model instance and would therefore index nothing for a queryset - confirm
    against the installed django-elasticsearch-dsl version. Here each document
    registered for the model is updated with the queryset directly, in chunks
    of 100 actions per bulk request.
    """
    qs = model_class.objects.all()
    for document in registry.get_documents([model_class]):
        document().update(qs, chunk_size=100)
def _search_by_name(document, model, query):
    """Search *document* by name and return matching *model* rows in hit order.

    Combines four optional clauses, at least one of which must match: an exact
    phrase match on ``name`` (boost 5), a fuzzy match on ``name`` (boost 3), a
    lower-cased substring wildcard on ``name`` (boost 1) and a fuzzy match on
    ``name_transliterated`` (boost 1). At most ten hits are requested.

    Returns a queryset ordered like the search hits, or an empty queryset when
    nothing matched.
    """
    should_queries = [
        ES_Q("match_phrase", name={"query": query, "boost": 5}),
        ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
        ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 1}),
        ES_Q(
            "match",
            name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
        ),
    ]
    search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)
    response = document.search().query(search_query).extra(size=10).execute()

    hit_ids = [hit.meta.id for hit in response.hits]
    if not hit_ids:
        return model.objects.none()

    # Keep the database rows in the order the search returned them.
    hit_order = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
    return model.objects.filter(id__in=hit_ids).order_by(hit_order)


def search_author(query):
    """Return authors whose name matches *query*, ordered like the search hits."""
    return _search_by_name(AuthorDocument, Author, query)


def search_album(query):
    """Return albums whose name matches *query*, ordered like the search hits."""
    return _search_by_name(AlbumDocument, Album, query)

View File

@ -75,40 +75,13 @@ def load_file_meta(track: int, user_id: int) -> str:
return str(song) return str(song)
def load_url(link: str, user_id: int): def load_playlist(link: str, user_id: int):
author = link.split("/")[4]
playlist_id = link.split("/")[-1]
client = login() client = login()
obj_id = link.split("/")[-1] playlist = client.users_playlists(int(playlist_id), author) # type: Playlist
obj_id = obj_id.split("?")[0] for track in playlist.fetch_tracks():
try:
obj_id = int(obj_id)
except ValueError:
print("Invalid link")
return None
if "/playlists/" in link:
author = link.split("/")[4]
playlist = client.users_playlists(obj_id, author) # type: Playlist
for track in playlist.fetch_tracks():
tasks.load_ym_file_meta.apply_async(
kwargs={"track": track.track.id, "user_id": user_id}
)
elif "/album/" in link:
album = client.albums_with_tracks(obj_id)
for volume in album.volumes:
for track in volume:
tasks.load_ym_file_meta.apply_async(
kwargs={"track": track.id, "user_id": user_id}
)
elif "/artist/" in link:
artist = client.artists(obj_id)[0]
albums = artist.get_albums(page_size=100)
for album in albums:
for track in album.fetch_tracks():
tasks.load_ym_file_meta.apply_async(
kwargs={"track": track.id, "user_id": user_id}
)
else:
tasks.load_ym_file_meta.apply_async( tasks.load_ym_file_meta.apply_async(
kwargs={"track": obj_id, "user_id": user_id} kwargs={"track": track.track.id, "user_id": user_id}
) )

View File

@ -2,190 +2,103 @@
from urllib.parse import parse_qs, urlparse from urllib.parse import parse_qs, urlparse
import pylast import pylast
import requests
import spotipy import spotipy
import structlog import structlog
import ytmusicapi
from asgiref.sync import async_to_sync from asgiref.sync import async_to_sync
from celery import shared_task from celery import shared_task
from channels.layers import get_channel_layer from channels.layers import get_channel_layer
from django.conf import settings from django.conf import settings
from django.shortcuts import get_object_or_404
from django.utils import timezone from django.utils import timezone
from django.utils.timezone import now from django.utils.timezone import now
from spotipy import SpotifyClientCredentials from spotipy import SpotifyClientCredentials
from ytmusicapi import YTMusic
from akarpov.music.api.serializers import SongSerializer from akarpov.music.api.serializers import SongSerializer
from akarpov.music.models import ( from akarpov.music.models import (
AnonMusicUser, AnonMusicUser,
AnonMusicUserHistory, AnonMusicUserHistory,
MusicDraft,
RadioSong, RadioSong,
Song, Song,
UserListenHistory, UserListenHistory,
UserMusicProfile, UserMusicProfile,
) )
from akarpov.music.services import spotify, yandex, youtube from akarpov.music.services import spotify, yandex, youtube
from akarpov.music.services.drafts import save_song_from_draft
from akarpov.music.services.file import load_dir, load_file from akarpov.music.services.file import load_dir, load_file
from akarpov.utils.celery import get_scheduled_tasks_name from akarpov.utils.celery import get_scheduled_tasks_name
logger = structlog.get_logger(__name__) logger = structlog.get_logger(__name__)
@shared_task(soft_time_limit=60 * 60, time_limit=60 * 120) @shared_task(soft_time_limit=60 * 20, time_limit=60 * 30)
def list_tracks(self, url: str, user_id: int | None = None) -> str | None: def list_tracks(url, user_id):
"""Update list_tracks to handle failures""" if "music.youtube.com" in url or "youtu.be" in url:
try: url = url.replace("music.youtube.com", "youtube.com")
url = normalize_url(url) url = url.replace("youtu.be", "youtube.com")
handlers = { if "spotify.com" in url:
"spotify.com": handle_spotify, spotify.download_url(url, user_id)
"music.yandex.ru": handle_yandex, elif "music.yandex.ru" in url:
"youtube.com": handle_youtube, yandex.load_playlist(url, user_id)
} if "youtube.com" in url:
if "channel" in url or "/c/" in url:
ytmusic = ytmusicapi.YTMusic()
channel_id = url.split("/")[-1]
channel_songs = ytmusic.get_artist(channel_id)["songs"]["results"]
for domain, handler in handlers.items(): for song in channel_songs:
if domain in url: process_yb.apply_async(
return handler(url, user_id) kwargs={
"url": f"https://youtube.com/watch?v={song['videoId']}",
"user_id": user_id,
}
)
return fallback_search(url, user_id) elif "playlist" in url or "&list=" in url:
except Exception as e: ytmusic = ytmusicapi.YTMusic()
draft = MusicDraft.objects.create(
provider="unknown",
original_url=url,
user_id=user_id,
status="pending",
error_message=str(e),
)
handle_download_failure.delay(str(draft.id), self.request.id)
return None
# Parse the URL and the query string
parsed_url = urlparse(url)
parsed_qs = parse_qs(parsed_url.query)
def normalize_url(url): # Get the playlist ID from the parsed query string
return url.replace("music.youtube.com", "youtube.com").replace( playlist_id = parsed_qs.get("list", [None])[0]
"youtu.be", "youtube.com"
)
if playlist_id:
playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]
def handle_spotify(url: str, user_id: int | None = None) -> str | None: else:
return download_spotify_url.delay(url, user_id) raise ValueError("No playlist ID found in the URL.")
for song in playlist_songs:
process_yb.apply_async(
def handle_yandex(url: str, user_id: int | None = None) -> str | None: kwargs={
return load_yandex_url.delay(url, user_id) "url": f"https://music.youtube.com/watch?v={song['videoId']}",
"user_id": user_id,
}
def handle_youtube(url: str, user_id: int | None = None) -> str | None: )
"""Handle YouTube downloads""" else:
if "channel" in url or "/c/" in url: process_yb.apply_async(kwargs={"url": url, "user_id": user_id})
return handle_youtube_channel(url, user_id)
elif "playlist" in url or "&list=" in url:
return handle_youtube_playlist(url, user_id)
else: else:
return process_yb.delay(url, user_id) spotify_manager = SpotifyClientCredentials(
client_id=settings.MUSIC_SPOTIFY_ID,
client_secret=settings.MUSIC_SPOTIFY_SECRET,
def handle_youtube_channel(url, user_id):
ytmusic = YTMusic()
channel_id = url.split("/")[-1]
channel_songs = ytmusic.get_artist(channel_id)["songs"]["results"]
for song in channel_songs:
process_yb.apply_async(
kwargs={
"url": f"https://youtube.com/watch?v={song['videoId']}",
"user_id": user_id,
}
) )
return url spotify_search = spotipy.Spotify(client_credentials_manager=spotify_manager)
results = spotify_search.search(q=url, type="track", limit=1)
def handle_youtube_playlist(url, user_id): top_track = (
ytmusic = YTMusic() results["tracks"]["items"][0] if results["tracks"]["items"] else None
parsed_url = urlparse(url)
parsed_qs = parse_qs(parsed_url.query)
playlist_id = parsed_qs.get("list", [None])[0]
if not playlist_id:
raise ValueError("No playlist ID found in the URL.")
playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]
for song in playlist_songs:
process_yb.apply_async(
kwargs={
"url": f"https://music.youtube.com/watch?v={song['videoId']}",
"user_id": user_id,
}
) )
return url
if top_track:
def fallback_search(url, user_id): spotify.download_url(top_track["external_urls"]["spotify"], user_id)
spotify_manager = SpotifyClientCredentials( url = top_track["external_urls"]["spotify"]
client_id=settings.MUSIC_SPOTIFY_ID,
client_secret=settings.MUSIC_SPOTIFY_SECRET,
)
spotify_search = spotipy.Spotify(client_credentials_manager=spotify_manager)
results = spotify_search.search(q=url, type="track", limit=1)
top_track = results["tracks"]["items"][0] if results["tracks"]["items"] else None
if top_track:
spotify_url = top_track["external_urls"]["spotify"]
spotify.download_url(spotify_url, user_id)
return spotify_url
return url return url
@shared_task(bind=True) @shared_task(max_retries=5)
def process_yb(self, url: str, user_id: int | None = None) -> str | None: def process_yb(url, user_id):
"""Update YouTube download to handle failures""" youtube.download_from_youtube_link(url, user_id)
try: return url
return str(youtube.download_from_youtube_link(url, user_id))
except Exception as e:
draft = MusicDraft.objects.create(
provider="youtube",
original_url=url,
user_id=user_id,
status="pending",
error_message=str(e),
)
handle_download_failure.delay(str(draft.id), self.request.id)
return None
@shared_task(bind=True)
def download_spotify_url(self, url: str, user_id: int | None = None) -> str | None:
try:
return spotify.download_url(url, user_id)
except Exception as e:
draft = MusicDraft.objects.create(
provider="spotify",
original_url=url,
user_id=user_id,
status="pending",
error_message=str(e),
)
handle_download_failure.delay(str(draft.id), self.request.id)
return None
@shared_task(bind=True)
def load_yandex_url(self, url: str, user_id: int | None = None) -> str | None:
try:
return yandex.load_url(url, user_id)
except Exception as e:
draft = MusicDraft.objects.create(
provider="yandex",
original_url=url,
user_id=user_id,
status="pending",
error_message=str(e),
)
handle_download_failure.delay(str(draft.id), self.request.id)
return None
@shared_task @shared_task
@ -298,71 +211,14 @@ def listen_to_song(song_id, user_id=None, anon=True):
session_key=lastfm_token, session_key=lastfm_token,
) )
song = Song.objects.get(id=song_id) song = Song.objects.get(id=song_id)
artist_name = song.get_first_author_name() artist_name = song.artists_names
track_name = song.name track_name = song.name
album_name = song.album.name
timestamp = int(timezone.now().timestamp()) timestamp = int(timezone.now().timestamp())
network.scrobble( network.scrobble(
artist=artist_name, artist=artist_name, title=track_name, timestamp=timestamp
title=track_name,
timestamp=timestamp,
album=album_name,
)
network.update_now_playing(
artist=artist_name, title=track_name, album=album_name
) )
except UserMusicProfile.DoesNotExist: except UserMusicProfile.DoesNotExist:
pass pass
except Exception as e: except Exception as e:
logger.error(f"Last.fm scrobble error: {e}") logger.error(f"Last.fm scrobble error: {e}")
return song_id return song_id
@shared_task
def handle_download_failure(draft_id: str, original_task_id: str):
    """
    Handle failed downloads by sending request to external service.

    Posts the draft's URL, provider, callback URL and file token to the
    download service. The draft becomes ``processing`` when the service
    answers 202 and ``failed`` otherwise, including when the request itself
    cannot be completed.

    :param draft_id: primary key of the MusicDraft to hand off.
    :param original_task_id: id of the task that failed; currently unused.
    :return: ``True`` if the service accepted the request, else ``False``.
    """
    draft = MusicDraft.objects.get(id=draft_id)

    external_service_url = "http://music-download-service/api/v1/download"
    try:
        response = requests.post(
            external_service_url,
            json={
                "url": draft.original_url,
                "provider": draft.provider,
                "callback_url": draft.get_callback_url(),
                "file_token": draft.file_token,
            },
            # Without a timeout an unresponsive service would block the
            # worker indefinitely.
            timeout=30,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts used to escape and leave the draft
        # stuck in its previous status; record them like any other rejection.
        draft.status = "failed"
        draft.error_message = f"External service request failed: {exc}"
        draft.save()
        return False

    if response.status_code != 202:
        draft.status = "failed"
        draft.error_message = f"External service request failed: {response.text}"
        draft.save()
        return False

    draft.status = "processing"
    draft.save()
    return True
@shared_task
def process_draft_callback(
    draft_id: str, status: str, meta_data: dict = None, error_message: str = None
):
    """
    Process callback from external service.

    Stores *status* on the draft, along with *meta_data* and *error_message*
    when they are non-empty, then saves it. A ``complete`` status additionally
    triggers conversion of the draft into a Song.
    """
    draft = get_object_or_404(MusicDraft, id=draft_id)

    draft.status = status
    optional_updates = {"meta_data": meta_data, "error_message": error_message}
    for field_name, value in optional_updates.items():
        # Empty or missing values leave the stored field untouched.
        if value:
            setattr(draft, field_name, value)
    draft.save()

    if status != "complete":
        return
    save_song_from_draft(draft)

View File

@ -2,7 +2,7 @@
{% block content %} {% block content %}
<h1>Welcome to music app</h1> <h1>Welcome to music app</h1>
<p>This is mainly the backend of music, you should consider using side clients like: <a href="https://next.akarpov.ru/music">otomir23's client</a> or my <a href="https://t.me/akarpov_music_bot">inline telegram bot</a></p> <p>This is mainly the backend of music, you should consider using side clients like: <a href="https://next.akarpov.ru/music">otomir23's client</a></p>
{% if request.user.is_authenticated %} {% if request.user.is_authenticated %}
{% if last_fm_account %} {% if last_fm_account %}
<p>Last.fm connected to {{ last_fm_account }}, <a href="{% url 'music:lastfm_connect' %}">reconnect</a></p> <p>Last.fm connected to {{ last_fm_account }}, <a href="{% url 'music:lastfm_connect' %}">reconnect</a></p>

View File

@ -1,8 +1,6 @@
from drf_spectacular.extensions import OpenApiAuthenticationExtension
from drf_spectacular.plumbing import build_bearer_security_scheme_object
from rest_framework.authentication import BaseAuthentication from rest_framework.authentication import BaseAuthentication
from akarpov.users.models import User, UserAPIToken from akarpov.users.models import UserAPIToken
from akarpov.users.tasks import set_last_active_token from akarpov.users.tasks import set_last_active_token
@ -21,14 +19,4 @@ def authenticate(self, request):
return None return None
set_last_active_token.delay(token.token) set_last_active_token.delay(token.token)
return User.objects.cache().get(id=token.user_id), token return token.user, token
class UserTokenAuthenticationExtension(OpenApiAuthenticationExtension):
    """Describe UserTokenAuthentication to the OpenAPI schema as a bearer scheme."""

    name = "UserTokenAuthentication"
    target_class = "akarpov.users.api.authentification.UserTokenAuthentication"

    def get_security_definition(self, auto_schema):
        """Return the bearer security scheme built for the Authorization header."""
        scheme = build_bearer_security_scheme_object(
            header_name="Authorization",
            token_prefix="Bearer",
        )
        return scheme

View File

@ -18,8 +18,6 @@
) )
from akarpov.users.models import User from akarpov.users.models import User
from .authentification import UserTokenAuthentication # noqa: F401
class UserRegisterAPIViewSet(generics.CreateAPIView): class UserRegisterAPIViewSet(generics.CreateAPIView):
"""Creates new user and sends verification email""" """Creates new user and sends verification email"""

View File

@ -214,16 +214,23 @@ def list_tokens(request):
@login_required @login_required
def create_token(request): def create_token(request):
initial_data = {} initial_data = {}
# Обработка параметров 'name' и 'active_until'
if "name" in request.GET: if "name" in request.GET:
initial_data["name"] = request.GET["name"] initial_data["name"] = request.GET["name"]
if "active_until" in request.GET: if "active_until" in request.GET:
initial_data["active_until"] = request.GET["active_until"] initial_data["active_until"] = request.GET["active_until"]
# Создаем QueryDict для разрешений, чтобы правильно обработать повторяющиеся ключи
permissions_query_dict = QueryDict("", mutable=True) permissions_query_dict = QueryDict("", mutable=True)
# Разбор параметров разрешений
permissions = request.GET.getlist("permissions") permissions = request.GET.getlist("permissions")
for perm in permissions: for perm in permissions:
category, permission = perm.split(".") category, permission = perm.split(".")
permissions_query_dict.update({f"permissions_{category}": [permission]}) permissions_query_dict.update({f"permissions_{category}": [permission]})
# Переводим QueryDict в обычный словарь для использования в initial
permissions_data = {key: value for key, value in permissions_query_dict.lists()} permissions_data = {key: value for key, value in permissions_query_dict.lists()}
initial_data.update(permissions_data) initial_data.update(permissions_data)
@ -235,6 +242,7 @@ def create_token(request):
initial=initial_data, permissions_context=UserAPIToken.permission_template initial=initial_data, permissions_context=UserAPIToken.permission_template
) )
if request.method == "POST": if request.method == "POST":
print(request.POST)
form = TokenCreationForm(request.POST) form = TokenCreationForm(request.POST)
if form.is_valid(): if form.is_valid():
new_token = form.save(commit=False) new_token = form.save(commit=False)

View File

@ -28,6 +28,8 @@ RUN apt-get update && \
apt-get install -y build-essential libpq-dev gettext libmagic-dev libjpeg-dev zlib1g-dev && \ apt-get install -y build-essential libpq-dev gettext libmagic-dev libjpeg-dev zlib1g-dev && \
# Dependencies for file preview generation # Dependencies for file preview generation
apt-get install -y webp git libimage-exiftool-perl libmagickwand-dev ffmpeg libgdal-dev && \ apt-get install -y webp git libimage-exiftool-perl libmagickwand-dev ffmpeg libgdal-dev && \
# ML dependencies \
# none for now
apt-get purge -y --auto-remove -o APT:AutoRemove:RecommendsImportant=false && \ apt-get purge -y --auto-remove -o APT:AutoRemove:RecommendsImportant=false && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
@ -46,6 +48,7 @@ RUN poetry export --without-hashes -f requirements.txt | /venv/bin/pip install -
COPY . . COPY . .
RUN poetry build && /venv/bin/pip install dist/*.whl RUN poetry build && /venv/bin/pip install dist/*.whl
RUN /venv/bin/python -m nltk.downloader punkt stopwords wordnet
COPY ./compose/production/django/entrypoint /entrypoint COPY ./compose/production/django/entrypoint /entrypoint
@ -68,6 +71,10 @@ COPY ./compose/local/django/start-redirect /start-redirect
RUN sed -i 's/\r$//g' /start-redirect RUN sed -i 's/\r$//g' /start-redirect
RUN chmod +x /start-redirect RUN chmod +x /start-redirect
COPY ./compose/local/django/install_preview_dependencies /install_preview_dependencies
RUN sed -i 's/\r$//g' /install_preview_dependencies
RUN chmod +x /install_preview_dependencies
COPY ./compose/local/django/celery/worker/start /start-celeryworker COPY ./compose/local/django/celery/worker/start /start-celeryworker
RUN sed -i 's/\r$//g' /start-celeryworker RUN sed -i 's/\r$//g' /start-celeryworker
RUN chmod +x /start-celeryworker RUN chmod +x /start-celeryworker

View File

@ -3,4 +3,6 @@
set -o errexit set -o errexit
set -o nounset set -o nounset
/install_preview_dependencies
celery -A config.celery_app worker --autoscale 20 -l INFO celery -A config.celery_app worker --autoscale 20 -l INFO

View File

@ -0,0 +1,14 @@
#!/bin/bash
# Installs the system packages used for file preview generation, installs the
# draw.io desktop package, then runs the `preview` dependency check.

apt-get update

# Every install passes -y: the script is invoked from the Celery worker start
# script with nobody to answer apt's confirmation prompt, and without -y the
# install stops at that prompt.
apt-get install -y wget libnotify4 scribus libappindicator3-1 libayatana-indicator3-7 libdbusmenu-glib4 libdbusmenu-gtk3-4
apt-get install -y poppler-utils libfile-mimeinfo-perl ghostscript libsecret-1-0 zlib1g-dev libjpeg-dev imagemagick libmagic1 libreoffice inkscape xvfb
apt-get install -y libxml2-dev libxslt1-dev antiword unrtf tesseract-ocr flac lame libmad0 libsox-fmt-mp3 sox swig
apt-get install -y python-dev-is-python3 libxml2-dev libxslt1-dev antiword unrtf poppler-utils tesseract-ocr \
    flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig

# draw.io is installed from the release .deb; the download is removed afterwards.
wget https://github.com/jgraph/drawio-desktop/releases/download/v13.0.3/draw.io-amd64-13.0.3.deb
dpkg -i draw.io-amd64-13.0.3.deb
rm draw.io-amd64-13.0.3.deb

# Drop packages that are no longer needed and the apt package lists.
apt-get purge -y --auto-remove -o APT:AutoRemove:RecommendsImportant=false && \
    rm -rf /var/lib/apt/lists/*

preview --check-dependencies

View File

@ -80,7 +80,6 @@
"music.*": {"ops": ("fetch", "get", "list"), "timeout": 60 * 15}, "music.*": {"ops": ("fetch", "get", "list"), "timeout": 60 * 15},
"otp_totp.totpdevice": {"ops": "all", "timeout": 15 * 60}, "otp_totp.totpdevice": {"ops": "all", "timeout": 15 * 60},
"users.userapitoken": {"ops": "all", "timeout": 20 * 60}, "users.userapitoken": {"ops": "all", "timeout": 20 * 60},
"users.user": {"ops": "all", "timeout": 5 * 60},
} }
CACHEOPS_REDIS = env.str("REDIS_URL") CACHEOPS_REDIS = env.str("REDIS_URL")
@ -529,11 +528,6 @@
{"url": "http://127.0.0.1:8000", "description": "Local Development server"}, {"url": "http://127.0.0.1:8000", "description": "Local Development server"},
{"url": "https://new.akarpov.ru", "description": "Production server"}, {"url": "https://new.akarpov.ru", "description": "Production server"},
], ],
"EXTENSIONS": {
"authentication": [
"akarpov.users.api.authentification.UserTokenAuthenticationExtension"
],
},
} }
# CKEDITOR # CKEDITOR
@ -754,13 +748,6 @@
ELASTICSEARCH_DSL = { ELASTICSEARCH_DSL = {
"default": {"hosts": env("ELASTIC_SEARCH", default="http://127.0.0.1:9200/")}, "default": {"hosts": env("ELASTIC_SEARCH", default="http://127.0.0.1:9200/")},
} }
USE_DEBUG_TOOLBAR = False USE_DEBUG_TOOLBAR = False
SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")
USE_X_FORWARDED_HOST = True
USE_X_FORWARDED_PORT = True
# PREVIEW
# ------------------------------------------------------------------------------
PREVIEW_SERVICE_URL = env("PREVIEW_SERVICE_URL", default=None)
PREVIEW_SERVICE_API_KEY = env("PREVIEW_SERVICE_API_KEY", default=None)

5903
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -69,6 +69,7 @@ channels = {extras = ["daphne"], version = "^4.0.0"}
django-upload-validator = "^1.1.6" django-upload-validator = "^1.1.6"
markdown = "^3.4.4" markdown = "^3.4.4"
pydotplus = "^2.0.2" pydotplus = "^2.0.2"
preview-generator = "^0.29"
uuid = "^1.30" uuid = "^1.30"
mutagen = "^1.46.0" mutagen = "^1.46.0"
pydub = "^0.25.1" pydub = "^0.25.1"
@ -99,8 +100,11 @@ pytest-mock = "^3.11.1"
pytest-asyncio = "^0.21.1" pytest-asyncio = "^0.21.1"
pytest-lambda = "^2.2.0" pytest-lambda = "^2.2.0"
pgvector = "^0.2.2" pgvector = "^0.2.2"
pycld2 = "^0.41"
uuid6 = "^2023.5.2" uuid6 = "^2023.5.2"
uvicorn = "0.23.2" uvicorn = "0.23.2"
nltk = "^3.8.1"
pymorphy3 = "^1.2.1"
pymorphy3-dicts-ru = "^2.4.417150.4580142" pymorphy3-dicts-ru = "^2.4.417150.4580142"
fastapi = "0.103.0" fastapi = "0.103.0"
pydantic-settings = "^2.0.3" pydantic-settings = "^2.0.3"
@ -114,9 +118,9 @@ spotdl = "^4.2.4"
fuzzywuzzy = "^0.18.0" fuzzywuzzy = "^0.18.0"
python-levenshtein = "^0.23.0" python-levenshtein = "^0.23.0"
pylast = "^5.2.0" pylast = "^5.2.0"
textract = {git = "https://github.com/Alexander-D-Karpov/textract.git", branch = "master"}
librosa = "^0.10.1" librosa = "^0.10.1"
django-ckeditor-5 = "^0.2.12" django-ckeditor-5 = "^0.2.12"
chardet = "^5.2.0"
[build-system] [build-system]

0
search/__init__.py Normal file
View File

6
search/pipeline.py Normal file
View File

@ -0,0 +1,6 @@
from haystack import Document
from milvus_haystack import MilvusDocumentStore
# Module-level smoke run of the document store: build a store, write one
# sample document, then read all documents back.
# NOTE(review): the store is created with default arguments — presumably this
# needs a reachable Milvus instance; confirm before importing this module.
ds = MilvusDocumentStore()
ds.write_documents([Document("Some Content")])
# The result is discarded; nothing is printed or asserted here.
ds.get_all_documents()

2185
search/poetry.lock generated Normal file

File diff suppressed because it is too large Load Diff

18
search/pyproject.toml Normal file
View File

@ -0,0 +1,18 @@
[tool.poetry]
name = "search"
version = "0.1.0"
description = ""
authors = ["Alexander-D-Karpov <alexandr.d.karpov@gmail.com>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.11"
fastapi = "0.99.1"
pydantic = "1.10.13"
transformers = {version = "4.34.1", extras = ["torch"]}
torch = ">=2.0.0, !=2.0.1, !=2.1.0"
farm-haystack = {extras = ["faiss"], version = "^1.21.2"}
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

4
spacy_setup.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/bash
# Downloads three spaCy model packages with the interpreter found on PATH.
# NOTE(review): going by the package names these are English, multi-language
# and Russian models — confirm which ones the application actually loads.
python -m spacy download en_core_web_lg
python -m spacy download xx_sent_ud_sm
python -m spacy download ru_core_news_lg