Compare commits

...

36 Commits

Author SHA1 Message Date
dependabot[bot]
909e0deb38
Merge 9d974f9954 into e7fd5a993e 2024-11-21 02:44:47 +00:00
e7fd5a993e updated music service to proxy download failed tracks 2024-10-26 13:42:34 +03:00
f320fa2d62 docker fix 2024-10-14 21:47:11 +03:00
cb41b4b6ba ci fix 2024-10-14 21:46:14 +03:00
060506a672 docker fix 2024-10-14 21:40:15 +03:00
f5835d2821 removed unused dependencies from project, moved files process to external service 2024-10-14 21:34:16 +03:00
03c7c5309c updated search 2024-10-04 17:03:31 +03:00
67d9dc324e bug fixes 2024-08-29 19:40:56 +03:00
4efce95785 bug fixes 2024-08-29 19:37:23 +03:00
5e9c01fc29 updated slugification 2024-08-28 21:12:29 +03:00
4726767e86 refactored album and author info retrieve 2024-08-28 20:53:20 +03:00
1fa8f1b9e3 updated music listing 2024-08-28 20:29:39 +03:00
b63beb8da8 fixed music info retrieve 2024-07-31 11:31:30 +03:00
4aa1d207aa bug fixes 2024-04-22 15:52:33 +03:00
f5545ed7d4 bug fixes 2024-04-22 15:35:51 +03:00
1fb6219b7c bug fixes 2024-04-22 15:15:42 +03:00
b4124d90bb bug fixes 2024-04-22 14:55:07 +03:00
a6740c4faf bug fixes 2024-04-22 14:40:20 +03:00
67dc026c7e fixed search 2024-04-22 14:29:45 +03:00
7e188865cc updated music search 2024-04-22 14:17:11 +03:00
4a2b86509e updated search 2024-04-11 12:48:09 +03:00
c6063942c2 fixed translator error 2024-04-11 12:37:08 +03:00
0fa2d34e2f updated search 2024-04-11 12:29:10 +03:00
f6e2d1fe4b updated last fm scrobling 2024-04-10 17:15:41 +03:00
9dd23e1a01 updated last fm scrobling 2024-04-08 14:08:56 +03:00
6bce18344f updated caching, api docs 2024-04-07 23:28:29 +03:00
c772c1a97b updated caches 2024-04-07 23:14:33 +03:00
709bda158a bug fixes 2024-04-07 23:05:25 +03:00
e9dcccbced bug fixes 2024-04-07 22:58:29 +03:00
06afed5882 bug fixes 2024-04-07 22:51:32 +03:00
2cce8813e9 updated file serving 2024-04-07 14:59:10 +03:00
d49be5c42e bug fixes 2024-04-07 00:22:51 +03:00
64f28fc1c8 updated music info 2024-04-07 00:06:54 +03:00
b148a3d591 updated music fetching 2024-04-06 23:48:03 +03:00
00668b6f18 updated music fetching 2024-04-06 23:34:05 +03:00
dependabot[bot]
9d974f9954
Bump traefik from 2.10.7 to 2.11.0 in /compose/production/traefik
Bumps traefik from 2.10.7 to 2.11.0.

---
updated-dependencies:
- dependency-name: traefik
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-13 14:38:57 +00:00
42 changed files with 3933 additions and 5894 deletions

View File

@ -15,3 +15,5 @@ LAST_FM_SECRET=
SPOTIFY_ID=
SPOTIFY_SECRET=
YANDEX_TOKEN=
PREVIEW_SERVICE_API_KEY=
PREVIEW_SERVICE_URL=

View File

@ -24,10 +24,10 @@ jobs:
steps:
- name: Checkout Code Repository
uses: actions/checkout@v3
uses: actions/checkout@v4.2.1
- name: Cache packages
uses: actions/cache@v3
uses: actions/cache@v4.1.1
id: cache-packages
with:
path: "~/packages/"
@ -45,18 +45,18 @@ jobs:
sudo dpkg -L libimage-exiftool-perl libmagickwand-dev | while IFS= read -r f; do if test -f $f; then echo $f; fi; done | xargs cp --parents --target-directory ~/packages/
fi
- uses: actions/checkout@v3
- uses: actions/checkout@v4.2.1
- name: Install poetry
run: pipx install poetry
- uses: actions/setup-python@v5
- uses: actions/setup-python@v5.2.0
with:
python-version: '3.11'
cache: 'poetry'
- run: poetry install
- name: Run pre-commit
uses: pre-commit/action@v2.0.3
uses: pre-commit/action@v3.0.1
# With no caching at all the entire ci process takes 4m 30s to complete!
pytest:
@ -64,7 +64,10 @@ jobs:
steps:
- name: Checkout Code Repository
uses: actions/checkout@v3
uses: actions/checkout@v4.2.1
- name: Install Docker Compose
run: sudo apt-get update && sudo apt-get install -y docker-compose
- name: Build the Stack
run: docker-compose -f local.yml build

View File

@ -28,10 +28,6 @@ $ uvicorn redirect.app:app --reload
```shell
$ docker-compose -f local.yml up
```
Install file preview dependencies
```shell
$ docker-compose -f local.yml exec django /install_preview_dependencies
```
- server - http://127.0.0.1:8000
- mail - http://127.0.0.1:8025

View File

@ -1,51 +0,0 @@
import pycld2 as cld2
import spacy
import torch
from transformers import AutoModel, AutoTokenizer
# load ml classes and models on first request
# TODO: move to outer server/service
nlp = None
ru_nlp = None
ru_model = None
ru_tokenizer = None
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def get_text_embedding(text: str):
    """Return model embeddings for Russian text, or ``None`` otherwise.

    The language is detected with ``cld2``. ``None`` is returned when the
    detection is not reliable, when the detected language code is neither
    "ru" nor "en", and also for "en", which has no model wired in. For "ru"
    the text is lemmatised with spaCy, tokenised and passed through the
    ``DeepPavlov/rubert-base-cased`` model; the first element of the model
    output is returned.

    The spaCy pipeline, the model and the tokenizer are created on first use
    and kept in module-level globals.
    """
    global nlp, ru_nlp, ru_model, ru_tokenizer

    is_reliable, text_bytes_found, details = cld2.detect(text)
    if not is_reliable:
        return None

    lang = details[0][1]
    if lang not in ["ru", "en"]:
        return None
    if lang != "ru":
        # "en" passes the language gate but produces no embedding.
        return None

    # Lazily load the heavy objects the first time Russian text is seen.
    if not ru_nlp:
        ru_nlp = spacy.load("ru_core_news_md", disable=["parser", "ner"])
    lemmatized = " ".join([token.lemma_ for token in ru_nlp(text)])

    if not ru_model:
        ru_model = AutoModel.from_pretrained("DeepPavlov/rubert-base-cased")
    if not ru_tokenizer:
        ru_tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")

    # Pad to a common length and ask for tensors rather than lists.
    encodings = ru_tokenizer(lemmatized, padding=True, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = ru_model(**encodings)
    return outputs[0]

View File

@ -1,16 +1,10 @@
import textract
from akarpov.files.models import File
def view(file: File):
static = ""
content = ""
text = (
textract.process(file.file.path, extension="doc", output_encoding="utf8")
.decode("utf8")
.replace("\t", " ")
)
text = file.content.replace("\t", " ")
for line in text.split("\n"):
content += f"<p class='mt-1'>{line}</p>"
return static, content

View File

@ -1,16 +1,10 @@
import textract
from akarpov.files.models import File
def view(file: File):
static = ""
content = ""
text = (
textract.process(file.file.path, extension="docx", output_encoding="utf8")
.decode("utf8")
.replace("\t", " ")
)
text = file.content.replace("\t", " ")
for line in text.split("\n"):
content += f"<p class='mt-1'>{line}</p>"
return static, content

View File

@ -1,16 +1,10 @@
import textract
from akarpov.files.models import File
def view(file: File):
static = ""
content = ""
text = (
textract.process(file.file.path, extension="odt", output_encoding="utf8")
.decode("utf8")
.replace("\t", " ")
)
text = file.content.replace("\t", " ")
for line in text.split("\n"):
content += f"<p class='mt-1'>{line}</p>"
return static, content

View File

@ -1,5 +1,3 @@
import textract
from akarpov.files.models import File
@ -7,11 +5,7 @@ def view(file: File) -> (str, str):
static = f"""
<meta property="og:title" content="{file.name}" />
"""
text = (
textract.process(file.file.path, extension="ogg", output_encoding="utf8")
.decode("utf8")
.replace("\t", " ")
)
text = file.content.replace("\t", " ")
content = (
"""
<div id="waveform">

View File

@ -1,42 +0,0 @@
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from pymorphy3 import MorphAnalyzer
# Set up stop words
english_stopwords = set(stopwords.words("english"))
russian_stopwords = set(stopwords.words("russian"))
# Set up lemmatizers
english_lemmatizer = None
russian_lemmatizer = None
def lemmatize_and_remove_stopwords(text, language="english"):
    """Lemmatize ``text`` and drop English and Russian stop words.

    Args:
        text: the text to normalise.
        language: "russian" selects the pymorphy3 analyzer; any other value
            falls back to the WordNet lemmatizer.

    Returns:
        The remaining lemmas joined with single spaces.

    The lemmatizers are created on first use and cached in module globals.
    """
    global english_lemmatizer, russian_lemmatizer

    use_russian = language == "russian"
    lemmas = []
    for word in word_tokenize(text):
        if use_russian:
            if not russian_lemmatizer:
                russian_lemmatizer = MorphAnalyzer()
            lemmas.append(russian_lemmatizer.parse(word)[0].normal_form)
            continue
        # Every other language value is treated as English.
        if not english_lemmatizer:
            english_lemmatizer = WordNetLemmatizer()
        lemmas.append(english_lemmatizer.lemmatize(word))

    # Both stop-word sets are applied regardless of the selected language.
    return " ".join(
        lemma
        for lemma in lemmas
        if not (lemma in english_stopwords or lemma in russian_stopwords)
    )

View File

@ -1,8 +1,4 @@
from math import ceil
import magic
from PIL import Image, ImageDraw, ImageFont
from preview_generator.manager import PreviewManager
from akarpov.files.models import File
@ -19,90 +15,11 @@
manager = None
def textfile_to_image(textfile_path) -> Image:
    """Render the contents of a text file as a grayscale image.

    Args:
        textfile_path: path of the text file whose content is drawn.

    Returns:
        A ``PIL_GRAYSCALE`` image with the text drawn in black on a white
        background, surrounded by a 20 pixel margin. An empty file yields a
        margin-only image.

    The first entry of ``COMMON_MONO_FONT_FILENAMES`` that can be loaded is
    used as the font; PIL's default font is the fallback.
    """
    # Parse the file into lines stripped of whitespace on the right side.
    with open(textfile_path) as f:
        lines = tuple(line.rstrip() for line in f)
    if not lines:
        # max() below raises ValueError on an empty sequence, so an empty
        # file is rendered as one blank line instead of crashing.
        lines = ("",)

    font: ImageFont = None
    large_font = 20  # get better resolution with larger size
    for font_filename in COMMON_MONO_FONT_FILENAMES:
        try:
            font = ImageFont.truetype(font_filename, size=large_font)
            print(f'Using font "{font_filename}".')
            break
        except OSError:
            print(f'Could not load font "{font_filename}".')
    if font is None:
        font = ImageFont.load_default()
        print("Using default font.")

    def _font_points_to_pixels(pt):
        # 96 pixels per inch over 72 points per inch.
        return round(pt * 96.0 / 72)

    margin_pixels = 20

    # NOTE(review): font.getsize() was removed in Pillow 10 — confirm the
    # pinned Pillow version, or migrate to getbbox()/getlength().
    # Height of the background image.
    tallest_line = max(lines, key=lambda line: font.getsize(line)[PIL_HEIGHT_INDEX])
    max_line_height = _font_points_to_pixels(
        font.getsize(tallest_line)[PIL_HEIGHT_INDEX]
    )
    # Lines are packed slightly tighter than the measured maximum height.
    realistic_line_height = max_line_height * 0.8
    image_height = int(ceil(realistic_line_height * len(lines) + 2 * margin_pixels))

    # Width of the background image.
    widest_line = max(lines, key=lambda s: font.getsize(s)[PIL_WIDTH_INDEX])
    max_line_width = _font_points_to_pixels(font.getsize(widest_line)[PIL_WIDTH_INDEX])
    image_width = int(ceil(max_line_width + (2 * margin_pixels)))

    # Draw the background.
    background_color = 255  # white
    image = Image.new(
        PIL_GRAYSCALE, (image_width, image_height), color=background_color
    )
    draw = ImageDraw.Draw(image)

    # Draw each line of text.
    font_color = 0  # black
    horizontal_position = margin_pixels
    for i, line in enumerate(lines):
        vertical_position = int(round(margin_pixels + (i * realistic_line_height)))
        draw.text(
            (horizontal_position, vertical_position), line, fill=font_color, font=font
        )
    return image
def create_preview(file_path: str) -> str:
    """Return the JPEG preview for ``file_path``, or "" when none is available.

    The shared ``PreviewManager`` is created on first use and stored in the
    module-level ``manager``. Previews are requested with a height of 500.
    """
    global manager
    # TODO: add text image generation/code image
    if not manager:
        manager = PreviewManager(cache_path, create_folder=True)
    if not manager.has_jpeg_preview(file_path):
        return ""
    return manager.get_jpeg_preview(file_path, height=500)
def get_file_mimetype(file_path: str) -> str:
    """Return the MIME type that ``magic`` detects for the file at ``file_path``."""
    return magic.Magic(mime=True).from_file(file_path)
def get_description(file_path: str) -> str:
    """Return the text preview for ``file_path``, or "" when none is available.

    Uses the same lazily created module-level ``PreviewManager`` as the JPEG
    preview helper.
    """
    global manager
    if not manager:
        manager = PreviewManager(cache_path, create_folder=True)
    if not manager.has_text_preview(file_path):
        return ""
    return manager.get_text_preview(file_path)
def get_base_meta(file: File):
preview = file.preview.url if file.preview else ""
description = file.description if file.description else ""

View File

@ -11,12 +11,6 @@
from akarpov.files.models import File
from ..documents import FileDocument
from .lema import lemmatize_and_remove_stopwords
"""
Calculus on types of searches:
https://new.akarpov.ru/files/FZUTFBIyfbdlDHVzxUNU
"""
class BaseSearch:
@ -140,23 +134,20 @@ class SimilaritySearch(BaseSearch):
def search(self, query: str) -> QuerySet[File]:
if self.queryset is None:
raise ValueError("Queryset cannot be None for similarity search")
language = "russian" if re.search("[а-яА-Я]", query) else "english"
filtered_query = lemmatize_and_remove_stopwords(query, language=language)
queryset = (
self.queryset.annotate(
name_similarity=Coalesce(
TrigramSimilarity(UnaccentLower("name"), filtered_query),
TrigramSimilarity(UnaccentLower("name"), query),
Value(0),
output_field=FloatField(),
),
description_similarity=Coalesce(
TrigramSimilarity(UnaccentLower("description"), filtered_query),
TrigramSimilarity(UnaccentLower("description"), query),
Value(0),
output_field=FloatField(),
),
content_similarity=Coalesce(
TrigramSimilarity(UnaccentLower("content"), filtered_query),
TrigramSimilarity(UnaccentLower("content"), query),
Value(0),
output_field=FloatField(),
),

View File

@ -1,18 +0,0 @@
import chardet
import textract
from textract.exceptions import ExtensionNotSupported
def extract_file_text(file: str) -> str:
    """Extract the text content of the file at path ``file``.

    ``textract`` is tried first. When it does not support the file's
    extension, the file is read as plain text using the encoding guessed by
    ``chardet``. Any failure in that fallback yields an empty string.

    Args:
        file: filesystem path of the file to read.

    Returns:
        The extracted text, or "" when the plain-text fallback fails.
    """
    try:
        # NOTE(review): textract.process() appears to return bytes rather
        # than str — confirm callers handle that; left unchanged here.
        text = textract.process(file)
    except ExtensionNotSupported:
        try:
            # Read the raw bytes inside a context manager so the handle is
            # closed promptly instead of being left to the garbage collector.
            with open(file, "rb") as raw:
                rawdata = raw.read()
            enc = chardet.detect(rawdata)
            with open(file, encoding=enc["encoding"]) as f:
                text = f.read()
        except Exception:
            # Deliberate best effort: unreadable files have no text content.
            return ""
    return text

View File

@ -1,40 +1,69 @@
import os
import base64
import time
from urllib.parse import urljoin
import requests
import structlog
from celery import shared_task
from django.conf import settings
from django.core import management
from django.core.files import File
from django.core.files.base import ContentFile
from akarpov.files.models import File as FileModel
from akarpov.files.services.preview import create_preview, get_file_mimetype
from akarpov.files.services.text import extract_file_text
logger = structlog.get_logger(__name__)
def sanitize_content(content):
    """Strip NUL (0x00) characters from text or byte content.

    Values that are neither ``str`` nor ``bytes`` are returned unchanged.
    """
    if isinstance(content, bytes):
        return content.replace(b"\x00", b"")
    if isinstance(content, str):
        return content.replace("\x00", "")
    return content
@shared_task()
def process_file(pk: int):
pth = None
file = FileModel.objects.get(pk=pk)
if not file.name:
file.name = file.file.name.split("/")[-1]
try:
pth = create_preview(file.file.path)
if pth:
with open(pth, "rb") as f:
file.preview.save(
pth.split("/")[-1],
File(f),
save=False,
)
api_url = urljoin(settings.PREVIEW_SERVICE_URL, "/process_file/")
files = {"file": (file.name, file.file.open("rb"))}
headers = {
"X-API-Key": settings.PREVIEW_SERVICE_API_KEY,
"Accept": "application/json",
}
response = requests.post(api_url, files=files, headers=headers)
if response.status_code != 200:
logger.error(f"Failed to process file {pk}: {response.text}")
return
result = response.json()
file.file_type = result["file_type"]
file.content = sanitize_content(result["content"])
if result["preview"]:
image_data = base64.b64decode(result["preview"])
file.preview.save(
f"{file.name}_preview.jpg", ContentFile(image_data), save=False
)
file.save()
logger.info(f"File {pk} processed successfully")
except Exception as e:
logger.error(e)
file.file_type = get_file_mimetype(file.file.path)
file.content = extract_file_text(file.file.path)
file.save(update_fields=["preview", "name", "file_type", "content"])
if pth and os.path.isfile(pth):
os.remove(pth)
logger.error(f"Error processing file {pk}: {str(e)}")
finally:
file.file.close()
return pk

View File

@ -7,6 +7,8 @@
Album,
AnonMusicUser,
Author,
MusicDraft,
MusicDraftFile,
Playlist,
PlaylistSong,
Song,
@ -378,3 +380,61 @@ class Meta:
"link": {"read_only": True},
"image": {"read_only": True},
}
class AllSearchSerializer(serializers.Serializer):
    """Combined search result with songs, authors and albums for one query.

    Every field is computed from ``obj["query"]``, so the serialized object
    must be a mapping that carries the search string under the "query" key.
    """

    songs = serializers.SerializerMethodField(method_name="get_songs")
    authors = serializers.SerializerMethodField(method_name="get_authors")
    albums = serializers.SerializerMethodField(method_name="get_albums")

    @extend_schema_field(ListSongSerializer(many=True))
    def get_songs(self, obj):
        """Return up to 10 serialized songs matching ``obj["query"]``."""
        # Only the song serializer is handed this serializer's context.
        return ListSongSerializer(
            Song.objects.cache().search(obj["query"]).to_queryset()[:10],
            many=True,
            context=self.context,
        ).data

    @extend_schema_field(ListAuthorSerializer(many=True))
    def get_authors(self, obj):
        """Return up to 10 serialized authors matching ``obj["query"]``."""
        return ListAuthorSerializer(
            Author.objects.cache().search(obj["query"]).to_queryset()[:10], many=True
        ).data

    @extend_schema_field(ListAlbumSerializer(many=True))
    def get_albums(self, obj):
        """Return up to 10 serialized albums matching ``obj["query"]``."""
        return ListAlbumSerializer(
            Album.objects.cache().search(obj["query"]).to_queryset()[:10], many=True
        ).data
class MusicDraftFileSerializer(serializers.ModelSerializer):
    """Serializes a ``MusicDraftFile`` as its file, original name and MIME type."""

    class Meta:
        model = MusicDraftFile
        fields = ["file", "original_name", "mime_type"]
class MusicDraftSerializer(serializers.ModelSerializer):
    """Serializes a ``MusicDraft`` together with its attached files."""

    # Nested, read-only list of the draft's files.
    files = MusicDraftFileSerializer(many=True, read_only=True)

    class Meta:
        model = MusicDraft
        fields = [
            "id",
            "status",
            "provider",
            "original_url",
            "meta_data",
            "file_token",
            "created",
            "updated",
            "error_message",
            "files",
        ]
        # Identifiers and timestamps cannot be set through this serializer.
        read_only_fields = ["id", "file_token", "created", "updated"]
class MusicDraftCallbackSerializer(serializers.Serializer):
    """Validates a draft callback payload.

    ``status`` is required and must be one of ``MusicDraft.STATUS_CHOICES``;
    ``meta_data`` and ``error_message`` are optional.
    """

    status = serializers.ChoiceField(choices=MusicDraft.STATUS_CHOICES)
    meta_data = serializers.JSONField(required=False)
    error_message = serializers.CharField(required=False)

View File

@ -16,11 +16,14 @@
ListSongPlaylistsAPIView,
ListSongSlugsAPIView,
ListUserListenedSongsAPIView,
MusicDraftCallbackView,
MusicDraftDetailView,
RemoveSongFromPlaylistAPIView,
RetrieveUpdateDestroyAlbumAPIView,
RetrieveUpdateDestroyAuthorAPIView,
RetrieveUpdateDestroyPlaylistAPIView,
RetrieveUpdateDestroySongAPIView,
SearchAllAPIView,
)
app_name = "music"
@ -80,4 +83,13 @@
name="retrieve_update_delete_author",
),
path("anon/create/", CreateAnonMusicUserAPIView.as_view(), name="create-anon"),
path("search/", SearchAllAPIView.as_view(), name="search_all"),
path(
"drafts/callback/<uuid:token>/",
MusicDraftCallbackView.as_view(),
name="draft-callback",
),
path(
"drafts/<str:file_token>/", MusicDraftDetailView.as_view(), name="draft-detail"
),
]

View File

@ -1,11 +1,13 @@
from drf_spectacular.utils import OpenApiExample, OpenApiParameter, extend_schema
from rest_framework import generics, permissions
from rest_framework import generics, permissions, status
from rest_framework.generics import get_object_or_404
from rest_framework.response import Response
from akarpov.common.api.pagination import StandardResultsSetPagination
from akarpov.common.api.permissions import IsAdminOrReadOnly, IsCreatorOrReadOnly
from akarpov.music.api.serializers import (
AddSongToPlaylistSerializer,
AllSearchSerializer,
AnonMusicUserSerializer,
FullAlbumSerializer,
FullAuthorSerializer,
@ -17,19 +19,23 @@
ListPlaylistSerializer,
ListSongSerializer,
ListSongSlugsSerializer,
MusicDraftCallbackSerializer,
MusicDraftSerializer,
PlaylistSerializer,
SongSerializer,
)
from akarpov.music.models import (
Album,
Author,
MusicDraft,
Playlist,
Song,
SongUserRating,
UserListenHistory,
)
from akarpov.music.services.search import search_song
from akarpov.music.tasks import listen_to_song
from akarpov.music.services.search import search_album, search_author, search_song
from akarpov.music.tasks import listen_to_song, process_draft_callback
from akarpov.users.models import User
class LikedSongsContextMixin(generics.GenericAPIView):
@ -351,7 +357,25 @@ class ListAlbumsAPIView(generics.ListAPIView):
serializer_class = ListAlbumSerializer
pagination_class = StandardResultsSetPagination
permission_classes = [permissions.AllowAny]
queryset = Album.objects.cache().all()
def get_queryset(self):
search = self.request.query_params.get("search", None)
if search:
return search_album(search)
return Album.objects.cache().all()
@extend_schema(
parameters=[
OpenApiParameter(
name="search",
description="Search query for albums",
required=False,
type=str,
),
]
)
def get(self, request, *args, **kwargs):
return super().get(request, *args, **kwargs)
class RetrieveUpdateDestroyAlbumAPIView(
@ -368,7 +392,25 @@ class ListAuthorsAPIView(generics.ListAPIView):
serializer_class = ListAuthorSerializer
pagination_class = StandardResultsSetPagination
permission_classes = [permissions.AllowAny]
queryset = Author.objects.cache().all()
def get_queryset(self):
search = self.request.query_params.get("search", None)
if search:
return search_author(search)
return Author.objects.cache().all()
@extend_schema(
parameters=[
OpenApiParameter(
name="search",
description="Search query for authors",
required=False,
type=str,
),
]
)
def get(self, request, *args, **kwargs):
return super().get(request, *args, **kwargs)
class RetrieveUpdateDestroyAuthorAPIView(
@ -391,12 +433,27 @@ def get_queryset(self):
def post(self, request, *args, **kwargs):
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=False)
data = serializer.validated_data
try:
song = Song.objects.cache().get(slug=data["song"])
song = Song.objects.cache().get(slug=self.request.data.get("song", ""))
except Song.DoesNotExist:
return Response(status=404)
try:
user_id = self.request.data.get("user_id", None)
if user_id:
user_id_int = None
try:
user_id_int = int(user_id)
except ValueError:
...
if user_id_int:
user = User.objects.cache().get(id=user_id_int)
if user != self.request.user:
return Response(status=403)
except User.DoesNotExist:
...
if self.request.user.is_authenticated:
listen_to_song.apply_async(
kwargs={
@ -406,11 +463,11 @@ def post(self, request, *args, **kwargs):
},
countdown=2,
)
elif "user_id" in data:
elif "user_id" in self.request.data:
listen_to_song.apply_async(
kwargs={
"song_id": song.id,
"user_id": data["user_id"],
"user_id": self.request.data.get("user_id", None),
"anon": True,
},
countdown=2,
@ -439,3 +496,88 @@ def get_queryset(self):
class CreateAnonMusicUserAPIView(generics.CreateAPIView):
serializer_class = AnonMusicUserSerializer
permission_classes = [permissions.AllowAny]
class SearchAllAPIView(LikedSongsContextMixin, generics.GenericAPIView):
    # Searches songs, albums and authors with one query string and returns
    # the three result lists in a single response. Open to everyone.
    # (Plain comments are used instead of docstrings so the text does not
    # leak into the generated API description.)
    permission_classes = [permissions.AllowAny]
    serializer_class = AllSearchSerializer

    def get_serializer_context(self):
        # Make sure the request is always available to the serializers.
        context = super().get_serializer_context()
        context["request"] = self.request
        return context

    @extend_schema(
        parameters=[
            OpenApiParameter(
                name="query",
                description="Search query",
                required=True,
                type=str,
            ),
        ],
        responses={
            200: AllSearchSerializer,
        },
    )
    def get(self, request, *args, **kwargs):
        # A missing or whitespace-only query returns empty lists without
        # running any search.
        query = request.query_params.get("query", "").strip()
        if not query:
            return Response({"songs": [], "albums": [], "authors": []})

        # Build the serializer context once and share it: it was previously
        # recomputed for each of the three serializers, repeating whatever
        # work the context mixin does for the same request.
        context = self.get_serializer_context()

        songs = search_song(query)[:10]  # Top 10 songs
        albums = search_album(query)[:5]  # Top 5 albums
        authors = search_author(query)[:5]  # Top 5 authors

        song_serializer = ListSongSerializer(songs, many=True, context=context)
        album_serializer = ListAlbumSerializer(albums, many=True, context=context)
        author_serializer = ListAuthorSerializer(authors, many=True, context=context)

        return Response(
            {
                "songs": song_serializer.data,
                "albums": album_serializer.data,
                "authors": author_serializer.data,
            }
        )
class MusicDraftCallbackView(generics.GenericAPIView):
    # Receives status callbacks for a draft, identified by the callback
    # token in the URL, and hands the processing to a background task.
    serializer_class = MusicDraftCallbackSerializer

    @extend_schema(
        description="Callback endpoint for external music service",
        parameters=[
            OpenApiParameter(
                name="token",
                type=str,
                location=OpenApiParameter.PATH,
                description="Draft callback token",
            ),
        ],
    )
    def post(self, request, token):
        # Unknown tokens are rejected with 404 before the body is validated.
        draft = get_object_or_404(MusicDraft, callback_token=token)

        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        payload = serializer.validated_data

        # Optional fields are forwarded as None when absent.
        task_kwargs = {
            "draft_id": str(draft.id),
            "status": payload["status"],
            "meta_data": payload.get("meta_data"),
            "error_message": payload.get("error_message"),
        }
        process_draft_callback.delay(**task_kwargs)

        # The work happens asynchronously, hence 202 rather than 200.
        return Response(status=status.HTTP_202_ACCEPTED)
class MusicDraftDetailView(generics.RetrieveAPIView):
    # Read-only detail endpoint for a single draft.
    queryset = MusicDraft.objects.all()
    serializer_class = MusicDraftSerializer
    # Drafts are looked up by their file_token rather than by primary key.
    lookup_field = "file_token"

View File

@ -1,7 +1,7 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from akarpov.music.models import Song
from akarpov.music.models import Album, Author, Song
@registry.register_document
@ -14,6 +14,12 @@ class SongDocument(Document):
"raw": fields.KeywordField(normalizer="lowercase"),
},
),
"name_transliterated": fields.TextField(
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
),
"link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True),
},
@ -27,6 +33,12 @@ class SongDocument(Document):
"raw": fields.KeywordField(normalizer="lowercase"),
},
),
"name_transliterated": fields.TextField(
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
),
"link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True),
},
@ -35,9 +47,18 @@ class SongDocument(Document):
name = fields.TextField(
attr="name",
fields={
"raw": fields.KeywordField(normalizer="lowercase"),
"raw": fields.KeywordField(),
"exact": fields.KeywordField(normalizer="lowercase"),
},
)
name_transliterated = fields.TextField(
attr="name",
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
)
suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True)
@ -48,13 +69,17 @@ class Index:
"number_of_replicas": 0,
"analysis": {
"filter": {
"my_transliterator": {
"type": "icu_transform",
"id": "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC",
},
"russian_stop": {
"type": "stop",
"stopwords": "_russian_",
},
"russian_keywords": {
"type": "keyword_marker",
"keywords": ["пример"],
"keywords": ["песня", "музыка", "певец", "альбом"],
},
"russian_stemmer": {
"type": "stemmer",
@ -80,6 +105,13 @@ class Index:
},
},
"analyzer": {
"transliterate": {
"tokenizer": "standard",
"filter": [
"lowercase",
"my_transliterator",
],
},
"russian": {
"tokenizer": "standard",
"filter": [
@ -137,3 +169,74 @@ def get_instances_from_related(self, related_instance):
if isinstance(related_instance, Song):
return related_instance.album
return related_instance.songs.all()
@registry.register_document
class AuthorDocument(Document):
    """Search document for ``Author``, stored in the "authors" index."""

    # Full-text name with two keyword sub-fields: "raw" as-is and "exact"
    # normalised to lowercase.
    name = fields.TextField(
        fields={
            "raw": fields.KeywordField(),
            "exact": fields.KeywordField(normalizer="lowercase"),
        },
    )
    # The same model attribute ("name") indexed through the "transliterate"
    # analyzer.
    name_transliterated = fields.TextField(
        attr="name",
        analyzer="transliterate",
        fields={
            "raw": fields.KeywordField(),
        },
    )
    suggest = fields.CompletionField()
    meta = fields.ObjectField(dynamic=True)

    class Index:
        name = "authors"
        settings = SongDocument.Index.settings  # Reuse settings

    class Django:
        model = Author
@registry.register_document
class AlbumDocument(Document):
    """Search document for ``Album``, stored in the "albums" index."""

    # Full-text name with two keyword sub-fields: "raw" as-is and "exact"
    # normalised to lowercase.
    name = fields.TextField(
        fields={
            "raw": fields.KeywordField(),
            "exact": fields.KeywordField(normalizer="lowercase"),
        },
    )
    # The same model attribute ("name") indexed through the "transliterate"
    # analyzer.
    name_transliterated = fields.TextField(
        attr="name",
        analyzer="transliterate",
        fields={
            "raw": fields.KeywordField(),
        },
    )
    suggest = fields.CompletionField()
    meta = fields.ObjectField(dynamic=True)
    # The album's authors, nested so each author's fields stay together.
    authors = fields.NestedField(
        attr="authors",
        properties={
            "name": fields.TextField(
                fields={
                    "raw": fields.KeywordField(normalizer="lowercase"),
                },
            ),
            "name_transliterated": fields.TextField(
                attr="name",
                analyzer="transliterate",
                fields={
                    "raw": fields.KeywordField(),
                },
            ),
            "link": fields.TextField(),
            "meta": fields.ObjectField(dynamic=True),
        },
    )

    class Index:
        name = "albums"
        settings = SongDocument.Index.settings  # Reuse settings

    class Django:
        model = Album

View File

@ -0,0 +1,89 @@
# Generated by Django 4.2.16 on 2024-10-26 10:37
from django.db import migrations, models
import django.db.models.deletion
import uuid
class Migration(migrations.Migration):
    """Create the ``MusicDraft`` and ``MusicDraftFile`` models.

    Generated by Django; the operation order matters because
    ``MusicDraftFile`` has a foreign key to ``MusicDraft``.
    """

    dependencies = [
        ("music", "0016_anonmusicuser_song_created_song_volume_and_more"),
    ]

    operations = [
        # Draft record, keyed by a UUID primary key.
        migrations.CreateModel(
            name="MusicDraft",
            fields=[
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "Pending"),
                            ("processing", "Processing"),
                            ("failed", "Failed"),
                            ("complete", "Complete"),
                        ],
                        default="pending",
                        max_length=20,
                    ),
                ),
                (
                    "provider",
                    models.CharField(
                        choices=[
                            ("spotify", "Spotify"),
                            ("yandex", "Yandex"),
                            ("youtube", "YouTube"),
                        ],
                        max_length=20,
                    ),
                ),
                ("original_url", models.URLField()),
                ("meta_data", models.JSONField(blank=True, null=True)),
                ("file_token", models.CharField(max_length=100, unique=True)),
                ("created", models.DateTimeField(auto_now_add=True)),
                ("updated", models.DateTimeField(auto_now=True)),
                ("error_message", models.TextField(blank=True, null=True)),
                # Plain integer column, not a foreign key.
                ("user_id", models.IntegerField(null=True)),
                (
                    "callback_token",
                    models.UUIDField(default=uuid.uuid4, editable=False),
                ),
            ],
        ),
        # Files attached to a draft; deleted together with the draft.
        migrations.CreateModel(
            name="MusicDraftFile",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("file", models.FileField(upload_to="music_drafts/")),
                ("original_name", models.CharField(max_length=255)),
                ("mime_type", models.CharField(max_length=100)),
                ("created", models.DateTimeField(auto_now_add=True)),
                (
                    "draft",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="files",
                        to="music.musicdraft",
                    ),
                ),
            ],
        ),
    ]

View File

@ -1,6 +1,7 @@
import uuid
from django.contrib.postgres.fields import ArrayField
from django.contrib.sites.models import Site
from django.db import models
from django.urls import reverse
@ -86,6 +87,11 @@ def album_name(self):
def artists_names(self):
return cache_model_property(self, "_authors_names")
def get_first_author_name(self):
    """Return the name of the first related author, or "" when there is none."""
    authors = self.authors
    if not authors:
        return ""
    # A related manager is always truthy, so the check above does not detect
    # "no authors"; first() returns None in that case and must be tested
    # before reading .name.
    first_author = authors.first()
    if first_author is None:
        return ""
    return first_author.name
def __str__(self):
return self.name
@ -93,6 +99,50 @@ class SlugMeta:
slug_length = 10
class MusicDraft(models.Model):
    """A draft music record tracked by status, provider and source URL."""

    STATUS_CHOICES = (
        ("pending", "Pending"),
        ("processing", "Processing"),
        ("failed", "Failed"),
        ("complete", "Complete"),
    )
    PROVIDER_CHOICES = (
        ("spotify", "Spotify"),
        ("yandex", "Yandex"),
        ("youtube", "YouTube"),
    )

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default="pending")
    provider = models.CharField(max_length=20, choices=PROVIDER_CHOICES)
    original_url = models.URLField()
    meta_data = models.JSONField(null=True, blank=True)
    file_token = models.CharField(max_length=100, unique=True)
    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(auto_now=True)
    error_message = models.TextField(null=True, blank=True)
    # Stored as a plain integer rather than a foreign key.
    user_id = models.IntegerField(null=True)
    # Separate random token used in the callback URL.
    callback_token = models.UUIDField(default=uuid.uuid4, editable=False)

    def get_callback_url(self):
        """Return the absolute callback URL for this draft.

        The URL is built from the current ``Site`` domain and the
        "music:api:draft-callback" route, always with the https scheme.
        """
        site = Site.objects.get_current()
        path = reverse(
            "music:api:draft-callback", kwargs={"token": self.callback_token}
        )
        return f"https://{site.domain}{path}"
class MusicDraftFile(models.Model):
    """A file attached to a ``MusicDraft``, stored under "music_drafts/"."""

    # Deleting the draft deletes its files; reachable as draft.files.
    draft = models.ForeignKey(
        MusicDraft, on_delete=models.CASCADE, related_name="files"
    )
    file = models.FileField(upload_to="music_drafts/")
    original_name = models.CharField(max_length=255)
    mime_type = models.CharField(max_length=100)
    created = models.DateTimeField(auto_now_add=True)
class Playlist(ShortLinkModel, UserHistoryModel):
name = models.CharField(max_length=200)
private = models.BooleanField(default=True)

View File

@ -2,7 +2,11 @@
import re
import requests
from deep_translator import GoogleTranslator
try:
from deep_translator import GoogleTranslator # TODO: move to another service
except requests.exceptions.JSONDecodeError:
print("Failed to initialize GoogleTranslator due to external API issues.")
from django.core.files import File
from django.db import transaction
from django.utils.text import slugify
@ -122,12 +126,15 @@ def load_track(
album=album,
):
return sng.first()
if not path.endswith(".mp3"):
mp3_path = path.replace(path.split(".")[-1], "mp3")
AudioSegment.from_file(path).export(mp3_path)
os.remove(path)
path = mp3_path
try:
if not path.endswith(".mp3"):
mp3_path = path.replace(path.split(".")[-1], "mp3")
AudioSegment.from_file(path).export(mp3_path)
os.remove(path)
path = mp3_path
except Exception as e:
print(e)
return Song.objects.none()
tag = MP3(path, ID3=ID3)

View File

@ -0,0 +1,84 @@
import os
from django.core.files import File
from akarpov.music.models import Album, Author, MusicDraft, Song
def save_song_from_draft(draft: MusicDraft) -> Song | None:
"""
Build and persist a Song from the files and metadata stored on a MusicDraft.

Returns the saved Song and deletes the draft on success. When the draft has
no files, has no file whose mime type starts with "audio/", or any exception
is raised along the way, the draft is marked "failed", the reason is stored
in `draft.error_message`, and None is returned.
"""
try:
if not draft.files.exists():
draft.status = "failed"
draft.error_message = "No files associated with draft"
draft.save()
return None
# Get the music file
# Only the first file whose mime type starts with "audio/" is used.
draft_file = draft.files.filter(mime_type__startswith="audio/").first()
if not draft_file:
draft.status = "failed"
draft.error_message = "No audio file found in draft"
draft.save()
return None
# Get metadata from draft
# A draft without metadata is treated as an empty mapping.
meta_data = draft.meta_data or {}
# Create song instance
song = Song(
name=meta_data.get("title", ""),
length=meta_data.get("length"),
link=draft.original_url,
meta=meta_data,
)
# Handle album
# Albums are matched by name only; the album metadata is stored only when
# a new Album row is created.
if "album" in meta_data:
album, _ = Album.objects.get_or_create(
name=meta_data["album"].get("name", ""),
defaults={"meta": meta_data["album"]},
)
song.album = album
# Save song to get an ID
# save=True persists the Song itself as part of storing the file.
with open(draft_file.file.path, "rb") as f:
song.file.save(
os.path.basename(draft_file.original_name), File(f), save=True
)
# Handle authors
# Authors are matched by name only, like albums above.
if "artists" in meta_data:
authors = []
for artist_data in meta_data["artists"]:
author, _ = Author.objects.get_or_create(
name=artist_data.get("name", ""), defaults={"meta": artist_data}
)
authors.append(author)
song.authors.set(authors)
# Handle image if present
image_file = draft.files.filter(mime_type__startswith="image/").first()
if image_file:
with open(image_file.file.path, "rb") as f:
song.image.save(
os.path.basename(image_file.original_name), File(f), save=True
)
# Add user if specified
if draft.user_id:
song.creator_id = draft.user_id
song.save()
# Clean up draft files
# NOTE(review): deleting the draft removes its MusicDraftFile rows via
# CASCADE; whether the stored files on disk are removed too is not visible
# here - confirm.
draft.delete()
return song
except Exception as e:
# NOTE(review): no transaction wraps the steps above, so a failure after the
# song file was saved presumably leaves a partially populated Song behind -
# confirm.
draft.status = "failed"
draft.error_message = str(e)
draft.save()
return None

View File

@ -3,10 +3,15 @@
import requests
import spotipy
from deep_translator import GoogleTranslator
try:
from deep_translator import GoogleTranslator
except requests.exceptions.JSONDecodeError:
print("Failed to initialize GoogleTranslator due to external API issues.")
from django.conf import settings
from django.core.files import File
from django.db import transaction
from django.db.models import Model
from django.utils.text import slugify
from spotipy import SpotifyClientCredentials
from yandex_music import Client, Cover
@ -17,30 +22,33 @@
from akarpov.utils.text import is_similar_artist, normalize_text
def generate_readable_slug(name: str, model) -> str:
def generate_readable_slug(name: str, model: Model) -> str:
# Translate and slugify the name
slug = str(
slugify(
GoogleTranslator(source="auto", target="en").translate(
name,
target_language="en",
)
)
)
slug = safe_translate(name)
# Truncate slug if it's too long
if len(slug) > 20:
slug = slug[:20]
last_dash = slug.rfind("-")
if last_dash != -1:
slug = slug[:last_dash]
original_slug = slug
# Ensure uniqueness
counter = 1
while model.objects.filter(slug=slug).exists():
if len(slug) > 14:
slug = slug[:14]
last_dash = slug.rfind("-")
if len(original_slug) > 14:
truncated_slug = original_slug[:14]
last_dash = truncated_slug.rfind("-")
if last_dash != -1:
slug = slug[:last_dash]
slug = slug + "_" + generate_charset(5)
truncated_slug = truncated_slug[:last_dash]
else:
truncated_slug = original_slug
suffix = f"_{generate_charset(5)}" if counter == 1 else f"_{counter}"
slug = f"{truncated_slug}{suffix}"
counter += 1
return slug
@ -213,16 +221,12 @@ def update_album_info(album: AlbumModel, author_name: str = None) -> None:
client = yandex_login()
spotify_session = create_spotify_session()
if author_name:
yandex_album_info = get_yandex_album_info(
album.name + " - " + author_name, client
)
spotify_album_info = get_spotify_album_info(
album.name + " - " + author_name, spotify_session
)
else:
yandex_album_info = get_yandex_album_info(album.name, client)
spotify_album_info = get_spotify_album_info(album.name, spotify_session)
search_term = f"{album.name} - {author_name}" if author_name else album.name
yandex_album_info = get_api_info(get_yandex_album_info, search_term, client)
spotify_album_info = get_api_info(
get_spotify_album_info, search_term, spotify_session
)
# Combine and prioritize Spotify data
album_data = {}
@ -232,14 +236,14 @@ def update_album_info(album: AlbumModel, author_name: str = None) -> None:
"name": spotify_album_info.get("name", album.name),
"release_date": spotify_album_info.get("release_date", ""),
"total_tracks": spotify_album_info.get("total_tracks", ""),
"link": spotify_album_info["external_urls"]["spotify"],
"link": spotify_album_info.get("external_urls", {}).get("spotify", ""),
"genre": spotify_album_info.get("genres", []),
}
if yandex_album_info:
album_data.update(
{
"name": album_data.get("name", yandex_album_info.title),
"genre": album_data.get("genre", yandex_album_info.genre),
"name": album_data.get("name") or yandex_album_info.title,
"genre": album_data.get("genre") or yandex_album_info.genre,
"description": yandex_album_info.description,
"type": yandex_album_info.type,
}
@ -249,102 +253,120 @@ def update_album_info(album: AlbumModel, author_name: str = None) -> None:
album.save()
# Handle Album Image - Prefer Spotify, fallback to Yandex
image_path = None
if (
spotify_album_info
and "images" in spotify_album_info
and spotify_album_info["images"]
):
image_path = download_image(
spotify_album_info["images"][0]["url"], settings.MEDIA_ROOT
)
elif yandex_album_info and yandex_album_info.cover_uri:
image_path = download_image(
"https://" + yandex_album_info.cover_uri, settings.MEDIA_ROOT
)
generated_name = slugify(
GoogleTranslator(source="auto", target="en").translate(
album.name,
target_language="en",
)
)
image_path = get_album_image(spotify_album_info, yandex_album_info)
if image_path:
save_album_image(album, image_path)
# Update Album Authors from Spotify data if available
if spotify_album_info and "artists" in spotify_album_info:
update_album_authors(album, spotify_album_info["artists"])
album.slug = generate_readable_slug(album.name, AlbumModel)
album.save()
def get_album_image(spotify_info, yandex_info):
    """Download an album cover, preferring Spotify over Yandex.

    Returns whatever ``download_image`` returns for the first Spotify image
    when one is listed, otherwise for the Yandex ``cover_uri`` when it is
    set, and ``None`` when neither source offers an image.
    """
    has_spotify_image = bool(
        spotify_info and "images" in spotify_info and spotify_info["images"]
    )
    if has_spotify_image:
        first_image = spotify_info["images"][0]
        return download_image(first_image["url"], settings.MEDIA_ROOT)

    if yandex_info and yandex_info.cover_uri:
        # The Yandex cover URI is stored without a scheme.
        cover_url = "https://" + yandex_info.cover_uri
        return download_image(cover_url, settings.MEDIA_ROOT)

    return None
def save_album_image(album, image_path):
if not image_path:
return
try:
generated_name = safe_translate(album.name)
with open(image_path, "rb") as f:
album.image.save(
generated_name + ".png",
File(
f,
name=generated_name + ".png",
),
File(f, name=generated_name + ".png"),
save=True,
)
os.remove(image_path)
album.save()
except Exception as e:
print(f"Error saving album image: {str(e)}")
# Update Album Authors from Spotify data if available
if spotify_album_info and "artists" in spotify_album_info:
album_authors = []
for artist in spotify_album_info["artists"]:
author, created = Author.objects.get_or_create(name=artist["name"])
album_authors.append(author)
album.authors.set(album_authors)
album.slug = generate_readable_slug(album.name, AlbumModel)
album.save()
def update_album_authors(album, artists):
    """Replace the album's authors with the given artists.

    Each entry of ``artists`` must carry a ``"name"`` key; the matching
    ``Author`` is looked up by name and created when missing.
    """
    # get_or_create returns (instance, created); only the instance is needed.
    resolved = [
        Author.objects.get_or_create(name=entry["name"])[0] for entry in artists
    ]
    album.authors.set(resolved)
def update_author_info(author: Author) -> None:
client = yandex_login()
spotify_session = create_spotify_session()
# Retrieve info from both services
yandex_artist_info = get_yandex_artist_info(author.name, client)
spotify_artist_info = get_spotify_artist_info(author.name, spotify_session)
yandex_artist_info = get_api_info(get_yandex_artist_info, author.name, client)
spotify_artist_info = get_api_info(
get_spotify_artist_info, author.name, spotify_session
)
# Combine and prioritize Spotify data
author_data = {}
if spotify_artist_info:
author_data = {
"name": spotify_artist_info.get("name", author.name),
"genres": spotify_artist_info.get("genres", []),
"popularity": spotify_artist_info.get("popularity", 0),
"link": spotify_artist_info["external_urls"]["spotify"],
}
if yandex_artist_info:
author_data.update(
{
"name": author_data.get("name", yandex_artist_info.name),
"genres": author_data.get("genres", yandex_artist_info.genres),
"description": yandex_artist_info.description,
}
)
author_data = combine_artist_data(author, spotify_artist_info, yandex_artist_info)
author.meta = author_data
with transaction.atomic():
author.meta = author_data
author.save()
image_path = get_author_image(spotify_artist_info, yandex_artist_info)
if image_path:
save_author_image(author, image_path)
author.slug = generate_readable_slug(author.name, Author)
with transaction.atomic():
author.save()
# Handle Author Image - Prefer Spotify, fallback to Yandex
image_path = None
if (
spotify_artist_info
and "images" in spotify_artist_info
and spotify_artist_info["images"]
):
image_path = download_image(
spotify_artist_info["images"][0]["url"], settings.MEDIA_ROOT
)
elif yandex_artist_info and yandex_artist_info.cover:
image_path = download_image(yandex_artist_info.cover, settings.MEDIA_ROOT)
generated_name = slugify(
GoogleTranslator(source="auto", target="en").translate(
author.name,
target_language="en",
def get_api_info(api_func, search_term, session):
    """Call ``api_func(search_term, session)`` and swallow any failure.

    Returns the function's result, or ``None`` after printing the error when
    the call raises.
    """
    try:
        result = api_func(search_term, session)
    except Exception as exc:
        print(f"Error fetching info from {api_func.__name__}: {str(exc)}")
        result = None
    return result
def combine_artist_data(author, spotify_info, yandex_info):
author_data = {}
if spotify_info:
author_data = {
"name": spotify_info.get("name", author.name),
"genres": spotify_info.get("genres", []),
"popularity": spotify_info.get("popularity", 0),
"link": spotify_info.get("external_urls", {}).get("spotify", ""),
}
if yandex_info:
author_data.update(
{
"name": author_data.get("name") or yandex_info.name,
"genres": author_data.get("genres") or yandex_info.genres,
"description": yandex_info.description,
}
)
)
if image_path:
return author_data
def get_author_image(spotify_info, yandex_info):
    """Download an artist image, preferring Spotify over Yandex.

    Returns whatever ``download_image`` returns for the first Spotify image
    when one is listed, otherwise for the Yandex ``cover`` when it is set,
    and ``None`` when neither source offers an image.
    """
    has_spotify_image = bool(
        spotify_info and "images" in spotify_info and spotify_info["images"]
    )
    if has_spotify_image:
        first_image = spotify_info["images"][0]
        return download_image(first_image["url"], settings.MEDIA_ROOT)

    if yandex_info and yandex_info.cover:
        # NOTE(review): `cover` is passed through unchanged, while the album
        # variant prefixes "https://" to `cover_uri` - confirm this value is
        # already a downloadable URL.
        return download_image(yandex_info.cover, settings.MEDIA_ROOT)

    return None
def save_author_image(author, image_path):
if not image_path:
return
try:
generated_name = safe_translate(author.name)
with open(image_path, "rb") as f:
author.image.save(
generated_name + ".png",
@ -353,21 +375,29 @@ def update_author_info(author: Author) -> None:
)
os.remove(image_path)
author.save()
except Exception as e:
print(f"Error saving author image: {str(e)}")
author.slug = generate_readable_slug(author.name, Author)
with transaction.atomic():
author.save()
def safe_translate(text):
    """Return a slug of ``text`` translated to English.

    When translation (or slugifying its result) raises, the error is printed
    and the untranslated text is slugified instead.
    """
    try:
        translator = GoogleTranslator(source="auto", target="en")
        return slugify(translator.translate(text))
    except Exception as exc:
        print(f"Error translating text: {str(exc)}")
    # Fallback: slugify the original text as-is.
    return slugify(text)
def search_all_platforms(track_name: str) -> dict:
print(track_name)
session = spotipy.Spotify(
auth_manager=spotipy.SpotifyClientCredentials(
client_id=settings.MUSIC_SPOTIFY_ID,
client_secret=settings.MUSIC_SPOTIFY_SECRET,
)
)
spotify_info = get_spotify_info(track_name, session)
# session = spotipy.Spotify(
# auth_manager=spotipy.SpotifyClientCredentials(
# client_id=settings.MUSIC_SPOTIFY_ID,
# client_secret=settings.MUSIC_SPOTIFY_SECRET,
# )
# )
# spotify_info = get_spotify_info(track_name, session)
spotify_info = {} # TODO: add proxy for info retrieve
yandex_info = search_yandex(track_name)
if "album_image_path" in spotify_info and "album_image_path" in yandex_info:
os.remove(yandex_info["album_image_path"])

View File

@ -1,48 +1,156 @@
from django.core.cache import cache
from django.db.models import Case, When
from django_elasticsearch_dsl.registries import registry
from elasticsearch_dsl import Q as ES_Q
from akarpov.music.documents import SongDocument
from akarpov.music.models import Song
from akarpov.music.documents import AlbumDocument, AuthorDocument, SongDocument
from akarpov.music.models import Album, Author, Song
def search_song(query):
search = SongDocument.search()
search_query = ES_Q(
"bool",
should=[
ES_Q(
"multi_match",
query=query,
fields=["name^5", "authors.name^3", "album.name^3"],
fuzziness="AUTO",
),
ES_Q("wildcard", name__raw=f"*{query.lower()}*"),
ES_Q(
"nested",
path="authors",
query=ES_Q("wildcard", authors__name__raw=f"*{query.lower()}*"),
),
ES_Q(
"nested",
path="album",
query=ES_Q("wildcard", album__name__raw=f"*{query.lower()}*"),
),
ES_Q("wildcard", meta__raw=f"*{query.lower()}*"),
],
minimum_should_match=1,
)
search = search.query(search_query)
should_queries = [
ES_Q("match_phrase", name={"query": query, "boost": 5}),
ES_Q(
"nested",
path="authors",
query=ES_Q("match_phrase", name={"query": query, "boost": 4}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("match_phrase", name={"query": query, "boost": 4}),
),
ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
ES_Q(
"nested",
path="authors",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 2}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 2}),
),
ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 1}),
ES_Q(
"nested",
path="authors",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 0.8}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 0.8}),
),
ES_Q(
"match",
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
),
ES_Q(
"nested",
path="authors",
query=ES_Q(
"match",
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 0.8},
),
),
ES_Q(
"nested",
path="album",
query=ES_Q(
"match",
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 0.8},
),
),
]
search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)
search = search.query(search_query).extra(size=20)
response = search.execute()
# Check for hits and get song instances
if response.hits:
hit_ids = [hit.meta.id for hit in response.hits]
songs = Song.objects.filter(id__in=hit_ids).order_by(
Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
)
return songs
return Song.objects.none()
def autocomplete_search(query):
    """Return completion suggestions for ``query`` as a list of strings.

    Runs a completion suggester named ``song_suggest`` against the
    ``suggest`` field of the song index and returns the text of every option
    in the first suggestion entry.
    """
    request = SongDocument.search().suggest(
        "song_suggest", query, completion={"field": "suggest"}
    )
    response = request.execute()
    first_entry = response.suggest.song_suggest[0]
    return [option.text for option in first_entry.options]
def get_popular_songs():
    """Return up to ten songs played more than 300 times, most played first.

    The result is cached under the ``popular_songs`` key for one hour.
    """
    # A single get() replaces the former `in cache` + `get()` pair: the entry
    # could expire between the two calls, making the function return None.
    songs = cache.get("popular_songs")
    if songs is None:
        songs = Song.objects.filter(played__gt=300).order_by("-played")[:10]
        cache.set("popular_songs", songs, timeout=3600)
    return songs
def bulk_update_index(model_class):
    """Push every row of ``model_class`` to the search index in batches of 100."""
    registry.update(model_class.objects.all(), bulk_size=100)
def search_author(query):
    """Search authors by name and return them in relevance order.

    At least one of these clauses must match: an exact phrase on ``name``
    (boost 5), a fuzzy match on ``name`` (boost 3), a lower-cased substring
    wildcard on ``name`` (boost 1), or a fuzzy match on
    ``name_transliterated`` (boost 1). At most 10 hits are requested. Returns
    an ``Author`` queryset ordered like the hits, or an empty queryset when
    nothing matched.
    """
    search = AuthorDocument.search()
    pattern = f"*{query.lower()}*"
    clauses = [
        ES_Q("match_phrase", name={"query": query, "boost": 5}),
        ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
        ES_Q("wildcard", name={"value": pattern, "boost": 1}),
        ES_Q(
            "match",
            name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
        ),
    ]
    bool_query = ES_Q("bool", should=clauses, minimum_should_match=1)
    response = search.query(bool_query).extra(size=10).execute()

    if not response.hits:
        return Author.objects.none()

    hit_ids = [hit.meta.id for hit in response.hits]
    # Map each primary key to its position in the hit list so the database
    # returns rows in the search engine's order.
    ranking = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
    return Author.objects.filter(id__in=hit_ids).order_by(ranking)
def search_album(query):
    """Search albums by name and return them in relevance order.

    At least one of these clauses must match: an exact phrase on ``name``
    (boost 5), a fuzzy match on ``name`` (boost 3), a lower-cased substring
    wildcard on ``name`` (boost 1), or a fuzzy match on
    ``name_transliterated`` (boost 1). At most 10 hits are requested. Returns
    an ``Album`` queryset ordered like the hits, or an empty queryset when
    nothing matched.
    """
    search = AlbumDocument.search()
    pattern = f"*{query.lower()}*"
    clauses = [
        ES_Q("match_phrase", name={"query": query, "boost": 5}),
        ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
        ES_Q("wildcard", name={"value": pattern, "boost": 1}),
        ES_Q(
            "match",
            name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
        ),
    ]
    bool_query = ES_Q("bool", should=clauses, minimum_should_match=1)
    response = search.query(bool_query).extra(size=10).execute()

    if not response.hits:
        return Album.objects.none()

    hit_ids = [hit.meta.id for hit in response.hits]
    # Map each primary key to its position in the hit list so the database
    # returns rows in the search engine's order.
    ranking = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
    return Album.objects.filter(id__in=hit_ids).order_by(ranking)

View File

@ -75,13 +75,40 @@ def load_file_meta(track: int, user_id: int) -> str:
return str(song)
def load_playlist(link: str, user_id: int):
author = link.split("/")[4]
playlist_id = link.split("/")[-1]
def load_url(link: str, user_id: int):
client = login()
playlist = client.users_playlists(int(playlist_id), author) # type: Playlist
for track in playlist.fetch_tracks():
obj_id = link.split("/")[-1]
obj_id = obj_id.split("?")[0]
try:
obj_id = int(obj_id)
except ValueError:
print("Invalid link")
return None
if "/playlists/" in link:
author = link.split("/")[4]
playlist = client.users_playlists(obj_id, author) # type: Playlist
for track in playlist.fetch_tracks():
tasks.load_ym_file_meta.apply_async(
kwargs={"track": track.track.id, "user_id": user_id}
)
elif "/album/" in link:
album = client.albums_with_tracks(obj_id)
for volume in album.volumes:
for track in volume:
tasks.load_ym_file_meta.apply_async(
kwargs={"track": track.id, "user_id": user_id}
)
elif "/artist/" in link:
artist = client.artists(obj_id)[0]
albums = artist.get_albums(page_size=100)
for album in albums:
for track in album.fetch_tracks():
tasks.load_ym_file_meta.apply_async(
kwargs={"track": track.id, "user_id": user_id}
)
else:
tasks.load_ym_file_meta.apply_async(
kwargs={"track": track.track.id, "user_id": user_id}
kwargs={"track": obj_id, "user_id": user_id}
)

View File

@ -2,103 +2,190 @@
from urllib.parse import parse_qs, urlparse
import pylast
import requests
import spotipy
import structlog
import ytmusicapi
from asgiref.sync import async_to_sync
from celery import shared_task
from channels.layers import get_channel_layer
from django.conf import settings
from django.shortcuts import get_object_or_404
from django.utils import timezone
from django.utils.timezone import now
from spotipy import SpotifyClientCredentials
from ytmusicapi import YTMusic
from akarpov.music.api.serializers import SongSerializer
from akarpov.music.models import (
AnonMusicUser,
AnonMusicUserHistory,
MusicDraft,
RadioSong,
Song,
UserListenHistory,
UserMusicProfile,
)
from akarpov.music.services import spotify, yandex, youtube
from akarpov.music.services.drafts import save_song_from_draft
from akarpov.music.services.file import load_dir, load_file
from akarpov.utils.celery import get_scheduled_tasks_name
logger = structlog.get_logger(__name__)
@shared_task(soft_time_limit=60 * 20, time_limit=60 * 30)
def list_tracks(url, user_id):
if "music.youtube.com" in url or "youtu.be" in url:
url = url.replace("music.youtube.com", "youtube.com")
url = url.replace("youtu.be", "youtube.com")
if "spotify.com" in url:
spotify.download_url(url, user_id)
elif "music.yandex.ru" in url:
yandex.load_playlist(url, user_id)
if "youtube.com" in url:
if "channel" in url or "/c/" in url:
ytmusic = ytmusicapi.YTMusic()
channel_id = url.split("/")[-1]
channel_songs = ytmusic.get_artist(channel_id)["songs"]["results"]
@shared_task(bind=True, soft_time_limit=60 * 60, time_limit=60 * 120)
def list_tracks(self, url: str, user_id: int | None = None) -> str | None:
    """Dispatch a music URL to the handler for its provider.

    The URL is normalized first and then matched, by substring, against the
    known provider domains; anything that matches none of them goes to
    ``fallback_search``. Returns whatever the chosen handler returns.

    If anything raises, a ``MusicDraft`` with provider ``"unknown"`` is
    recorded for the URL, ``handle_download_failure`` is queued with the
    draft id and this task's id, and ``None`` is returned.
    """
    # bind=True is required: the task reads self.request.id below. Without it
    # Celery would pass the URL as `self`.
    try:
        url = normalize_url(url)
        handlers = {
            "spotify.com": handle_spotify,
            "music.yandex.ru": handle_yandex,
            "youtube.com": handle_youtube,
        }
        for domain, handler in handlers.items():
            if domain in url:
                return handler(url, user_id)
        return fallback_search(url, user_id)
    except Exception as e:
        draft = MusicDraft.objects.create(
            provider="unknown",
            original_url=url,
            user_id=user_id,
            status="pending",
            error_message=str(e),
        )
        handle_download_failure.delay(str(draft.id), self.request.id)
        return None
# Parse the URL and the query string
parsed_url = urlparse(url)
parsed_qs = parse_qs(parsed_url.query)
# Get the playlist ID from the parsed query string
playlist_id = parsed_qs.get("list", [None])[0]
def normalize_url(url):
    """Rewrite YouTube Music and youtu.be links to plain youtube.com URLs.

    ``music.youtube.com`` is replaced by ``youtube.com``. A short link
    ``<scheme>://youtu.be/<id>[?query]`` becomes
    ``<scheme>://youtube.com/watch?v=<id>[&query]``; merely swapping the host
    would yield ``youtube.com/<id>``, which is not a watch URL. Any other
    occurrence of ``youtu.be`` is replaced textually, as before. URLs of other
    providers are returned unchanged.
    """
    url = url.replace("music.youtube.com", "youtube.com")
    parsed = urlparse(url)
    if parsed.netloc.lower() in ("youtu.be", "www.youtu.be"):
        video_id = parsed.path.strip("/")
        if video_id:
            query = f"v={video_id}"
            if parsed.query:
                # Keep extra parameters such as t= or list=.
                query = f"{query}&{parsed.query}"
            return f"{parsed.scheme}://youtube.com/watch?{query}"
    return url.replace("youtu.be", "youtube.com")
if playlist_id:
playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]
else:
raise ValueError("No playlist ID found in the URL.")
for song in playlist_songs:
process_yb.apply_async(
kwargs={
"url": f"https://music.youtube.com/watch?v={song['videoId']}",
"user_id": user_id,
}
)
else:
process_yb.apply_async(kwargs={"url": url, "user_id": user_id})
def handle_spotify(url: str, user_id: int | None = None) -> str | None:
    """Queue a Spotify download for ``url`` and return the URL.

    The work is handed to the ``download_spotify_url`` task. The URL is
    returned instead of the ``AsyncResult`` from ``.delay()``: the value is
    passed on as the result of the calling task, must match the annotated
    ``str | None``, and an ``AsyncResult`` cannot be serialized as a task
    result.
    """
    download_spotify_url.delay(url, user_id)
    return url
def handle_yandex(url: str, user_id: int | None = None) -> str | None:
    """Queue a Yandex Music download for ``url`` and return the URL.

    The work is handed to the ``load_yandex_url`` task. The URL is returned
    instead of the ``AsyncResult`` from ``.delay()``: the value is passed on
    as the result of the calling task, must match the annotated
    ``str | None``, and an ``AsyncResult`` cannot be serialized as a task
    result.
    """
    load_yandex_url.delay(url, user_id)
    return url
def handle_youtube(url: str, user_id: int | None = None) -> str | None:
"""Handle YouTube downloads"""
if "channel" in url or "/c/" in url:
return handle_youtube_channel(url, user_id)
elif "playlist" in url or "&list=" in url:
return handle_youtube_playlist(url, user_id)
else:
spotify_manager = SpotifyClientCredentials(
client_id=settings.MUSIC_SPOTIFY_ID,
client_secret=settings.MUSIC_SPOTIFY_SECRET,
)
spotify_search = spotipy.Spotify(client_credentials_manager=spotify_manager)
return process_yb.delay(url, user_id)
results = spotify_search.search(q=url, type="track", limit=1)
top_track = (
results["tracks"]["items"][0] if results["tracks"]["items"] else None
)
if top_track:
spotify.download_url(top_track["external_urls"]["spotify"], user_id)
url = top_track["external_urls"]["spotify"]
def handle_youtube_channel(url, user_id):
    """Queue a download for every song listed on a YouTube channel.

    The channel id is taken from the last ``/``-separated segment of ``url``.
    One ``process_yb`` task is queued per song; the original URL is returned.
    """
    client = YTMusic()
    channel_id = url.split("/")[-1]
    artist = client.get_artist(channel_id)
    for entry in artist["songs"]["results"]:
        task_kwargs = {
            "url": f"https://youtube.com/watch?v={entry['videoId']}",
            "user_id": user_id,
        }
        process_yb.apply_async(kwargs=task_kwargs)
    return url
def handle_youtube_playlist(url, user_id):
    """Queue a download for every track of a YouTube playlist.

    The playlist id is read from the ``list`` query parameter of ``url``.
    One ``process_yb`` task is queued per track; the original URL is returned.

    Raises:
        ValueError: if the URL carries no ``list`` parameter.
    """
    client = YTMusic()
    query_params = parse_qs(urlparse(url).query)
    playlist_id = query_params.get("list", [None])[0]
    if not playlist_id:
        raise ValueError("No playlist ID found in the URL.")

    for entry in client.get_playlist(playlist_id)["tracks"]:
        task_kwargs = {
            "url": f"https://music.youtube.com/watch?v={entry['videoId']}",
            "user_id": user_id,
        }
        process_yb.apply_async(kwargs=task_kwargs)
    return url
def fallback_search(url, user_id):
    """Treat ``url`` as a free-text query and download the top Spotify hit.

    Searches Spotify for a single track matching ``url``. When a track is
    found it is downloaded synchronously via ``spotify.download_url`` and its
    Spotify URL is returned; otherwise the input is returned unchanged.
    """
    credentials = SpotifyClientCredentials(
        client_id=settings.MUSIC_SPOTIFY_ID,
        client_secret=settings.MUSIC_SPOTIFY_SECRET,
    )
    client = spotipy.Spotify(client_credentials_manager=credentials)
    found = client.search(q=url, type="track", limit=1)

    best_match = None
    if found["tracks"]["items"]:
        best_match = found["tracks"]["items"][0]
    if not best_match:
        return url

    spotify_url = best_match["external_urls"]["spotify"]
    spotify.download_url(spotify_url, user_id)
    return spotify_url
@shared_task(max_retries=5)
def process_yb(url, user_id):
youtube.download_from_youtube_link(url, user_id)
return url
@shared_task(bind=True)
def process_yb(self, url: str, user_id: int | None = None) -> str | None:
    """Download a track from a YouTube link.

    Returns the string form of whatever ``youtube.download_from_youtube_link``
    returns. If the download raises, a ``MusicDraft`` with provider
    ``"youtube"`` is recorded, ``handle_download_failure`` is queued with the
    draft id and this task's id, and ``None`` is returned.
    """
    try:
        downloaded = youtube.download_from_youtube_link(url, user_id)
        return str(downloaded)
    except Exception as exc:
        draft_fields = {
            "provider": "youtube",
            "original_url": url,
            "user_id": user_id,
            "status": "pending",
            "error_message": str(exc),
        }
        draft = MusicDraft.objects.create(**draft_fields)
        handle_download_failure.delay(str(draft.id), self.request.id)
    return None
@shared_task(bind=True)
def download_spotify_url(self, url: str, user_id: int | None = None) -> str | None:
    """Download a Spotify URL.

    Returns whatever ``spotify.download_url`` returns. If the download
    raises, a ``MusicDraft`` with provider ``"spotify"`` is recorded,
    ``handle_download_failure`` is queued with the draft id and this task's
    id, and ``None`` is returned.
    """
    try:
        return spotify.download_url(url, user_id)
    except Exception as exc:
        draft_fields = {
            "provider": "spotify",
            "original_url": url,
            "user_id": user_id,
            "status": "pending",
            "error_message": str(exc),
        }
        draft = MusicDraft.objects.create(**draft_fields)
        handle_download_failure.delay(str(draft.id), self.request.id)
    return None
@shared_task(bind=True)
def load_yandex_url(self, url: str, user_id: int | None = None) -> str | None:
    """Load a Yandex Music URL.

    Returns whatever ``yandex.load_url`` returns. If loading raises, a
    ``MusicDraft`` with provider ``"yandex"`` is recorded,
    ``handle_download_failure`` is queued with the draft id and this task's
    id, and ``None`` is returned.
    """
    try:
        return yandex.load_url(url, user_id)
    except Exception as exc:
        draft_fields = {
            "provider": "yandex",
            "original_url": url,
            "user_id": user_id,
            "status": "pending",
            "error_message": str(exc),
        }
        draft = MusicDraft.objects.create(**draft_fields)
        handle_download_failure.delay(str(draft.id), self.request.id)
    return None
@shared_task
@ -211,14 +298,71 @@ def listen_to_song(song_id, user_id=None, anon=True):
session_key=lastfm_token,
)
song = Song.objects.get(id=song_id)
artist_name = song.artists_names
artist_name = song.get_first_author_name()
track_name = song.name
album_name = song.album.name
timestamp = int(timezone.now().timestamp())
network.scrobble(
artist=artist_name, title=track_name, timestamp=timestamp
artist=artist_name,
title=track_name,
timestamp=timestamp,
album=album_name,
)
network.update_now_playing(
artist=artist_name, title=track_name, album=album_name
)
except UserMusicProfile.DoesNotExist:
pass
except Exception as e:
logger.error(f"Last.fm scrobble error: {e}")
return song_id
@shared_task
def handle_download_failure(draft_id: str, original_task_id: str):
    """Ask the external download service to fetch a draft's URL.

    Posts the draft's URL, provider, callback URL and file token to the
    service. Returns ``True`` and marks the draft ``"processing"`` when the
    service answers 202. Returns ``False`` and marks the draft ``"failed"``
    (with the reason in ``error_message``) when the request cannot be
    completed or any other status comes back.

    ``original_task_id`` is accepted for callers but is not used here.
    """
    draft = MusicDraft.objects.get(id=draft_id)
    external_service_url = "http://music-download-service/api/v1/download"
    # (connect, read) timeouts in seconds. Without a timeout, requests waits
    # indefinitely and an unresponsive service would block the worker.
    request_timeout = (5, 30)

    try:
        response = requests.post(
            external_service_url,
            json={
                "url": draft.original_url,
                "provider": draft.provider,
                "callback_url": draft.get_callback_url(),
                "file_token": draft.file_token,
            },
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are recorded on the draft instead of
        # leaving it "pending" forever.
        draft.status = "failed"
        draft.error_message = f"External service request failed: {exc}"
        draft.save()
        return False

    if response.status_code != 202:
        draft.status = "failed"
        draft.error_message = f"External service request failed: {response.text}"
        draft.save()
        return False

    draft.status = "processing"
    draft.save()
    return True
@shared_task
def process_draft_callback(
    draft_id: str, status: str, meta_data: dict = None, error_message: str = None
):
    """Apply a callback from the external service to a draft.

    Stores ``status`` on the draft, overwrites ``meta_data`` and
    ``error_message`` only when a truthy value is supplied, and saves it.
    When ``status`` is ``"complete"`` the draft is then turned into a song
    via ``save_song_from_draft``.
    """
    draft = get_object_or_404(MusicDraft, id=draft_id)
    draft.status = status

    # Falsy values (None, empty dict, empty string) leave the stored value
    # untouched.
    optional_updates = (("meta_data", meta_data), ("error_message", error_message))
    for field_name, value in optional_updates:
        if value:
            setattr(draft, field_name, value)
    draft.save()

    if status != "complete":
        return
    save_song_from_draft(draft)

View File

@ -2,7 +2,7 @@
{% block content %}
<h1>Welcome to music app</h1>
<p>This is mainly the backend of music, you should consider using side clients like: <a href="https://next.akarpov.ru/music">otomir23's client</a></p>
<p>This is mainly the backend of music, you should consider using side clients like: <a href="https://next.akarpov.ru/music">otomir23's client</a> or my <a href="https://t.me/akarpov_music_bot">inline telegram bot</a></p>
{% if request.user.is_authenticated %}
{% if last_fm_account %}
<p>Last.fm connected to {{ last_fm_account }}, <a href="{% url 'music:lastfm_connect' %}">reconnect</a></p>

View File

@ -1,6 +1,8 @@
from drf_spectacular.extensions import OpenApiAuthenticationExtension
from drf_spectacular.plumbing import build_bearer_security_scheme_object
from rest_framework.authentication import BaseAuthentication
from akarpov.users.models import UserAPIToken
from akarpov.users.models import User, UserAPIToken
from akarpov.users.tasks import set_last_active_token
@ -19,4 +21,14 @@ def authenticate(self, request):
return None
set_last_active_token.delay(token.token)
return token.user, token
return User.objects.cache().get(id=token.user_id), token
class UserTokenAuthenticationExtension(OpenApiAuthenticationExtension):
    """OpenAPI schema extension describing ``UserTokenAuthentication``.

    Documents the authenticator as a bearer scheme carried in the
    ``Authorization`` header with the ``Bearer`` prefix.
    """

    target_class = "akarpov.users.api.authentification.UserTokenAuthentication"
    name = "UserTokenAuthentication"

    def get_security_definition(self, auto_schema):
        """Return the bearer security scheme object for the schema."""
        scheme = build_bearer_security_scheme_object(
            header_name="Authorization",
            token_prefix="Bearer",
        )
        return scheme

View File

@ -18,6 +18,8 @@
)
from akarpov.users.models import User
from .authentification import UserTokenAuthentication # noqa: F401
class UserRegisterAPIViewSet(generics.CreateAPIView):
"""Creates new user and sends verification email"""

View File

@ -214,23 +214,16 @@ def list_tokens(request):
@login_required
def create_token(request):
initial_data = {}
# Обработка параметров 'name' и 'active_until'
if "name" in request.GET:
initial_data["name"] = request.GET["name"]
if "active_until" in request.GET:
initial_data["active_until"] = request.GET["active_until"]
# Создаем QueryDict для разрешений, чтобы правильно обработать повторяющиеся ключи
permissions_query_dict = QueryDict("", mutable=True)
# Разбор параметров разрешений
permissions = request.GET.getlist("permissions")
for perm in permissions:
category, permission = perm.split(".")
permissions_query_dict.update({f"permissions_{category}": [permission]})
# Переводим QueryDict в обычный словарь для использования в initial
permissions_data = {key: value for key, value in permissions_query_dict.lists()}
initial_data.update(permissions_data)
@ -242,7 +235,6 @@ def create_token(request):
initial=initial_data, permissions_context=UserAPIToken.permission_template
)
if request.method == "POST":
print(request.POST)
form = TokenCreationForm(request.POST)
if form.is_valid():
new_token = form.save(commit=False)

View File

@ -28,8 +28,6 @@ RUN apt-get update && \
apt-get install -y build-essential libpq-dev gettext libmagic-dev libjpeg-dev zlib1g-dev && \
# Dependencies for file preview generation
apt-get install -y webp git libimage-exiftool-perl libmagickwand-dev ffmpeg libgdal-dev && \
# ML dependencies \
# none for now
apt-get purge -y --auto-remove -o APT:AutoRemove:RecommendsImportant=false && \
rm -rf /var/lib/apt/lists/*
@ -48,7 +46,6 @@ RUN poetry export --without-hashes -f requirements.txt | /venv/bin/pip install -
COPY . .
RUN poetry build && /venv/bin/pip install dist/*.whl
RUN /venv/bin/python -m nltk.downloader punkt stopwords wordnet
COPY ./compose/production/django/entrypoint /entrypoint
@ -71,10 +68,6 @@ COPY ./compose/local/django/start-redirect /start-redirect
RUN sed -i 's/\r$//g' /start-redirect
RUN chmod +x /start-redirect
COPY ./compose/local/django/install_preview_dependencies /install_preview_dependencies
RUN sed -i 's/\r$//g' /install_preview_dependencies
RUN chmod +x /install_preview_dependencies
COPY ./compose/local/django/celery/worker/start /start-celeryworker
RUN sed -i 's/\r$//g' /start-celeryworker
RUN chmod +x /start-celeryworker

View File

@ -3,6 +3,4 @@
set -o errexit
set -o nounset
/install_preview_dependencies
celery -A config.celery_app worker --autoscale 20 -l INFO

View File

@ -1,14 +0,0 @@
#!/bin/bash
apt-get update
apt-get install wget libnotify4 scribus libappindicator3-1 libayatana-indicator3-7 libdbusmenu-glib4 libdbusmenu-gtk3-4
apt-get install -y poppler-utils libfile-mimeinfo-perl ghostscript libsecret-1-0 zlib1g-dev libjpeg-dev imagemagick libmagic1 libreoffice inkscape xvfb
apt-get install -y libxml2-dev libxslt1-dev antiword unrtf tesseract-ocr flac lame libmad0 libsox-fmt-mp3 sox swig
apt-get install -y python-dev-is-python3 libxml2-dev libxslt1-dev antiword unrtf poppler-utils tesseract-ocr \
flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig
wget https://github.com/jgraph/drawio-desktop/releases/download/v13.0.3/draw.io-amd64-13.0.3.deb
dpkg -i draw.io-amd64-13.0.3.deb
rm draw.io-amd64-13.0.3.deb
apt-get purge -y --auto-remove -o APT:AutoRemove:RecommendsImportant=false && \
rm -rf /var/lib/apt/lists/*
preview --check-dependencies

View File

@ -1,4 +1,4 @@
FROM traefik:2.10.7
FROM traefik:2.11.0
RUN mkdir -p /etc/traefik/acme \
&& touch /etc/traefik/acme/acme.json \
&& chmod 600 /etc/traefik/acme/acme.json

View File

@ -80,6 +80,7 @@
"music.*": {"ops": ("fetch", "get", "list"), "timeout": 60 * 15},
"otp_totp.totpdevice": {"ops": "all", "timeout": 15 * 60},
"users.userapitoken": {"ops": "all", "timeout": 20 * 60},
"users.user": {"ops": "all", "timeout": 5 * 60},
}
CACHEOPS_REDIS = env.str("REDIS_URL")
@ -528,6 +529,11 @@
{"url": "http://127.0.0.1:8000", "description": "Local Development server"},
{"url": "https://new.akarpov.ru", "description": "Production server"},
],
"EXTENSIONS": {
"authentication": [
"akarpov.users.api.authentification.UserTokenAuthenticationExtension"
],
},
}
# CKEDITOR
@ -748,6 +754,13 @@
ELASTICSEARCH_DSL = {
"default": {"hosts": env("ELASTIC_SEARCH", default="http://127.0.0.1:9200/")},
}
USE_DEBUG_TOOLBAR = False
SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")
USE_X_FORWARDED_HOST = True
USE_X_FORWARDED_PORT = True
# PREVIEW
# ------------------------------------------------------------------------------
PREVIEW_SERVICE_URL = env("PREVIEW_SERVICE_URL", default=None)
PREVIEW_SERVICE_API_KEY = env("PREVIEW_SERVICE_API_KEY", default=None)

5903
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -69,7 +69,6 @@ channels = {extras = ["daphne"], version = "^4.0.0"}
django-upload-validator = "^1.1.6"
markdown = "^3.4.4"
pydotplus = "^2.0.2"
preview-generator = "^0.29"
uuid = "^1.30"
mutagen = "^1.46.0"
pydub = "^0.25.1"
@ -100,11 +99,8 @@ pytest-mock = "^3.11.1"
pytest-asyncio = "^0.21.1"
pytest-lambda = "^2.2.0"
pgvector = "^0.2.2"
pycld2 = "^0.41"
uuid6 = "^2023.5.2"
uvicorn = "0.23.2"
nltk = "^3.8.1"
pymorphy3 = "^1.2.1"
pymorphy3-dicts-ru = "^2.4.417150.4580142"
fastapi = "0.103.0"
pydantic-settings = "^2.0.3"
@ -118,9 +114,9 @@ spotdl = "^4.2.4"
fuzzywuzzy = "^0.18.0"
python-levenshtein = "^0.23.0"
pylast = "^5.2.0"
textract = {git = "https://github.com/Alexander-D-Karpov/textract.git", branch = "master"}
librosa = "^0.10.1"
django-ckeditor-5 = "^0.2.12"
chardet = "^5.2.0"
[build-system]

View File

View File

@ -1,6 +0,0 @@
from haystack import Document
from milvus_haystack import MilvusDocumentStore
ds = MilvusDocumentStore()
ds.write_documents([Document("Some Content")])
ds.get_all_documents()

2185
search/poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,18 +0,0 @@
[tool.poetry]
name = "search"
version = "0.1.0"
description = ""
authors = ["Alexander-D-Karpov <alexandr.d.karpov@gmail.com>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.11"
fastapi = "0.99.1"
pydantic = "1.10.13"
transformers = {version = "4.34.1", extras = ["torch"]}
torch = ">=2.0.0, !=2.0.1, !=2.1.0"
farm-haystack = {extras = ["faiss"], version = "^1.21.2"}
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

View File

@ -1,4 +0,0 @@
#!/bin/bash
python -m spacy download en_core_web_lg
python -m spacy download xx_sent_ud_sm
python -m spacy download ru_core_news_lg