added museums, clickhouse for user preference, fixed position

This commit is contained in:
Alexander Karpov 2023-05-22 22:29:46 +03:00
parent 3bf8042ec2
commit 90b6cfab8e
20 changed files with 492 additions and 2179332 deletions

View File

@ -1,12 +1,11 @@
from django.urls import path, include
from rest_framework.routers import DefaultRouter
from passfinder.recomendations.api.views import TinderView, PersonalRecommendation
from passfinder.users.api.views import UserViewSet
from passfinder.users.api.views import UserViewSet, CreateUserPreferenceApiView
router = DefaultRouter()
router.register('tinder', TinderView)
router.register("tinder", TinderView)
router.register("recommendations", PersonalRecommendation)
router.register("user", UserViewSet)
@ -14,5 +13,6 @@
urlpatterns = [
path("", include("passfinder.events.api.urls")),
path("auth/", include("passfinder.users.api.urls")),
path("user/preference", CreateUserPreferenceApiView.as_view()),
]
urlpatterns += router.urls

View File

@ -1,10 +1,13 @@
"""
Base settings to build other settings files upon.
"""
import warnings
from datetime import timedelta
from pathlib import Path
import environ
import structlog
from urllib3.connectionpool import InsecureRequestWarning
ROOT_DIR = Path(__file__).resolve(strict=True).parent.parent.parent
# passfinder/
@ -76,6 +79,7 @@
"drf_spectacular",
"location_field",
"polymorphic",
"django_clickhouse",
]
LOCAL_APPS = ["passfinder.users", "passfinder.events", "passfinder.recomendations"]
@ -278,39 +282,28 @@
logger_factory=structlog.stdlib.LoggerFactory(),
cache_logger_on_first_use=True,
)
# CELERY
# --------------------------------------------------------------------------------------------------
CELERY_REDIS_HOST = env("CELERY_REDIS_HOST", default="127.0.0.1")
CELERY_REDIS_PORT = env.int("CELERY_REDIS_PORT", default=6379)
CELERY_REDIS_USER = env("CELERY_REDIS_USER", default=None)
CELERY_REDIS_PASSWORD = env("CELERY_REDIS_PASSWORD", default=None)
CELERY_REDIS_DB = env("CELERY_REDIS_DB", default=0)
# Celery
# ------------------------------------------------------------------------------
if USE_TZ:
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-timezone
CELERY_TIMEZONE = TIME_ZONE
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-broker_url
# CELERY_BROKER_URL = env("CELERY_BROKER_URL")
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-result_backend
# CELERY_RESULT_BACKEND = CELERY_BROKER_URL
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#result-extended
CELERY_RESULT_EXTENDED = True
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#result-backend-always-retry
# https://github.com/celery/celery/pull/6122
CELERY_RESULT_BACKEND_ALWAYS_RETRY = True
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#result-backend-max-retries
CELERY_RESULT_BACKEND_MAX_RETRIES = 10
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-accept_content
CELERY_ACCEPT_CONTENT = ["json"]
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-task_serializer
CELERY_REDIS_SSL = env.bool("CELERY_REDIS_SSL", default=False)
CELERY_BROKER_URL = env("CELERY_BROKER_URL")
CELERY_TASK_SERIALIZER = "json"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-result_serializer
CELERY_RESULT_SERIALIZER = "json"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-time-limit
CELERY_TASK_TIME_LIMIT = 5 * 60
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-soft-time-limit
CELERY_TASK_SOFT_TIME_LIMIT = 60
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-scheduler
CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers:DatabaseScheduler"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#worker-send-task-events
CELERY_WORKER_SEND_TASK_EVENTS = True
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std-setting-task_send_sent_event
CELERY_TASK_SEND_SENT_EVENT = True
CELERY_ACCEPT_CONTENT = ["application/json"]
CELERY_ENABLE_UTC = True
CELERY_BEAT_SCHEDULE = {
"clickhouse_auto_sync": {
"task": "django_clickhouse.tasks.clickhouse_auto_sync",
"schedule": timedelta(seconds=5),
"options": {"expires": 1},
},
}
# DRF
# -------------------------------------------------------------------------------
# django-rest-framework - https://www.django-rest-framework.org/api-guide/settings/
@ -342,3 +335,32 @@
"map.provider": "openstreetmap",
"search.provider": "nominatim",
}
# Token lifetimes keyed by the SIMPLE_JWT setting name
# (presumably consumed by djangorestframework-simplejwt — confirm).
SIMPLE_JWT = {
    # Access tokens stay valid for 30 days.
    "ACCESS_TOKEN_LIFETIME": timedelta(days=30),
    # Refresh tokens stay valid for 50 weeks.
    "REFRESH_TOKEN_LIFETIME": timedelta(weeks=50),
}
# CLICKHOUSE
# ------------------------------------------------------------------------------
# Connection parameters for the ClickHouse database, all overridable through
# environment variables.
CLICKHOUSE_DATABASES = {
    "default": {
        "db_url": env("CLICKHOUSE_URL", default="http://localhost:8123"),
        "db_name": env("CLICKHOUSE_DB", default="default"),
        "username": env("CLICKHOUSE_USER", default="default"),
        "password": env("CLICKHOUSE_PASSWORD", default="default"),
        # NOTE(review): certificate verification is switched off for every
        # environment built on these base settings — confirm this is intended
        # outside local development.
        "verify_ssl_cert": False,
    }
}
# Silences urllib3's InsecureRequestWarning for the whole process, not only for
# ClickHouse requests, because certificate verification is disabled above.
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
# Redis connection settings under the CLICKHOUSE_REDIS_CONFIG name
# (presumably the django_clickhouse sync storage — confirm).
CLICKHOUSE_REDIS_CONFIG = {
    "host": env("CLICKHOUSE_REDIS_HOST", default="127.0.0.1"),
    # Cast to int so a value coming from the environment has the same type as
    # the integer default instead of arriving as a string.
    "port": env.int("CLICKHOUSE_REDIS_PORT", default=6379),
    "db": env.int("CLICKHOUSE_REDIS_DB", default=0),
    "username": env("CLICKHOUSE_REDIS_USER", default=None),
    "password": env("CLICKHOUSE_REDIS_PASSWORD", default=None),
    "socket_timeout": 10,
}

2179283
data.json

File diff suppressed because one or more lines are too long

BIN
data/ext.zip Normal file

Binary file not shown.

1
data/only_cords.json Normal file

File diff suppressed because one or more lines are too long

67
parsers/extract_mus.py Normal file
View File

@ -0,0 +1,67 @@
import os
import json
import requests
from urllib.parse import urlparse
from bs4 import BeautifulSoup
# Scraper for the alphabetical museum index on vrm.museum.ru: every index row
# is followed to its museum page and the collected records go to ext.json.

# Seconds to wait for the server. requests applies no timeout by default, so a
# stalled connection would otherwise block the scraper forever.
REQUEST_TIMEOUT = 30


def _dump(records):
    """Write the records gathered so far to ext.json (UTF-16, indented)."""
    with open("ext.json", "w", encoding="utf-16") as f:
        json.dump({"links": records}, f, ensure_ascii=False, indent=4)


result = []
url = "http://vrm.museum.ru/mus/list.asp?by=alpha"
response = requests.get(url, timeout=REQUEST_TIMEOUT)
soup = BeautifulSoup(response.text, "html.parser")
t = soup.find_all("tr")
# The first 20 <tr> rows are skipped — presumably page chrome that precedes
# the museum list (TODO confirm against the live page).
for j in range(20, len(t)):
    try:
        el = t[j]
        # Path of the museum page: the first double-quoted value in the
        # serialized form of the row's first tag that carries an href.
        l = str(el.find_all(href=True)[0]).split('"')[1]
        link = "http://vrm.museum.ru" + l
        response = requests.get(link, timeout=REQUEST_TIMEOUT)
        # The museum name is the text of the first line of the page source
        # that mentions the museum's www.museum.ru address.
        name = BeautifulSoup(
            [x for x in response.text.splitlines() if f"http://www.museum.ru{l}" in x][
                0
            ],
            "html.parser",
        ).text
        soup2 = BeautifulSoup(response.text, "html.parser")

        # Stripped, non-empty cell texts for every row of every table.
        data2 = []
        for table in soup2.find_all("table"):
            for row in table.find_all("tr"):
                cols = [ele.text.strip() for ele in row.find_all("td")]
                data2.append([ele for ele in cols if ele])

        # Each row becomes one "label -> description" pair: all cells are
        # split on ":" and the first piece is used as the label.
        # NOTE(review): colons inside the description (e.g. "10:00") are
        # replaced by spaces when the pieces are joined back together.
        data3 = {}
        for row in data2:
            if not row:
                continue
            rec = []
            for cell in row:
                rec += cell.split(":")
            if len(rec) > 1:
                c_name = " ".join(rec[0].split())
                desc = " ".join(" ".join(rec[1:]).split())
                data3[c_name] = desc

        # Only <img> tags that actually have a src are considered, so a single
        # src-less image no longer discards the whole museum record.
        urls = [img["src"] for img in soup2.find_all("img", src=True)]
        add = {
            "name": name,
            "urls": [x for x in urls if "asp" in x],
            "link": link,
        } | data3
        result.append(add)
        print(name)
    except Exception as e:
        # Best effort: a row that cannot be parsed is reported and skipped.
        print(e)
    print(j, "/", len(t))
    # Checkpoint after every row so an interrupted run keeps its progress.
    _dump(result)

# Final write; also covers the case where the loop body never ran.
_dump(result)

58
parsers/mus_load.py Normal file
View File

@ -0,0 +1,58 @@
import json
from pprint import pprint
# Builds the list of museum records at import time by pairing each scraped
# museum description with the coordinates entry at the same index.


def _price_next_to(tokens, idx):
    """Return the integer token adjacent to ``tokens[idx]``, or None.

    The token before ``idx`` is tried first, then the token after it.
    Positions outside the list are skipped; in particular ``idx - 1 == -1``
    must not wrap around to the last token of the text.
    """
    for pos in (idx - 1, idx + 1):
        if not 0 <= pos < len(tokens):
            continue
        try:
            return int(tokens[pos])
        except ValueError:
            continue
    return None


with open("data/ext.json", "r", encoding="utf-16") as f:
    data = json.load(f)
# NOTE(review): no explicit encoding here, so the platform default is used —
# confirm the file's encoding.
with open("data/only_cords.json", "r") as f:
    data2 = json.load(f)

ret = []
for j, pos in enumerate(data2):
    info = data["links"][j]
    # Entries without coordinates are left out of the result.
    if "cords" not in pos:
        continue

    # Keys of the scraped record that mention a fee or a price.
    p_name = [x for x in info if "плата" in x.lower() or "цена" in x.lower()]
    res = {
        "sort": j,
        "type": "museum",
        "parser_source": "museum.ru",
        "title": info["name"],
        "lat": pos["cords"][0],
        "lon": pos["cords"][1],
    }

    if p_name and info[p_name[0]] != "См. здесь":
        for n in p_name:
            m = []
            if "руб" in info[n]:
                ppp = info[n].split()
                for ind, eee in enumerate(ppp):
                    if "руб" in eee:
                        val = _price_next_to(ppp, ind)
                        if val is not None:
                            m.append(val)
            if m:
                # The highest amount found in the first usable field wins.
                res["ticket_price"] = max(m)
                break

    if "Режим работы" in info and info["Режим работы"] != "См. здесь":
        res["schedule"] = {"plain": info["Режим работы"]}
    if "Описание" in info:
        res["description"] = info["Описание"]
    ret.append(res)


def get_mus():
    """Return the museum records assembled when this module was imported."""
    return ret

View File

@ -38,10 +38,12 @@ class Meta:
class PointSerializer(serializers.ModelSerializer):
# location = serializers.ListSerializer(
# child=serializers.FloatField(), source="bare_location", max_length=2
# )
class Meta:
model = BasePoint
fields = ["title", "description", "location", "icon"]
class RouteSerializer(serializers.Serializer):
    """Plain (non-model) serializer for a route: a name, a description and its points."""

    name = serializers.CharField()
    description = serializers.CharField()
    # Each element of the list is rendered with PointSerializer.
    points = serializers.ListSerializer(child=PointSerializer())

View File

@ -3,18 +3,25 @@
from rest_framework.generics import GenericAPIView
from rest_framework.response import Response
from passfinder.events.api.serializers import PointSerializer
from passfinder.events.api.serializers import PointSerializer, RouteSerializer
from passfinder.events.models import BasePoint
class BuildRouteApiView(GenericAPIView):
filter_backends = (DjangoFilterBackend,)
filterset_class = DateFilter
serializer_class = PointSerializer
serializer_class = RouteSerializer
def get(self, request):
return Response(
data=PointSerializer(many=True).to_representation(
BasePoint.objects.order_by("?")[:10]
routes = []
for _ in range(10):
routes.append(
{
"name": "bebra",
"description": "bebra bebra bebra",
"points": PointSerializer(many=True).to_representation(
BasePoint.objects.order_by("?")[:10]
),
}
)
)
return Response(data=routes)

View File

@ -1,5 +1,8 @@
from django.contrib.postgres.fields import ArrayField
from django.db import models
from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import extend_schema_field
from rest_framework.serializers import ListSerializer, FloatField
from polymorphic.models import PolymorphicModel
from passfinder.utils.choices import count_max_length
@ -42,7 +45,7 @@ class City(OIDModel):
@property
def location(self):
return [self.lon, self.lat]
return [self.lat, self.lon]
def __str__(self):
return self.title
@ -67,7 +70,7 @@ class Place(OIDModel):
@property
def location(self):
return [self.lon, self.lat]
return [self.lat, self.lon]
class Tag(OIDModel):
@ -91,16 +94,20 @@ class BasePoint(OIDModel, PolymorphicModel):
"Place", related_name="points", null=True, on_delete=models.SET_NULL
)
sort = models.IntegerField(default=0)
lon = models.FloatField(default=0, db_index=True)
lat = models.FloatField(default=0, db_index=True)
lon = models.FloatField(default=0, db_index=True)
can_buy = models.BooleanField(default=True)
priority = models.BooleanField(default=False)
@property
@extend_schema_field(
field=ListSerializer(child=FloatField(), min_length=2, max_length=2)
)
def location(self):
return [self.lon, self.lat]
return [self.lat, self.lon]
@property
@extend_schema_field(field=OpenApiTypes.URI)
def icon(self):
# TODO: change to icon/first image
return "https://akarpov.ru/media/uploads/files/qMO4dDfIXP.webp"

View File

@ -1,5 +1,10 @@
from django.contrib.auth import get_user_model
from rest_framework import serializers
from rest_framework.generics import get_object_or_404
from passfinder.events.models import BasePoint
from passfinder.users.clickhouse_models import UserPreferenceClickHouse
from passfinder.users.models import UserPreference
User = get_user_model()
@ -29,3 +34,19 @@ def create(self, validated_data):
user.save()
return user
class UserPreferenceSerializer(serializers.ModelSerializer):
    """Serializer that stores a user's preference for a point.

    The client sends the point as a 24-character identifier together with the
    preference type; the owner is taken from the current request.
    """

    point = serializers.CharField(min_length=24, max_length=24)

    class Meta:
        model = UserPreference
        fields = ["point", "type"]

    def validate_point(self, val):
        """Swap the submitted identifier for the BasePoint whose oid matches it."""
        return get_object_or_404(BasePoint, oid=val)

    def create(self, validated_data):
        """Create the preference on behalf of the user attached to the request."""
        owner = self.context["request"].user
        return UserPreference.objects.create(user=owner, **validated_data)

View File

@ -6,7 +6,11 @@
from rest_framework.response import Response
from rest_framework.viewsets import GenericViewSet
from .serializers import UserSerializer, UserRegisterSerializer
from .serializers import (
UserSerializer,
UserRegisterSerializer,
UserPreferenceSerializer,
)
User = get_user_model()
@ -37,3 +41,7 @@ class RegisterApiView(generics.CreateAPIView):
)
def post(self, request, *args, **kwargs):
return self.create(request, *args, **kwargs)
class CreateUserPreferenceApiView(generics.CreateAPIView):
    """Create-only endpoint that stores a preference through UserPreferenceSerializer."""

    # NOTE(review): no permission_classes are declared here — confirm that the
    # project-wide defaults require authentication for this endpoint.
    serializer_class = UserPreferenceSerializer

View File

@ -0,0 +1,7 @@
from django_clickhouse import migrations
from passfinder.users.clickhouse_models import UserPreferenceClickHouse
class Migration(migrations.Migration):
    """ClickHouse migration that creates the table for UserPreferenceClickHouse."""

    operations = [migrations.CreateTable(UserPreferenceClickHouse)]

View File

@ -0,0 +1,23 @@
from enum import Enum
from django_clickhouse.clickhouse_models import ClickHouseModel
from django_clickhouse.engines import MergeTree
from infi.clickhouse_orm import fields
from passfinder.users.models import UserPreference, UserPreferenceType
# Enum built with the functional API from the first element of every
# UserPreferenceType choice; members are numbered automatically in that order.
UserPreferenceEnumType = Enum(
    "UserPreferenceEnumType", [c[0] for c in UserPreferenceType.choices]
)


class UserPreferenceClickHouse(ClickHouseModel):
    """ClickHouse model declared as the counterpart of the UserPreference Django model."""

    # Django model this ClickHouse model is bound to.
    django_model = UserPreference
    sync_enabled = True

    user_id = fields.Int32Field()
    point_id = fields.StringField()
    type = fields.Enum16Field(UserPreferenceEnumType)
    created_at = fields.DateTimeField()

    # MergeTree engine: "created_at" is the date column, followed by the
    # ordering key tuple.
    engine = MergeTree("created_at", ("type", "point_id", "user_id", "created_at"))

View File

@ -0,0 +1,68 @@
# Generated by Django 4.2.1 on 2023-05-22 14:36
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Create the UserPreference table with its (user, point) index."""

    dependencies = [
        ("events", "0016_remove_basepoint_location_remove_city_location_and_more"),
        ("users", "0001_initial"),
    ]

    operations = [
        migrations.CreateModel(
            name="UserPreference",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "type",
                    models.CharField(
                        choices=[
                            ("like", "Like"),
                            ("dislike", "Dislike"),
                            ("favorite", "Favorite"),
                            ("unfavorite", "Unfavorite"),
                            ("view", "View"),
                            ("ignore", "Ignore"),
                        ],
                        max_length=10,
                    ),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True, db_index=True)),
                (
                    "point",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="user_preferences",
                        to="events.basepoint",
                    ),
                ),
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="preferences",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "indexes": [
                    models.Index(
                        fields=["user", "point"], name="users_userp_user_id_7550b0_idx"
                    )
                ],
            },
        ),
    ]

View File

@ -0,0 +1,21 @@
# Generated by Django 4.2.1 on 2023-05-22 14:51
from django.db import migrations, models
import django.utils.timezone
class Migration(migrations.Migration):
    """Alter UserPreference.created_at to use default=timezone.now."""

    dependencies = [
        ("users", "0002_userpreference"),
    ]

    operations = [
        migrations.AlterField(
            model_name="userpreference",
            name="created_at",
            field=models.DateTimeField(
                db_index=True, default=django.utils.timezone.now
            ),
        ),
    ]

View File

@ -1,4 +1,16 @@
from django.contrib.auth.models import AbstractUser
from django.db.models import (
TextChoices,
ForeignKey,
CASCADE,
DateTimeField,
CharField,
Index,
)
from django.utils.timezone import now
from django_clickhouse.models import ClickHouseSyncModel
from passfinder.utils.choices import count_max_length
class User(AbstractUser):
@ -11,3 +23,27 @@ class User(AbstractUser):
#: First and last name do not cover name patterns around the globe
first_name = None # type: ignore
last_name = None # type: ignore
class UserPreferenceType(TextChoices):
    """Kinds of preference a user can record for a point."""

    like = "like"
    dislike = "dislike"
    favorite = "favorite"
    unfavorite = "unfavorite"
    view = "view"
    ignore = "ignore"
class UserPreference(ClickHouseSyncModel):
    """A preference of a given type recorded by a user for a point.

    Derives from ClickHouseSyncModel (presumably so that rows can be synced to
    ClickHouse — confirm against django_clickhouse).
    """

    user = ForeignKey("User", related_name="preferences", on_delete=CASCADE)
    point = ForeignKey(
        "events.BasePoint", related_name="user_preferences", on_delete=CASCADE
    )
    type = CharField(
        choices=UserPreferenceType.choices,
        # Column width is computed from the choices by count_max_length.
        max_length=count_max_length(UserPreferenceType),
    )
    # Defaults to the current time and is indexed.
    created_at = DateTimeField(default=now, db_index=True)

    class Meta:
        # Composite index over the (user, point) pair.
        indexes = [Index(fields=["user", "point"])]

93
poetry.lock generated
View File

@ -243,6 +243,25 @@ files = [
[package.extras]
tzdata = ["tzdata"]
[[package]]
name = "beautifulsoup4"
version = "4.12.2"
description = "Screen-scraping library"
category = "main"
optional = false
python-versions = ">=3.6.0"
files = [
{file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"},
{file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"},
]
[package.dependencies]
soupsieve = ">1.2"
[package.extras]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
name = "billiard"
version = "3.6.4.0"
@ -770,6 +789,24 @@ django-timezone-field = ">=5.0"
python-crontab = ">=2.3.4"
tzdata = "*"
[[package]]
name = "django-clickhouse"
version = "1.2.1"
description = "Django extension to integrate with ClickHouse database"
category = "main"
optional = false
python-versions = "*"
files = [
{file = "django-clickhouse-1.2.1.tar.gz", hash = "sha256:0222a483e92538be1a958ec3053280caa6e62fae1a32080c0f13e1097360997d"},
{file = "django_clickhouse-1.2.1-py2.py3-none-any.whl", hash = "sha256:3925cca55bffe0f8574069ea6503e45221d5998d9740fc12b7ae7d58d9ea2d7a"},
]
[package.dependencies]
celery = "*"
Django = ">=1.7"
"infi.clickhouse-orm" = "*"
statsd = "*"
[[package]]
name = "django-cors-headers"
version = "3.14.0"
@ -1298,6 +1335,24 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
[[package]]
name = "infi-clickhouse-orm"
version = "2.1.3"
description = "A Python library for working with the ClickHouse database"
category = "main"
optional = false
python-versions = "*"
files = [
{file = "infi.clickhouse_orm-2.1.3-py3-none-any.whl", hash = "sha256:8edb3406afa84ba1bd744229fe585f07f7335aaad194d987b9f644909aa94275"},
{file = "infi.clickhouse_orm-2.1.3.tar.gz", hash = "sha256:54484d5167fa87e9112071a30b9b20cab5b5d164465c11a6a67d1223160a356f"},
]
[package.dependencies]
iso8601 = ">=0.1.12"
pytz = "*"
requests = "*"
setuptools = "*"
[[package]]
name = "inflection"
version = "0.5.1"
@ -1379,6 +1434,18 @@ qtconsole = ["qtconsole"]
test = ["pytest (<7.1)", "pytest-asyncio", "testpath"]
test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pandas", "pytest (<7.1)", "pytest-asyncio", "testpath", "trio"]
[[package]]
name = "iso8601"
version = "1.1.0"
description = "Simple module to parse ISO 8601 dates"
category = "main"
optional = false
python-versions = ">=3.6.2,<4.0"
files = [
{file = "iso8601-1.1.0-py3-none-any.whl", hash = "sha256:8400e90141bf792bce2634df533dc57e3bee19ea120a87bebcd3da89a58ad73f"},
{file = "iso8601-1.1.0.tar.gz", hash = "sha256:32811e7b81deee2063ea6d2e94f8819a86d1f3811e49d23623a41fa832bef03f"},
]
[[package]]
name = "isort"
version = "5.12.0"
@ -2487,6 +2554,18 @@ files = [
{file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
]
[[package]]
name = "soupsieve"
version = "2.4.1"
description = "A modern CSS selector implementation for Beautiful Soup."
category = "main"
optional = false
python-versions = ">=3.7"
files = [
{file = "soupsieve-2.4.1-py3-none-any.whl", hash = "sha256:1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8"},
{file = "soupsieve-2.4.1.tar.gz", hash = "sha256:89d12b2d5dfcd2c9e8c22326da9d9aa9cb3dfab0a83a024f05704076ee8d35ea"},
]
[[package]]
name = "sqlparse"
version = "0.4.4"
@ -2524,6 +2603,18 @@ pure-eval = "*"
[package.extras]
tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
[[package]]
name = "statsd"
version = "4.0.1"
description = "A simple statsd client."
category = "main"
optional = false
python-versions = "*"
files = [
{file = "statsd-4.0.1-py2.py3-none-any.whl", hash = "sha256:c2676519927f7afade3723aca9ca8ea986ef5b059556a980a867721ca69df093"},
{file = "statsd-4.0.1.tar.gz", hash = "sha256:99763da81bfea8daf6b3d22d11aaccb01a8d0f52ea521daab37e758a4ca7d128"},
]
[[package]]
name = "structlog"
version = "23.1.0"
@ -2988,4 +3079,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
content-hash = "9abf5a717109289c45bdc20e08a2b6ad6d3e0ef05dc56d194fa3a77450648a57"
content-hash = "c715e551faec63d192743332fc0fd568147d4f164ff58a2ad843a7c10a5f4799"

View File

@ -51,6 +51,8 @@ django-polymorphic = "^3.1.0"
annoy = "^1.17.2"
django-filter = "^23.2"
djangorestframework-simplejwt = "^5.2.2"
beautifulsoup4 = "^4.12.2"
django-clickhouse = "^1.2.1"
[build-system]