# ml/search.py
# 2023-08-27 02:58:30 +03:00
#
# 164 lines
# 4.1 KiB
# Python

from annoy import AnnoyIndex
from sentence_transformers import SentenceTransformer
import pickle
import os
# Process-wide caches filled in lazily by get_model(), get_data() and
# get_index(); None means "not loaded yet".
model = data = index = None
def get_model():
    """Return the shared LaBSE sentence encoder, creating it on first use.

    The encoder is stored in the module-level ``model`` global; the
    ``SentenceTransformer`` constructor is only invoked while that global is
    still ``None``.

    Returns:
        The cached ``SentenceTransformer("sentence-transformers/LaBSE")``.
    """
    global model
    # Test identity against None instead of truthiness: whether a loaded model
    # object counts as "truthy" is decided by the library (e.g. via __len__),
    # and a valid-but-falsy instance would otherwise be rebuilt on every call.
    if model is None:
        model = SentenceTransformer("sentence-transformers/LaBSE")
    return model
def get_data():
    """Return the record collection from ``./data.pic``, loading it once.

    On the first call the pickle file is read and every record that lacks an
    ``'image'`` key gets ``'image': None`` added, so callers can read that key
    unconditionally. The result is cached in the module-level ``data`` global.

    Returns:
        The cached, normalised record collection.

    Raises:
        OSError: if ``./data.pic`` cannot be opened.
    """
    global data
    # ``is None`` (not truthiness): an empty dataset is a valid cached value
    # and must not trigger a re-read of the file on every call.
    if data is None:
        # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
        # file. Only safe as long as data.pic is a trusted, locally produced
        # artifact.
        with open("./data.pic", "rb") as file:
            loaded = pickle.load(file)
        # Records are addressed by integer position here, exactly as the rest
        # of the module does, rather than assuming a particular container.
        for position in range(len(loaded)):
            loaded[position].setdefault('image', None)
        # Publish only after loading and normalising succeeded.
        data = loaded
    return data
def get_index():
    """Return the shared Annoy index loaded from ``index.ann``.

    The index is created with 768 dimensions and the ``"angular"`` metric,
    loaded from disk on first use and cached in the module-level ``index``
    global.

    Returns:
        The cached ``AnnoyIndex``.

    Raises:
        Whatever ``AnnoyIndex.load`` raises when ``index.ann`` cannot be
        loaded; in that case nothing is cached and the next call retries.
    """
    global index
    if index is None:
        # NOTE(review): 768 has to equal the vector size index.ann was built
        # with - presumably the LaBSE embedding width; confirm.
        loaded = AnnoyIndex(768, "angular")
        loaded.load("index.ann")
        # Assign the global only after load() succeeded, so a failed load does
        # not leave an empty, unloaded index cached for later calls.
        index = loaded
    return index
def search(search_string):
    """Return display cards for the records closest to ``search_string``.

    The query is embedded with ``get_model()``, the 5 nearest item ids are
    taken from ``get_index()``, and each id is looked up in ``get_data()``.
    Ids that have no matching record are skipped.

    Args:
        search_string: Free-text query to embed.

    Returns:
        A list of dicts with the keys ``'logo'`` (the record's ``'image'``),
        ``'name'`` and ``'description'``, in nearest-first order as returned
        by the index.

    Raises:
        KeyError: if a matched record lacks ``'name'`` or ``'description'``.
        Errors from loading the model, index or data file propagate.
    """
    query_vector = get_model().encode([search_string])[0]
    neighbour_ids = get_index().get_nns_by_vector(query_vector, 5)
    records = get_data()  # fetched once instead of once per neighbour

    hits = []
    for item_id in neighbour_ids:
        try:
            hits.append(records[item_id])
        except (IndexError, KeyError):
            # The index may reference ids the data file does not contain;
            # skip only that case rather than swallowing every exception.
            continue

    return [
        {
            'logo': record['image'],
            'name': record['name'],
            'description': record['description'],
        }
        for record in hits
    ]
def search_investemnts(search_string):
    """Return ``total_investments`` of the records closest to ``search_string``.

    The lookup mirrors ``search``: embed the query, take the 5 nearest ids
    from the index and resolve them against the data records, skipping ids
    with no record. (The misspelled name is kept because callers use it.)

    Args:
        search_string: Free-text description to embed.

    Returns:
        A non-empty list of ``total_investments`` values; ``[2000000]`` when
        no neighbour could be resolved to a record.

    Raises:
        KeyError: if a matched record lacks ``'total_investments'``.
        Errors from loading the model, index or data file propagate.
    """
    query_vector = get_model().encode([search_string])[0]
    neighbour_ids = get_index().get_nns_by_vector(query_vector, 5)
    records = get_data()  # fetched once instead of once per neighbour

    hits = []
    for item_id in neighbour_ids:
        try:
            hits.append(records[item_id])
        except (IndexError, KeyError):
            # Index and data file can disagree; ignore unresolved ids only.
            continue

    if not hits:
        # Fallback figure so callers can always average the result.
        return [2000000]
    return [record['total_investments'] for record in hits]
# Static lookup table consumed by calculate_metrics().
comp_data = {
    # Per business type: the CAC / LTV figures reported back to the caller.
    "B2C": {"CAC": 10, "LTV": 12},
    "B2B": {"CAC": 1000, "LTV": 1300},
    # TAM figure per category; calculate_metrics() floor-divides it by 1000.
    "TAM": {
        "AI": 40680000000000,
        "Business Software": 50600000000000,
        "IndustrialTech": 5000000000000,
        "E-commerce": 1440000000000000,
        "Advertising & Marketing": 47700000000000,
        "Hardware": 65700000000000,
        "RetailTech": 1080000000000,
        "ConstructionTech": 484200000000,
        "Web3": 5400000000000000,
        "EdTech": 10350000000000,
        "Business Intelligence": 2430000000000,
        "Cybersecurity": 15300000000000,
        "HrTech": 2160000000000,
        "Telecom & Communication": 162000000000000,
        "Media & Entertainment": 225000000000000,
        "FinTech": 810000000000000,
        "MedTech": 46080000000000,
        "Transport & Logistics": 90000000000000,
        "Gaming": 31230000000000,
        "FoodTech": 22230000000000,
        "WorkTech": 90000000000000,
        "Consumer Goods & Services": 8370000000000,
        "Aero & SpaceTech": 34200000000000,
        "Legal & RegTech": 720000000000,
        "Travel": 180000000000000,
        "PropTech": 2700000000000,
        "Energy": 162000000000,
        "GreenTech": 5580000000000,
    },
    # Multipliers: SAM is applied to TAM, SOM is applied on top of SAM.
    "SAM": 0.3,
    "SOM": 0.13,
}
def calculate_metrics(category, description, type):
    """Build the list of metric entries for a company profile.

    Args:
        category: Key into ``comp_data['TAM']``. Unknown (or unhashable)
            categories fall back to the ``'AI'`` entry.
        description: Free text passed to ``search_investemnts`` to find
            comparable ``total_investments`` values.
        type: Key into ``comp_data`` selecting the CAC/LTV pair (``"B2C"`` or
            ``"B2B"`` in the table). The name shadows the builtin but is part
            of the public signature, so it is kept.

    Returns:
        A list of ``{'type': ..., 'value': ...}`` dicts, in this order:
        ``users_metrics``, ``market_values``, ``percentage``,
        ``how_much_investments``, ``company_value``, ``future_value``.

    Raises:
        KeyError: if ``type`` is not a key of ``comp_data``.
    """
    unit_economics = comp_data[type]
    cac = unit_economics['CAC']
    ltv = unit_economics['LTV']

    tam_by_category = comp_data['TAM']
    try:
        tam = tam_by_category[category] // 1000
    except (KeyError, TypeError):
        # Only a failed lookup should trigger the fallback; anything else is
        # a real error and must not be hidden.
        tam = tam_by_category['AI'] // 1000

    # Multiplication order is kept as-is so float results stay bit-identical.
    sam = comp_data['SAM'] * tam
    som = comp_data["SOM"] * comp_data['SAM'] * tam

    # Fixed percentages reported as-is.
    percent_now = 0.02
    percent_then = 0.18

    # Mean of the comparable investments, scaled down by 300. The list is
    # never empty: search_investemnts supplies a fallback value.
    investments = search_investemnts(description)
    invest = sum(investments) / len(investments) / 300
    company_value = invest * 3
    invest_then = company_value * 1.3

    return [
        {
            'type': 'users_metrics',
            'value': {'cac': cac, 'ltv': ltv},
        },
        {
            'type': 'market_values',
            'value': {'tam': tam, 'sam': sam, 'som': som},
        },
        {
            'type': 'percentage',
            'value': {'now': percent_now, 'then': percent_then},
        },
        {'type': 'how_much_investments', 'value': invest},
        {'type': 'company_value', 'value': company_value},
        {'type': 'future_value', 'value': invest_then},
    ]
def pdf_to_pptx(filename: str):
    """Run the external ``pdf2pptx`` tool on ``filename``.

    The file name is handed to the tool as a single argv entry, never through
    a shell, so spaces and shell metacharacters in it are harmless.

    Args:
        filename: Path of the PDF to convert.

    Returns:
        None. As before, the tool's exit status is not inspected.
    """
    # Function-scope imports keep this block self-contained.
    import subprocess
    import sys

    try:
        subprocess.run(["pdf2pptx", filename], check=False)
    except FileNotFoundError:
        # The previous os.system() call never raised when the tool was
        # missing (the shell only complained on stderr); keep that contract.
        print("pdf2pptx: command not found", file=sys.stderr)