mirror of
https://github.com/Alexander-D-Karpov/scripts.git
synced 2025-06-04 13:13:15 +03:00
220 lines
8.5 KiB
Python
220 lines
8.5 KiB
Python
import re
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup, NavigableString
|
|
import time
|
|
import random
|
|
|
|
|
|
# Captures the target of a CSS ``url(...)`` token, with or without quotes.
_CSS_URL_RE = re.compile(r"url\(\s*['\"]?(.*?)['\"]?\s*\)")

# Seconds to wait for t.me; without a timeout requests.get can block forever.
_REQUEST_TIMEOUT = 15


def _css_background_url(style):
    """Return the URL of a ``background-image: url(...)`` style, or None."""
    if not style or 'background-image' not in style:
        return None
    match = _CSS_URL_RE.search(style)
    return match.group(1) if match else None


def _append_formatted(node, parts):
    """Append the simplified-markup rendering of *node* to the list *parts*."""
    # Text nodes are handled first so tag-only attributes are never touched
    # on plain strings.
    if isinstance(node, NavigableString):
        parts.append(str(node))
        return

    name = node.name
    # NOTE: text and href values are inserted as-is (not HTML-escaped).
    if name == 'br':
        parts.append('\n')
    elif name == 'b':
        parts.append(f"<b>{node.get_text()}</b>")
    elif name == 'i':
        parts.append(f"<i>{node.get_text()}</i>")
    elif name == 'a':
        href = node.get('href', '')
        parts.append(f"<a href='{href}'>{node.get_text()}</a>")
    elif name in ('pre', 'code'):
        parts.append(f"<code>{node.get_text()}</code>")
    elif name == 'tg-emoji':
        emoji = node.find('i', class_='emoji')
        parts.append(emoji.get_text() if emoji is not None else '')
    else:
        # Unknown wrapper tag: render its children in document order.
        for child in node.children:
            _append_formatted(child, parts)


def _format_message_text(text_elem):
    """Render a message-text element as text with a small set of inline tags."""
    parts = []
    for child in text_elem.children:
        _append_formatted(child, parts)
    return "".join(parts)


def _parse_post(container):
    """Build the post dictionary for one ``tgme_widget_message`` element."""
    post_data = {'id': container.get('data-post')}

    text_elem = container.find('div', class_='tgme_widget_message_text')
    if text_elem is not None:
        post_data['html_text'] = str(text_elem)
        post_data['formatted_text'] = _format_message_text(text_elem)
        post_data['plain_text'] = text_elem.get_text(separator=' ', strip=True)
    else:
        post_data['html_text'] = ""
        post_data['formatted_text'] = ""
        post_data['plain_text'] = ""

    # Message date: only recorded when the <time> tag carries a datetime.
    date_elem = container.find('a', class_='tgme_widget_message_date')
    time_elem = date_elem.find('time') if date_elem is not None else None
    if time_elem is not None and time_elem.get('datetime'):
        post_data['date'] = time_elem['datetime']

    # View count, kept as the displayed string.
    views_elem = container.find('span', class_='tgme_widget_message_views')
    if views_elem is not None:
        post_data['views'] = views_elem.text.strip()

    # Photo attachment: the image URL lives in the inline background style.
    photo_elem = container.find('a', class_='tgme_widget_message_photo_wrap')
    post_data['has_photo'] = photo_elem is not None
    if photo_elem is not None:
        photo_url = _css_background_url(photo_elem.get('style'))
        if photo_url:
            post_data['photo_url'] = photo_url

    # Video attachment: thumbnail from the inline style, plus the duration label.
    video_elem = container.find('a', class_='tgme_widget_message_video_player')
    post_data['has_video'] = video_elem is not None
    if video_elem is not None:
        video_thumb = video_elem.find('i', class_='tgme_widget_message_video_thumb')
        if video_thumb is not None:
            thumbnail_url = _css_background_url(video_thumb.get('style'))
            if thumbnail_url:
                post_data['video_thumbnail'] = thumbnail_url

        duration_elem = video_elem.find('time', class_='message_video_duration')
        if duration_elem is not None:
            post_data['video_duration'] = duration_elem.text.strip()

    return post_data


def get_telegram_channel_info(channel_username):
    """Scrape the public ``t.me/s/<channel>`` preview page of a channel.

    Args:
        channel_username: Channel username, with or without ``@``;
            surrounding whitespace is ignored.

    Returns:
        A dict with the keys ``username``, ``url``, ``subscriber_count``,
        ``channel_name``, ``description``, ``photo_url`` and
        ``recent_posts`` (a list of per-post dicts), or ``None`` when the
        username is empty or the page cannot be fetched. Fields that are not
        found on the page keep their defaults (``'Unknown'`` / ``None``).
    """
    channel_username = channel_username.strip().strip('@')
    if not channel_username:
        # Nothing to look up; avoid requesting the bare https://t.me/s/ page.
        print("Error fetching channel: empty channel username")
        return None

    url = f'https://t.me/s/{channel_username}'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://t.me/',
        'Sec-Ch-Ua': '"Google Chrome";v="122", "Chromium";v="122", "Not(A:Brand";v="24"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
    }

    # Random 1-2 second pause before every request.
    time.sleep(random.uniform(1, 2))

    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching channel: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    channel_info = {
        'username': channel_username,
        'url': f'https://t.me/{channel_username}',
        'subscriber_count': 'Unknown',
        'channel_name': 'Unknown',
        'description': 'Unknown',
        'photo_url': None,
        'recent_posts': [],
    }

    # Channel name
    title_elem = soup.find('div', class_='tgme_channel_info_header_title')
    title_span = title_elem.find('span') if title_elem is not None else None
    if title_span is not None:
        channel_info['channel_name'] = title_span.text.strip()

    # Subscriber count. The counter's full text is inspected rather than
    # using a ``string=`` filter: Tag.string is None for a tag with several
    # children, so such a filter cannot match a counter made of separate
    # value/label elements.
    for counter in soup.find_all('div', class_='tgme_channel_info_counter'):
        if 'subscriber' not in counter.get_text():
            continue
        value_elem = counter.find('span', class_='counter_value')
        if value_elem is not None:
            channel_info['subscriber_count'] = value_elem.text.strip()
            break

    # Alternative location for the subscriber count
    if channel_info['subscriber_count'] == 'Unknown':
        header_counter = soup.find('div', class_='tgme_header_counter')
        if header_counter is not None:
            channel_info['subscriber_count'] = re.sub(
                r'subscribers?', '', header_counter.text
            ).strip()

    # Description
    description_elem = soup.find('div', class_='tgme_channel_info_description')
    if description_elem is not None:
        channel_info['description'] = description_elem.text.strip()

    # Channel photo
    photo_elem = soup.find('i', class_='tgme_page_photo_image')
    photo_img = photo_elem.find('img') if photo_elem is not None else None
    if photo_img is not None:
        channel_info['photo_url'] = photo_img.get('src')

    # Recent posts, collected in reverse document order.
    message_containers = soup.find_all('div', class_='tgme_widget_message')
    channel_info['recent_posts'] = [
        _parse_post(container) for container in reversed(message_containers)
    ]

    return channel_info
|
|
|
|
|
|
def print_channel_info(channel_info):
    """Print a human-readable summary of a scraped channel to stdout.

    Args:
        channel_info: Dict with the keys ``username``, ``channel_name``,
            ``subscriber_count``, ``url``, ``description`` and
            ``recent_posts`` (a list of post dicts), or a falsy value when
            nothing could be retrieved.

    Each post is listed with its date, view count, media types and, when it
    has formatted text, a single-line preview of at most 150 characters.
    """
    if not channel_info:
        print("Could not retrieve channel information.")
        return

    print(f"TELEGRAM CHANNEL: @{channel_info['username']}")
    print(f"Name: {channel_info['channel_name']}")
    print(f"Subscribers: {channel_info['subscriber_count']}")
    print(f"URL: {channel_info['url']}")

    print("\nDescription:")
    print(channel_info['description'])

    print(f"\nRecent Posts ({len(channel_info['recent_posts'])} found):")
    for i, post in enumerate(channel_info['recent_posts'], 1):
        print(f"\nPost {i}:")
        print(f" Date: {post.get('date', 'Unknown')}")
        print(f" Views: {post.get('views', 'Unknown')}")

        media_types = []
        if post.get('has_photo'):
            media_types.append("Photo")
        if post.get('has_video'):
            media_types.append("Video")

        media_str = ", ".join(media_types) if media_types else "None"
        print(f" Media: {media_str}")

        formatted_text = post.get('formatted_text', '')
        if formatted_text:
            # Collapse whitespace BEFORE truncating so the 150-character
            # budget is spent on visible text; truncating first could cut a
            # whitespace-heavy post short and add a misleading "...".
            text = re.sub(r'<br\s*/?>', '\n', formatted_text)
            text = re.sub(r'\s+', ' ', text).strip()

            preview = text[:150] + "..." if len(text) > 150 else text
            print(f" Content: {preview}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Interactive entry point: ask for a channel, scrape it, print the report.
    requested_channel = input("Enter Telegram channel username (with or without @): ")
    print(f"\nFetching information for {requested_channel}...")
    print_channel_info(get_telegram_channel_info(requested_channel))
|