"""Build an Atom feed from a listing page of post cards (presumably drive2.ru)."""

import html
import logging
import urllib.parse
from datetime import datetime, timedelta, timezone

import dateutil.parser
import requests
from bs4 import BeautifulSoup
from feedgen.feed import FeedGenerator

logger = logging.getLogger(__name__)

# Russian month names as they appear in the scraped dates, mapped to English
# abbreviations.  Declared in calendar order, which _MONTH_NUMBER relies on.
rus_month_to_eng = {
    'января': 'jan',
    'февраля': 'feb',
    'марта': 'mar',
    'апреля': 'apr',
    'мая': 'may',
    'июня': 'jun',
    'июля': 'jul',
    'августа': 'aug',
    'сентября': 'sep',
    'октября': 'oct',
    'ноября': 'nov',
    'декабря': 'dec'
}

# Month number (1-12) for each Russian month name, derived from the insertion
# order of rus_month_to_eng so date parsing does not depend on the locale.
_MONTH_NUMBER = {
    name: number for number, name in enumerate(rus_month_to_eng, start=1)
}

# Every parsed date is stamped with a fixed +03:00 UTC offset.
_SITE_UTC_OFFSET = timezone(timedelta(hours=3))

# Seconds to wait for an HTTP response before giving up instead of hanging.
_REQUEST_TIMEOUT = 30

# Trailing "read more" link text that ends up inside the post lead.
_READ_MORE_SUFFIX = 'Читать дальше'


def parse_date(date_str):
    """Parse a ``'<day> <russian month> <year>'`` string into a datetime.

    The input is lower-cased and split on any run of whitespace.  The result
    is timezone-aware with a fixed +03:00 offset and a time of midnight.

    Raises:
        ValueError: if the string does not consist of exactly three parts, or
            the day/year are not valid numbers for a calendar date.
        KeyError: if the month name is not a key of ``rus_month_to_eng``.
    """
    parts = date_str.lower().split()
    if len(parts) != 3:
        raise ValueError(
            f"expected '<day> <month> <year>', got {date_str!r}"
        )
    day, month_rus, year = parts
    return datetime(
        int(year),
        _MONTH_NUMBER[month_rus],
        int(day),
        tzinfo=_SITE_UTC_OFFSET,
    )


headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"}


def _text(tag):
    """Return the stripped text of *tag*, or '' when the lookup found nothing."""
    return tag.get_text(strip=True) if tag is not None else ''


def _author_name(article):
    """Return ``'<owner> (<car caption>)'`` for a card, omitting missing parts."""
    owner = _text(article.find('div', {'class': 'c-car-card__owner'}))
    car = _text(article.find('div', {'class': 'c-car-card__caption'}))
    if not car:
        return owner
    return f"{owner} ({car})"


def _fetch_publication_date(post_url):
    """Download the post page and return its publication date, or None.

    None is returned when the page has no title header, the header has no
    date element, or the date text cannot be parsed (which is logged).
    Network errors raised by ``requests`` propagate to the caller.
    """
    article_r = requests.get(
        post_url, headers=headers, timeout=_REQUEST_TIMEOUT
    )
    article_soup = BeautifulSoup(article_r.content, features='lxml')

    header = article_soup.find('header', {'class': 'x-title-header'})
    if header is None:
        return None
    date_div = header.find('div', {'class': 'x-secondary-color'})
    if date_div is None:
        return None

    date_txt = date_div.get_text(strip=True)
    try:
        return parse_date(date_txt)
    except (KeyError, ValueError):
        # The date is optional metadata; one odd date must not abort the feed.
        logger.warning(
            "Could not parse publication date %r for %s", date_txt, post_url
        )
        return None


def _summary_text(article):
    """Return the card's lead text, HTML-escaped, without the 'read more' tail."""
    summary = _text(article.find('div', {'class': 'c-post-preview__lead'}))
    # Remove the exact suffix only; str.rstrip() would treat the argument as a
    # set of characters and also eat trailing letters of the real text.
    if summary.endswith(_READ_MORE_SUFFIX):
        summary = summary[:-len(_READ_MORE_SUFFIX)].rstrip()
    # The summary is published with type='html', so plain text must be escaped.
    return html.escape(summary, quote=False)


def generate(url):
    """Scrape the listing page at *url* and return it as an Atom feed.

    One feed entry is created per ``div.c-block-card`` element that contains
    a post-title link.  For each entry the post page itself is downloaded to
    look up the publication date, so this performs one extra HTTP request per
    post.

    Args:
        url: Address of the listing page; also used as the feed id and as the
            base for resolving relative post links.

    Returns:
        The value of ``FeedGenerator.atom_str()`` for the assembled feed.

    Raises:
        requests.RequestException: if a page cannot be downloaded in time.
    """
    r = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    soup = BeautifulSoup(r.content, features='lxml')

    fg = FeedGenerator()
    fg.id(url)
    # Fall back to the URL so the feed still has a title when <title> is absent.
    fg.title(_text(soup.find('title')) or url)
    fg.link(href='http://www.drive2.ru', rel='alternate')
    fg.language('ru')

    for article in soup.find_all('div', {'class': 'c-block-card'}):
        post_title = article.find('a', {'data-ym-target': 'post_title'})
        # Cards without a title link cannot become entries; skip them before
        # an empty entry is added to the feed.
        if post_title is None or not post_title.get('href'):
            continue

        fe = fg.add_entry()
        fe.author({'name': _author_name(article)})
        fe.title(_text(post_title))

        post_url = post_title['href']
        # The entry id is the last non-empty path segment of the post link.
        segments = [p for p in post_url.split('/') if p.strip()]
        post_id = segments[-1] if segments else post_url
        fe.id(post_id)

        full_url = urllib.parse.urljoin(url, post_url)
        fe.link({'href': full_url})

        published = _fetch_publication_date(full_url)
        if published is not None:
            fe.pubDate(published)

        description = _summary_text(article)
        preview_uri_div = article.find('div', {'class': 'c-preview-pic'})
        if preview_uri_div is not None:
            # NOTE(review): the preview image URL
            # (preview_uri_div.find('img')['src']) was looked up here but never
            # emitted, and an fe.enclosure(...) call was commented out.  Only
            # the leading space is kept; confirm whether the image should be
            # embedded in the summary or attached as an enclosure.
            description = f" {description}"
        fe.summary(description, type='html')

    return fg.atom_str()