drive2 feed
This commit is contained in:
122
services/drive2/dev.ipynb
Normal file
122
services/drive2/dev.ipynb
Normal file
@@ -0,0 +1,122 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"import requests\n",
|
||||
"import urllib.parse\n",
|
||||
"import dateutil.parser\n",
|
||||
"from datetime import datetime\n",
|
||||
"from feedgen.feed import FeedGenerator"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rus_month_to_eng = {\n",
|
||||
" 'января': 'jan',\n",
|
||||
" 'февраля': 'feb',\n",
|
||||
" 'марта': 'mar',\n",
|
||||
" 'апреля': 'apr',\n",
|
||||
" 'мая': 'may',\n",
|
||||
" 'июня': 'jun',\n",
|
||||
" 'июля': 'jul',\n",
|
||||
" 'августа': 'aug',\n",
|
||||
" 'сентября': 'sep',\n",
|
||||
" 'октября': 'oct',\n",
|
||||
" 'ноября': 'nov',\n",
|
||||
" 'декабря': 'dec'\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"def parse_date(date_str):\n",
|
||||
" day, month_rus, year = date_str.lower().split(' ')\n",
|
||||
" res = datetime.strptime('-'.join([day, rus_month_to_eng[month_rus], year, '+03:00']), '%d-%b-%Y-%z')\n",
|
||||
" return res"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36\"}\n",
|
||||
"\n",
|
||||
"url = \"https://www.drive2.ru/experience/jetta/g644510077776067186?to=133933297133670330#results\"\n",
|
||||
"r = requests.get(url, headers=headers)\n",
|
||||
"\n",
|
||||
"soup = BeautifulSoup(r.content)\n",
|
||||
"\n",
|
||||
"fg = FeedGenerator()\n",
|
||||
"fg.id(url)\n",
|
||||
"fg.title(soup.find('title').get_text(strip=True))\n",
|
||||
"fg.link( href='http://www.drive2.ru', rel='alternate' )\n",
|
||||
"fg.language('ru')\n",
|
||||
"\n",
|
||||
"for article in soup.find_all('div', {'class': 'c-block-card'}):\n",
|
||||
" fe = fg.add_entry()\n",
|
||||
"\n",
|
||||
" author = article.find('div', {'class': 'c-car-card__owner'})\n",
|
||||
" car = article.find('div', {'class': 'c-car-card__caption'})\n",
|
||||
" final_author = f\"{author.get_text(strip=True)} ({car.get_text(strip=True)})\"\n",
|
||||
"\n",
|
||||
" fe.author({'name': final_author})\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" post_title = article.find('a', {'data-ym-target': 'post_title'})\n",
|
||||
" fe.title(post_title.get_text(strip=True))\n",
|
||||
" post_url = post_title['href']\n",
|
||||
" id = [p for p in post_url.split('/') if p.strip()][-1]\n",
|
||||
" fe.id(id)\n",
|
||||
" full_url = urllib.parse.urljoin(url, post_url)\n",
|
||||
" fe.link({'href': full_url})\n",
|
||||
"\n",
|
||||
" article_r = requests.get(full_url, headers=headers)\n",
|
||||
" article_soup = BeautifulSoup(article_r.content)\n",
|
||||
" header = article_soup.find('header', {'class': 'x-title-header'})\n",
|
||||
" date_div = header.find('div', {'class': 'x-secondary-color'})\n",
|
||||
" date_txt = date_div.get_text(strip=True).lower()\n",
|
||||
" date = parse_date(date_txt)\n",
|
||||
" fe.pubDate(date)\n",
|
||||
"\n",
|
||||
" desc = article.find('div', {'class': 'c-post-preview__lead'})\n",
|
||||
" fe.description(desc.get_text(strip=True))\n",
|
||||
"\n",
|
||||
" preview_uri_div = article.find('div', {'class': 'c-preview-pic'})\n",
|
||||
" if (preview_uri_div):\n",
|
||||
" preview_uri = preview_uri_div.find('img')['src']\n",
|
||||
" fe.enclosure(preview_uri)\n",
|
||||
"\n",
|
||||
"fg.atom_file('result.xml')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user