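"""RSS Feed Watcher.

Polls RSS/Atom feeds listed in per-category files (regular, advisories,
alerts, bug bounty) and announces new posts to the matching Discord webhook
as embed messages. Processed links are persisted to disk so the same post
is not announced twice across runs.
"""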
import logging
import os
import re
import time
from datetime import datetime, timedelta
from typing import List

import feedparser
import requests
import schedule
from bs4 import BeautifulSoup
from dateutil import parser

# Webhook URLs
DEFAULT_WEBHOOK_URL = "WEBHOOK"  # Webhook for feeds.txt
ADVISORIES_WEBHOOK = "WEBHOOK"   # Webhook for advisories.txt
ALERTS_WEBHOOK = "WEBHOOK"       # Webhook for alerts.txt
BUGBOUNTY_WEBHOOK = "WEBHOOK"    # Webhook for bugbounty.txt

# File paths
PROCESSED_LINKS_FILE = 'processed_links.txt'  # File to store processed links
ADVISORIES = 'feeds/advisories.txt'  # Advisory / threat intel feeds
FEEDS_FILE = 'feeds/feeds.txt'       # Regular feeds
ALERTS = 'feeds/alerts.txt'          # Alert feeds
BUGBOUNTY = 'feeds/bugbounty.txt'    # Bug bounty feeds
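# NOTE: the "WEBHOOK" placeholders above must be replaced with real Discord
# webhook URLs before running; in practice they would likely be read from the
# environment instead, e.g. os.environ.get("DEFAULT_WEBHOOK_URL", "").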

# Set up logging
logging.basicConfig(
    filename='rss_feed_watcher.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Helper function to send an embed message to Discord
def send_discord_embed(title: str, link: str, published: datetime, description: str, webhook_url: str):
    embed = {
        "embeds": [{
            "title": title,
            "url": link,
            "description": description,
            "color": 5814783,  # Embed accent colour as a decimal RGB value (optional)
            "timestamp": published.isoformat()  # ISO 8601 timestamp
        }]
    }
    try:
        response = requests.post(webhook_url, json=embed)
        if response.status_code != 204:
            logging.error(f"Failed to send message to Discord: {response.status_code} {response.text}")
    except requests.exceptions.RequestException as e:
        logging.error(f"Exception while sending message to Discord: {e}")
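# Example call (illustrative; assumes a valid webhook URL is configured):
#   send_discord_embed("New advisory", "https://example.com/post", datetime.now(),
#                      "Short summary", DEFAULT_WEBHOOK_URL)
# A plain webhook POST returns 204 No Content on success, which is what the
# status check above expects.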

# Helper function to clean HTML and extract plain text
def clean_html(html_content: str) -> str:
    soup = BeautifulSoup(html_content, 'html.parser')
    text = soup.get_text(separator='\n').strip()
    text = re.sub(r'\n+', ' ', text)
    # Strip common teaser phrases and leftover blank-line runs
    text = re.sub(r'(read more|continue reading|more details|[ \t]*\n[ \t]*\n[ \t]*)', '', text, flags=re.IGNORECASE)
    return text.strip()
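# e.g. clean_html("<p>Breaking news<br>details inside. Read more</p>")
# returns "Breaking news details inside." (tags dropped, newlines collapsed,
# trailing teaser phrase stripped).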

# Helper function to fetch recent posts from an RSS feed and announce new ones
def get_recent_posts(feed_url: str, webhook_url: str, since_time: datetime, processed_links: set) -> List[dict]:
    try:
        response = requests.get(feed_url, timeout=10)
        response.raise_for_status()  # Raise an error for bad HTTP responses
        feed = feedparser.parse(response.content)
        recent_posts = []

        for entry in feed.entries:
            # Try to get and parse the published date from the entry
            published = getattr(entry, 'published', None)
            if published:
                try:
                    # Compare as naive datetimes; any timezone info is dropped
                    published_time = parser.parse(published).replace(tzinfo=None)
                except (ValueError, TypeError):
                    logging.warning(f"Skipping entry with invalid published date: {published}")
                    continue
            else:
                logging.warning("Skipping entry without published date")
                continue

            if published_time > since_time:
                link = entry.link
                if link in processed_links:
                    continue  # Skip already processed posts

                description = getattr(entry, 'description', '')
                plain_text_description = clean_html(description)

                recent_posts.append({
                    'title': entry.title,
                    'link': link,
                    'published': published_time,
                    'description': plain_text_description
                })

                # Add link to the set of processed links
                processed_links.add(link)

                # Send an embed message to Discord for each new post
                send_discord_embed(entry.title, link, published_time, plain_text_description, webhook_url)

        return recent_posts
    except requests.exceptions.Timeout as e:
        logging.error(f"Request timeout for {feed_url}: {e}")
    except requests.exceptions.ConnectionError as e:
        logging.error(f"Connection error for {feed_url}: {e}")
    except requests.exceptions.RequestException as e:
        logging.error(f"HTTP request failed for {feed_url}: {e}")
    return []  # On any request failure, return an empty list so callers can iterate safely
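# NOTE: published dates are parsed and then stripped of tzinfo, so all
# comparisons happen between naive datetimes in local time. Feeds publishing
# in other timezones can therefore appear up to several hours early or late;
# normalising everything to UTC would likely be more robust.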

# Load processed links from file
def load_processed_links() -> set:
    if os.path.exists(PROCESSED_LINKS_FILE):
        with open(PROCESSED_LINKS_FILE, 'r') as file:
            return set(line.strip() for line in file)
    return set()

# Save processed links to file
def save_processed_links(processed_links: set):
    with open(PROCESSED_LINKS_FILE, 'w') as file:
        for link in processed_links:
            file.write(f"{link}\n")

# Helper function to load feed URLs from a file, one URL per line
def load_feed_urls(file_path: str) -> List[str]:
    if os.path.exists(file_path):
        with open(file_path, 'r') as file:
            return [line.strip() for line in file if line.strip()]
    logging.error(f"{file_path} not found.")
    return []
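# Each feed file holds one RSS/Atom URL per line; blank lines are ignored.
# For example, feeds/feeds.txt might look like:
#   https://example.com/blog/rss.xml
#   https://example.org/news/feed/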

# Function to process threat intel / advisory feeds
def process_threatintel_feeds(threatintel_feeds: List[str], since_time: datetime, processed_links: set):
    for feed_url in threatintel_feeds:
        logging.info(f"Checking threat intel feed: {feed_url}")
        recent_posts = get_recent_posts(feed_url, ADVISORIES_WEBHOOK, since_time, processed_links)
        for post in recent_posts:
            logging.info(f"New threat intel post: {post['title']}")
            logging.info(f"Link: {post['link']}")
            logging.info(f"Published: {post['published']}")
            logging.info(f"Description: {post['description']}")
            logging.info("-" * 40)

# Function to process regular feeds
def process_regular_feeds(regular_feeds: List[str], since_time: datetime, processed_links: set):
    for feed_url in regular_feeds:
        logging.info(f"Checking regular feed: {feed_url}")
        recent_posts = get_recent_posts(feed_url, DEFAULT_WEBHOOK_URL, since_time, processed_links)
        for post in recent_posts:
            logging.info(f"New regular post: {post['title']}")
            logging.info(f"Link: {post['link']}")
            logging.info(f"Published: {post['published']}")
            logging.info(f"Description: {post['description']}")
            logging.info("-" * 40)

# Function to process alert feeds
def process_alert_feeds(alert_feeds: List[str], since_time: datetime, processed_links: set):
    for feed_url in alert_feeds:
        logging.info(f"Checking alert feed: {feed_url}")
        recent_posts = get_recent_posts(feed_url, ALERTS_WEBHOOK, since_time, processed_links)
        for post in recent_posts:
            logging.info(f"New alert post: {post['title']}")
            logging.info(f"Link: {post['link']}")
            logging.info(f"Published: {post['published']}")
            logging.info(f"Description: {post['description']}")
            logging.info("-" * 40)

# Function to process bug bounty feeds
def process_bugbounty_feeds(bugbounty_feeds: List[str], since_time: datetime, processed_links: set):
    for feed_url in bugbounty_feeds:
        logging.info(f"Checking bug bounty feed: {feed_url}")
        recent_posts = get_recent_posts(feed_url, BUGBOUNTY_WEBHOOK, since_time, processed_links)
        for post in recent_posts:
            logging.info(f"New bug bounty post: {post['title']}")
            logging.info(f"Link: {post['link']}")
            logging.info(f"Published: {post['published']}")
            logging.info(f"Description: {post['description']}")
            logging.info("-" * 40)
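
# NOTE: the four process_* functions above are identical except for the webhook
# and log label. A single parameterised helper would remove the duplication,
# e.g. (sketch only, not wired in below):
#
#   def process_feeds(label: str, feeds: List[str], webhook_url: str,
#                     since_time: datetime, processed_links: set):
#       for feed_url in feeds:
#           logging.info(f"Checking {label} feed: {feed_url}")
#           for post in get_recent_posts(feed_url, webhook_url, since_time, processed_links):
#               logging.info(f"New {label} post: {post['title']} ({post['link']})")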

# Main function to run the watcher
def rss_feed_watcher():
    print("RUNNING...")
    processed_links = load_processed_links()  # Load previously processed links

    # Load the per-category feed lists
    regular_feeds = load_feed_urls(FEEDS_FILE)
    threatintel_feeds = load_feed_urls(ADVISORIES)
    bug_bounty_feeds = load_feed_urls(BUGBOUNTY)
    alert_feeds = load_feed_urls(ALERTS)

    # Get the timestamp to compare recent posts against (last 12 hours)
    since_time = datetime.now() - timedelta(hours=12)
    since_time = since_time.replace(tzinfo=None)  # Keep the cutoff naive to match parsed dates

    print("going over bug bounties...")
    process_bugbounty_feeds(bug_bounty_feeds, since_time, processed_links)
    print("going over regular feeds...")
    process_regular_feeds(regular_feeds, since_time, processed_links)
    print("going over threat intel...")
    process_threatintel_feeds(threatintel_feeds, since_time, processed_links)
    print("going over alerts...")
    process_alert_feeds(alert_feeds, since_time, processed_links)

    # Save updated processed links
    save_processed_links(processed_links)

# Schedule the RSS feed watcher to run every hour
def schedule_rss_watcher():
    schedule.every(1).hours.do(rss_feed_watcher)
    logging.info("RSS Feed Watcher scheduled to run every hour.")

    while True:
        schedule.run_pending()
        time.sleep(1)
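# schedule only fires jobs from within run_pending(), so the one-second sleep
# keeps the loop responsive without busy-waiting.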

if __name__ == "__main__":
    rss_feed_watcher()  # Run once immediately on startup
    # Start the scheduled watcher
    schedule_rss_watcher()