import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import datetime
import pandas as pd
import matplotlib.pyplot as plt

# Settings
plt.rcParams.update({'font.size': 10})

# Destination workbook for the collected scores. The location is
# machine-specific; adjust it when running on a different computer.
OUTPUT_PATH = r"C:\Users\lab44\Desktop\New Microsoft Excel Worksheet.xlsx"

DATE_FORMAT = '%Y-%m-%d'


def scrape_date(date):
    """Return the non-neutral sentiment polarities found on one archive page.

    Fetches ``https://www.dhakatribune.com/archive/<YYYY-MM-DD>`` for *date*,
    runs TextBlob over the text of every ``<h2>`` and ``<p>`` element longer
    than 30 characters, and keeps each polarity that is not exactly 0.0.
    Every kept score is also printed with the first 60 characters of its text.

    Args:
        date: Object with ``strftime`` (a ``datetime.datetime`` in this
            script) identifying the archive day.

    Returns:
        List of polarity floats; an empty list when the request fails.
    """
    day = date.strftime(DATE_FORMAT)
    url = f"https://www.dhakatribune.com/archive/{day}"
    print(f"Scraping URL: {url}")

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        # Best effort: one unreachable day must not abort the whole range.
        print(f"❌ Failed to fetch data for {day}: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Target text-containing tags (use just 'p' for less noise).
    text_elements = soup.find_all(['h2', 'p'])

    sentiments = []
    for element in text_elements:
        text = element.get_text(strip=True)
        if len(text) <= 30:
            # Filter out short/irrelevant texts.
            continue
        polarity = TextBlob(text).sentiment.polarity
        if polarity != 0.0:
            # A polarity of exactly 0.0 is treated as "no signal" and dropped.
            sentiments.append(polarity)
            print(f"Sentiment: {polarity:.2f} | Text: {text[:60]}...")

    return sentiments


def _prompt_date(prompt):
    """Prompt until a valid YYYY-MM-DD date is typed; return (text, datetime)."""
    while True:
        text = input(prompt).strip()
        try:
            return text, datetime.datetime.strptime(text, DATE_FORMAT)
        except ValueError:
            print(f"Invalid date {text!r}; expected format YYYY-MM-DD.")


def _save_scores(scores, output_path):
    """Write the scores to an Excel sheet, reporting (not raising) on failure."""
    df = pd.DataFrame(scores, columns=["Sentiment Score"])
    try:
        df.to_excel(output_path, index=False)
    except (OSError, ImportError) as e:
        # OSError: missing directory or workbook locked by Excel;
        # ImportError: no Excel writer engine installed. Either way the
        # histogram is still worth showing, so do not abort.
        print(f"\nCould not save sentiment scores to {output_path}: {e}")
    else:
        print(f"\n✅ Sentiment scores saved to: {output_path}")


def _plot_histogram(scores, start_label, end_label):
    """Show a histogram of the scores over the fixed polarity range [-1, 1]."""
    plt.rcParams.update({'font.size': 8})
    plt.figure(figsize=(5, 4))
    plt.hist(scores, bins='sturges', color='magenta', range=(-1, 1), edgecolor="black")
    plt.title(f'Dhaka Tribune ({start_label} to {end_label})', fontsize=9)
    plt.xlabel('Sentiment Score', fontsize=8)
    plt.ylabel('Frequency', fontsize=8)
    plt.xticks(fontsize=8)
    plt.yticks(fontsize=8)
    plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)
    plt.tight_layout()
    plt.show()


def main():
    """Ask for a date range, scrape each day, then save and plot the scores."""
    # Ask for start and end date.
    start_date_input, start_date = _prompt_date("Please enter the start date (YYYY-MM-DD): ")
    end_date_input, end_date = _prompt_date("Please enter the end date (YYYY-MM-DD): ")

    if start_date > end_date:
        print("Start date is after end date; nothing to scrape.")
        return

    # Scrape all dates in the range, both ends inclusive.
    all_sentiments = []
    current_date = start_date
    while current_date <= end_date:
        print(f"\n📅 Processing: {current_date.strftime(DATE_FORMAT)}")
        all_sentiments.extend(scrape_date(current_date))
        current_date += datetime.timedelta(days=1)

    if not all_sentiments:
        # Avoid overwriting an existing workbook with an empty sheet and
        # popping up an empty chart.
        print("\nNo sentiment scores were collected; nothing to save or plot.")
        return

    # Save to Excel, then plot the histogram.
    _save_scores(all_sentiments, OUTPUT_PATH)
    _plot_histogram(all_sentiments, start_date_input, end_date_input)


if __name__ == "__main__":
    main()