#!/usr/bin/env python
# coding: utf-8
"""Two MLP regression experiments on the table stored in ``Data.csv``.

Part 1 predicts MXT (maximum temperature) from the remaining non-date
columns.  Part 2 predicts RIN from eight named columns.  Both parts rescale
their inputs and targets to [0.1, 0.9] and use an 80/20 train/test split with
the same random seed.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor


def normalize(series):
    """Linearly rescale *series* so its minimum maps to 0.1 and its maximum to 0.9.

    A constant series has zero range, which would turn every element into
    0/0 = NaN; such a series is mapped to 0.1 throughout instead.  Missing
    values in *series* stay missing in the result.
    """
    lowest = series.min()
    span = series.max() - lowest
    if span == 0:
        # Treat a zero range as a unit range so the division is well defined.
        span = 1
    return 0.1 + 0.8 * (series - lowest) / span


def custom_scale(x):
    """Rescale *x* to [0.1, 0.9]; this is the same transform as ``normalize``."""
    return normalize(x)


# ---------------------------------------------------------------------------
# Part 1: predict MXT (maximum temperature)
# ---------------------------------------------------------------------------

# In[2]:
df = pd.read_csv("Data.csv")

# In[3]:
# Notebook inspection cells; run as a plain script only df.info() prints.
df.info()
df.describe()

# In[4]:
df.isnull().sum()

# In[5]:
# Drop rows that have a missing value in TEM, SLP or RIN.
# NOTE(review): the original comment said "variables (except MXT, MNT)", but
# only these three columns are checked -- confirm that the other predictor
# columns have no gaps.
df = df.dropna(subset=["TEM", "SLP", "RIN"])

# In[6]:
# Fill the remaining gaps in MXT and MNT with each column's mean.  The result
# is assigned back instead of calling fillna(inplace=True) on a column
# selection, which is not guaranteed to modify df itself.
df = df.fillna({"MXT": df["MXT"].mean(), "MNT": df["MNT"].mean()})

# In[7]:
df.isnull().sum()

# In[9]:
# Target variable (maximum temperature)
y = df["MXT"]

# All other variables used as predictors
X = df.drop(columns=["MXT", "Year", "Month", "Day"])

# Normalize predictors and target.
# NOTE(review): the min/max are taken over the full dataset, i.e. before the
# split below, so the test rows influence the scaling.
X_norm = X.apply(normalize)
y_norm = normalize(y)

# In[10]:
seed = 210412

X_train, X_test, y_train, y_test = train_test_split(
    X_norm, y_norm, test_size=0.20, random_state=seed
)

# In[11]:
model = MLPRegressor(
    hidden_layer_sizes=(20, 20), max_iter=1000, random_state=seed
)
model.fit(X_train, y_train)

# In[12]:
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)
y_pred_all = model.predict(X_norm)

# In[13]:
train_mse = mean_squared_error(y_train, y_pred_train)
train_mae = mean_absolute_error(y_train, y_pred_train)
train_r2 = r2_score(y_train, y_pred_train)

# In[14]:
test_mse = mean_squared_error(y_test, y_pred_test)
test_mae = mean_absolute_error(y_test, y_pred_test)
test_r2 = r2_score(y_test, y_pred_test)

# In[15]:
overall_mse = mean_squared_error(y_norm, y_pred_all)
overall_mae = mean_absolute_error(y_norm, y_pred_all)
overall_r2 = r2_score(y_norm, y_pred_all)

# In[16]:
# All metrics are on the normalized scale, not in the target's original units.
print("Training Metrics:")
print(train_mse, train_mae, train_r2)

print("\nTesting Metrics:")
print(test_mse, test_mae, test_r2)

print("\nOverall Metrics:")
print(overall_mse, overall_mae, overall_r2)

# In[18]:
plt.figure(figsize=(6, 4))
plt.scatter(y_train, y_pred_train, color="red", label="Training", s=20)
plt.scatter(y_test, y_pred_test, color="skyblue", label="Test", s=20)
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.title("Actual vs Predicted")
plt.legend()
plt.show()


# ---------------------------------------------------------------------------
# Part 2: predict RIN
# ---------------------------------------------------------------------------

# In[1]:
# Start again from the raw file; from here on df, X, y, seed, model and the
# train/test splits are rebound to the Part 2 values.
df = pd.read_csv("Data.csv")
df.head()

# In[2]:
# Unlike Part 1, every row with any missing value is dropped here.
df = df.dropna()
df.columns

# In[3]:
# Rescale every column except the date columns.
scaled_df = df.copy()
for col in df.columns:
    if col not in ["Year", "Month", "Day"]:
        scaled_df[col] = custom_scale(df[col])

scaled_df.head()

# In[4]:
seed = 210412

X = scaled_df[["TEM", "MXT", "MNT", "HUM", "SLP", "WIS", "SSH", "CLD"]]
y = scaled_df["RIN"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=seed
)

# In[5]:
model = MLPRegressor(
    hidden_layer_sizes=(10, 10),
    activation="relu",
    solver="adam",
    max_iter=2000,
    random_state=seed,
)
model.fit(X_train, y_train)

# Predictions
train_pred = model.predict(X_train)
test_pred = model.predict(X_test)

# In[6]:
print("TRAIN R²:", r2_score(y_train, train_pred))
print("TEST R²:", r2_score(y_test, test_pred))

print("\nTRAIN RMSE:", np.sqrt(mean_squared_error(y_train, train_pred)))
print("TEST RMSE:", np.sqrt(mean_squared_error(y_test, test_pred)))

# In[7]:
plt.figure(figsize=(7, 6))
plt.scatter(y_train, train_pred, alpha=0.6)
plt.xlabel("Actual Maximum Rainfall (RIN)")
plt.ylabel("Predicted Maximum Rainfall (RIN)")
plt.title("TRAIN DATA: Actual vs Predicted Maximum Rainfall")
plt.grid(True)
plt.show()

# In[8]:
plt.figure(figsize=(7, 6))
plt.scatter(y_test, test_pred, alpha=0.6, color="orange")
plt.xlabel("Actual Maximum Rainfall (RIN)")
plt.ylabel("Predicted Maximum Rainfall (RIN)")
plt.title("TEST DATA: Actual vs Predicted Maximum Rainfall")
plt.grid(True)
plt.show()

# In[9]:
overall_pred = model.predict(X)

plt.figure(figsize=(7, 6))
plt.scatter(scaled_df["RIN"], overall_pred, alpha=0.6, color="green")
plt.xlabel("Actual Maximum Rainfall (RIN)")
plt.ylabel("Predicted Maximum Rainfall (RIN)")
plt.title("OVERALL DATA: Actual vs Predicted Maximum Rainfall")
plt.grid(True)
plt.show()