# -*- coding: utf-8 -*-
"""Untitled6.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1m5oXxY30h1sS1BuxKgqZuxOI0zUXTYy3
"""

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt

# 1. LOAD DATA
# -------------------------------------------
# Location of the input CSV (under Colab's /content directory).
DATA_PATH = "/content/Data.csv"
df = pd.read_csv(DATA_PATH)

# Preview the first rows; as a bare expression this only renders output
# when executed as a notebook cell.
df.head()

# 2. HANDLE MISSING DATA (Basic Cleaning)
# -------------------------------------------

# Drop every row that contains at least one missing value.
df = df.dropna(axis=0, how="any")

# Inspect the column labels (nothing is renamed here).
df.columns

# 3. NORMALIZATION
# Scale = 0.1 + 0.8 * (Actual - Min) / (Max - Min)
# -------------------------------------------

def custom_scale(x):
    """Linearly rescale *x* into the range [0.1, 0.9].

    Applies ``0.1 + 0.8 * (x - min) / (max - min)`` element-wise, where
    ``min`` and ``max`` are taken over *x* itself.

    Args:
        x: A pandas Series (or any array-like exposing ``min()``/``max()``
            and supporting element-wise arithmetic).

    Returns:
        An object of the same shape as *x* holding the rescaled values.
        If every value of *x* is identical, the range is zero and the
        formula is undefined; such input is mapped to the midpoint 0.5
        instead of producing NaN from a 0/0 division.
    """
    lowest = x.min()
    value_range = x.max() - lowest

    if value_range == 0:
        # Constant input: avoid dividing by zero. ``x * 0.0`` keeps the
        # original shape/index (and any NaN entries) while zeroing values.
        return x * 0.0 + (0.1 + 0.8 * 0.5)

    return 0.1 + 0.8 * ((x - lowest) / value_range)

# Calendar columns are left as-is; every other column is rescaled.
date_columns = ("Year", "Month", "Day")
columns_to_scale = [name for name in df.columns if name not in date_columns]

# Work on a copy so the cleaned, unscaled frame stays available in ``df``.
scaled_df = df.copy()
for col in columns_to_scale:
    scaled_df[col] = custom_scale(df[col])

scaled_df.head()

# 4. TRAIN–TEST SPLIT (Roll number used as seed)
# -------------------------------------------

seed = 210428  # <<< REPLACE THIS with your roll number

# Features: the scaled predictor columns fed to the model (MXT included).
feature_columns = ["TEM", "MXT", "MNT", "HUM", "SLP", "WIS", "SSH", "CLD"]
X = scaled_df[feature_columns]

# Target: the scaled RIN column.
target_column = "RIN"
y = scaled_df[target_column]

# Hold out 20% of the rows for testing; the other 80% are used for training.
# The seed makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)

# 5. NEURAL NETWORK MODEL
# -------------------------------------------

# Network configuration: two hidden layers of 10 units each, ReLU
# activation, Adam solver, at most 2000 iterations, seeded for
# reproducibility.
mlp_settings = {
    "hidden_layer_sizes": (10, 10),
    "activation": "relu",
    "solver": "adam",
    "max_iter": 2000,
    "random_state": seed,
}
model = MLPRegressor(**mlp_settings)

# Train on the training split only.
model.fit(X_train, y_train)

# Predictions for each split, used below for metrics and plots.
train_pred = model.predict(X_train)
test_pred = model.predict(X_test)

# 6. ACCURACY RESULTS
# -------------------------------------------

# Coefficient of determination on each split.
r2_train = r2_score(y_train, train_pred)
r2_test = r2_score(y_test, test_pred)
print("TRAIN R²:", r2_train)
print("TEST  R²:", r2_test)

# Root-mean-square error: square root of the mean squared error.
rmse_train = np.sqrt(mean_squared_error(y_train, train_pred))
rmse_test = np.sqrt(mean_squared_error(y_test, test_pred))
print("\nTRAIN RMSE:", rmse_train)
print("TEST  RMSE:", rmse_test)

# 7. SCATTER PLOTS: ACTUAL vs PREDICTED MAXIMUM RAINFALL (RIN)
# Train Data, Test Data, Overall Data
# -------------------------------------------
# -------- TRAIN DATA PLOT --------
def _scatter_actual_vs_predicted(actual, predicted, title, color=None):
    """Show one actual-vs-predicted scatter plot in its own 7x6 figure.

    Args:
        actual: Observed target values, drawn on the x-axis.
        predicted: Model outputs for the same rows, drawn on the y-axis.
        title: Text placed above the plot.
        color: Optional marker colour; when omitted, matplotlib's default
            colour is used.
    """
    # Forward ``color`` only when one was requested so the default colour
    # handling is left untouched otherwise.
    marker_style = {"alpha": 0.6}
    if color is not None:
        marker_style["color"] = color

    plt.figure(figsize=(7, 6))
    plt.scatter(actual, predicted, **marker_style)
    plt.xlabel("Actual Maximum Rainfall (RIN)")
    plt.ylabel("Predicted Maximum Rainfall (RIN)")
    plt.title(title)
    plt.grid(True)
    plt.show()


# -------- TRAIN DATA PLOT --------
_scatter_actual_vs_predicted(
    y_train,
    train_pred,
    "TRAIN DATA: Actual vs Predicted Maximum Rainfall",
)

# -------- TEST DATA PLOT --------
_scatter_actual_vs_predicted(
    y_test,
    test_pred,
    "TEST DATA: Actual vs Predicted Maximum Rainfall",
    color="orange",
)

# -------- OVERALL DATA PLOT --------
# Predict on every row (train and test together) of the feature frame.
overall_pred = model.predict(X)

_scatter_actual_vs_predicted(
    scaled_df["RIN"],
    overall_pred,
    "OVERALL DATA: Actual vs Predicted Maximum Rainfall",
    color="green",
)