import os
import cv2
import numpy as np
import joblib

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Root directory of the dataset; the loading loop looks for one
# sub-directory per key of CLASS_MAPPING underneath it.
DATASET_PATH = "data/eurosat/EuroSAT_RGB"

# Accumulators filled while scanning the dataset: one feature row and one
# integer label per image that could be read.
features, labels = [], []

# Dataset class directories grouped by merged training label; the position
# of each group in this tuple is the label assigned to its members
# (presumably 0 = water, 1 = vegetation, 2 = built-up, 3 = cropland --
# confirm against whatever consumes the model's predictions).
# NOTE(review): EuroSAT also ships a "PermanentCrop" class that is not
# mapped here, so its images are never loaded -- confirm this is intended.
_CLASS_GROUPS = (
    ("River", "SeaLake"),
    ("Forest", "Pasture", "HerbaceousVegetation"),
    ("Residential", "Industrial", "Highway"),
    ("AnnualCrop",),
)

# Flattened lookup: class directory name -> merged label. Insertion order
# follows the groups above, which fixes the order classes are scanned in.
CLASS_MAPPING = {
    class_name: group_label
    for group_label, group in enumerate(_CLASS_GROUPS)
    for class_name in group
}

# Build the training set: every readable image under each mapped class
# directory contributes one 4-value feature row and one merged label.
for class_name, label in CLASS_MAPPING.items():

    class_path = os.path.join(DATASET_PATH, class_name)

    if not os.path.isdir(class_path):
        # Stay best-effort (keep going with the remaining classes), but say
        # so: silently dropping a class yields a model that can never
        # predict it, which is very hard to spot afterwards.
        print(f"Warning: class directory not found, skipping: {class_path}")
        continue

    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the row order -- and therefore the seeded train/test
    # split and the trained model -- reproducible across machines.
    for image_name in sorted(os.listdir(class_path)):

        image_path = os.path.join(class_path, image_name)

        # cv2.imread returns None (rather than raising) for files it cannot
        # decode, e.g. stray non-image files; those are skipped.
        image = cv2.imread(image_path)

        if image is None:
            continue

        image = cv2.resize(image, (64, 64))

        # OpenCV loads colour images in BGR channel order.
        blue_mean = np.mean(image[:, :, 0])
        green_mean = np.mean(image[:, :, 1])
        red_mean = np.mean(image[:, :, 2])

        # NOTE: despite the name this is not a true NDVI (that needs a
        # near-infrared band, which RGB images do not have); it is the
        # normalised green-red difference used as a visible-band proxy.
        # The small epsilon avoids division by zero on all-black images.
        ndvi = (green_mean - red_mean) / (
            green_mean + red_mean + 1e-10
        )

        # Feature layout is [red, green, blue, index]; anything feeding the
        # trained model must produce the same four values in this order.
        features.append([
            red_mean,
            green_mean,
            blue_mean,
            ndvi
        ])

        labels.append(label)

# Convert the accumulated rows into arrays for scikit-learn:
# X holds one feature row per image, y the matching integer labels.
X = np.array(features)
y = np.array(labels)

# Fail early with an actionable message when nothing was loaded (wrong
# DATASET_PATH, dataset not extracted, ...). Without this check
# train_test_split raises a ValueError about an empty train set that gives
# no hint where the images were expected.
if X.size == 0:
    raise ValueError(
        f"No images could be loaded from {DATASET_PATH!r}; "
        "check that the dataset has been downloaded and extracted there."
    )

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split repeatable for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)

# 100-tree random forest, seeded so repeated runs build the same model.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)

model.fit(X_train, y_train)

# Report per-class precision / recall / F1 on the held-out samples.
predictions = model.predict(X_test)

print(classification_report(y_test, predictions))

# Persist the trained model. joblib.dump does not create missing parent
# directories, so make sure the target directory exists first; otherwise a
# fresh checkout would fail here only after the whole training run.
model_path = "app/ml/model.pkl"
os.makedirs(os.path.dirname(model_path), exist_ok=True)

joblib.dump(model, model_path)

print("Model saved successfully")