import pandas as pd


class Confidence:
    """Score a trained linear model with the coefficient of determination.

    The model is ``price = theta0 + theta1 * mileage``.  The thetas are read
    from the first row of the ``theta0`` / ``theta1`` columns of a CSV file,
    and the evaluation samples from the ``km`` / ``price`` columns of another.
    See https://en.wikipedia.org/wiki/Coefficient_of_determination

    Loading is best-effort: when a file is missing or malformed a warning is
    printed and the defaults are kept (both thetas ``0``, ``data`` an empty
    list) instead of raising from the constructor.
    """

    def __init__(self, thetas_path="thetas.csv", data_path="datasets/data.csv"):
        """Store both paths and try to load the thetas and the dataset.

        Args:
            thetas_path: CSV file holding the ``theta0`` and ``theta1`` columns.
            data_path: CSV file holding the ``km`` and ``price`` columns.
        """
        self.theta_path = thetas_path
        self.data_path = data_path
        self.data = []
        self.theta0 = 0
        self.theta1 = 0
        self.get_thetas()
        self.get_data()

    def get_thetas(self) -> None:
        """Load ``theta0`` and ``theta1`` from ``self.theta_path``.

        On failure a warning is printed and both thetas keep their previous
        values.
        """
        try:
            thetas = pd.read_csv(self.theta_path)
            # Read both values before assigning either one, so a file that
            # lacks one column cannot leave a half-updated model behind.
            theta0 = float(thetas["theta0"].iloc[0])
            theta1 = float(thetas["theta1"].iloc[0])
        except (OSError, ValueError, KeyError, IndexError):
            # OSError: missing/unreadable file.  ValueError: pandas'
            # EmptyDataError/ParserError, decoding errors, non-numeric cells.
            # KeyError: missing column.  IndexError: header but no rows.
            print("! Warning, no trained model has been found")
            return
        self.theta0 = theta0
        self.theta1 = theta1

    def get_data(self) -> None:
        """Load the evaluation dataset from ``self.data_path``.

        On failure a warning is printed and ``self.data`` is left untouched.
        """
        try:
            self.data = pd.read_csv(self.data_path)
        except (OSError, ValueError):
            print("! Warning, no data has been found")

    def estimate_price(self, mileage):
        """Return the predicted price for ``mileage`` (scalar or array-like)."""
        return self.theta0 + (self.theta1 * mileage)

    def get_confidence(self) -> float:
        """Return the R-squared score of the model on the loaded dataset.

        Returns:
            ``1 - ss_res / ss_tot``.  NaN is returned when the score is
            undefined: no dataset was loaded, the dataset has no rows, or
            every price is identical (zero total variance).

        Raises:
            KeyError: if the dataset lacks a ``km`` or ``price`` column.
        """
        data = self.data
        if not isinstance(data, pd.DataFrame) or data.empty:
            return float("nan")

        # Plain float arrays keep NaN propagation identical to a Python-level
        # sum (pandas reductions would silently skip NaN cells).
        prices = data["price"].to_numpy(dtype=float)
        mileages = data["km"].to_numpy(dtype=float)

        ss_tot = float(((prices - prices.mean()) ** 2).sum())
        if ss_tot == 0:
            # All prices are equal: the ratio below would be a division by
            # zero, so report the score as undefined.
            return float("nan")

        residuals = prices - self.estimate_price(mileages)
        ss_res = float((residuals ** 2).sum())
        return 1.0 - (ss_res / ss_tot)
range(len(self.data))] @@ -61,6 +65,8 @@ class Model: writer.writerows(data) def visualize(self): + confidence = Confidence(data_path=data_path) + km_range = np.linspace(min(self.data["km"]), max(self.data["km"]), 100) predicted_prices = self.theta0 + (self.theta1 * (km_range - self.km_min) / (self.km_max - self.km_min)) fig, (regression, cost) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5)) @@ -73,11 +79,11 @@ class Model: regression.set_title("Linear Regression") regression.legend() regression.grid(True) - + regression.text(0.05, 0.95, f"Confidence: {confidence.get_confidence()}", transform=regression.transAxes, fontsize=8, verticalalignment='top', horizontalalignment='left', color="black") cost.plot(range(1, len(self.cost_history) + 1), self.cost_history, color='green') - cost.set_title("Coût vs Itérations") - cost.set_xlabel("Itérations") - cost.set_ylabel("Coût") + cost.set_title("Cost vs Epochs") + cost.set_xlabel("Epochs") + cost.set_ylabel("Cost") cost.grid(True) plt.show()