add: R2 Score to get confidence
This commit is contained in:
44
confidence.py
Normal file
44
confidence.py
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
import pandas as pd
|
||||||
|
'''
|
||||||
|
Class Confidence:
|
||||||
|
Calculate the confidence using the R-squared score (coefficient of determination)
|
||||||
|
https://en.wikipedia.org/wiki/Coefficient_of_determination
|
||||||
|
'''
|
||||||
|
|
||||||
|
class Confidence:
    """Score a trained linear model with the coefficient of determination.

    The model parameters (``theta0``, ``theta1``) are read from a CSV file
    and the reference observations (``km`` / ``price`` columns) from
    another. Loading is best-effort: when a file is missing or malformed a
    warning is printed and the defaults (``theta0 = theta1 = 0``,
    ``data = []``) are kept.
    """

    def __init__(self, thetas_path="thetas.csv", data_path="datasets/data.csv"):
        """Load the thetas and the dataset from the given CSV paths.

        Args:
            thetas_path: CSV file with ``theta0`` and ``theta1`` columns.
            data_path: CSV file with ``km`` and ``price`` columns.
        """
        self.theta_path = thetas_path
        self.data_path = data_path
        self.data = []
        self.theta0 = 0
        self.theta1 = 0
        self.get_thetas()
        self.get_data()

    def get_thetas(self):
        """Read ``theta0`` and ``theta1`` from the first row of the thetas file.

        On any read or format problem a warning is printed and both thetas
        keep their previous values.
        """
        try:
            with open(self.theta_path, 'r') as file:
                thetas = pd.read_csv(file)
            # Read both values before assigning so that a file missing one
            # column cannot leave the model half-updated.
            theta0 = thetas["theta0"].iloc[0]
            theta1 = thetas["theta1"].iloc[0]
        except (OSError, KeyError, IndexError, ValueError):
            # OSError: missing/unreadable file; ValueError: empty or
            # unparsable CSV (pandas' parser errors derive from it);
            # KeyError / IndexError: missing column or no data row.
            print("! Warning, no trained model has been found")
        else:
            self.theta0 = theta0
            self.theta1 = theta1

    def get_data(self):
        """Load the dataset into ``self.data`` as a DataFrame.

        On any read or format problem a warning is printed and
        ``self.data`` keeps its previous value.
        """
        try:
            with open(self.data_path, 'r') as file:
                self.data = pd.read_csv(file)
        except (OSError, ValueError):
            print("! Warning, no data has been found")

    def estimate_price(self, mileage):
        """Return the predicted price ``theta0 + theta1 * mileage``.

        Works on a scalar as well as on a pandas Series / numpy array.
        NOTE(review): assumes the stored thetas apply to raw
        (un-normalized) mileage -- confirm against the training code.
        """
        return self.theta0 + (self.theta1 * mileage)

    def get_confidence(self):
        """Return the R-squared score of the model on the loaded dataset.

        ``R2 = 1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
        residuals and ``SS_tot`` the total sum of squares around the mean
        price.

        Returns:
            The score as a float. When every price is identical
            (``SS_tot == 0``) the ratio is undefined; 1.0 is returned if the
            predictions are exact and 0.0 otherwise.

        Raises:
            ValueError: if no data is loaded or the dataset has no rows.
        """
        # len() works for both the initial empty list and a DataFrame,
        # whereas truth-testing a DataFrame raises.
        if len(self.data) == 0:
            raise ValueError("no data available to compute the confidence")

        prices = self.data["price"]
        predicted = self.estimate_price(self.data["km"])
        ss_res = float(((prices - predicted) ** 2).sum())
        ss_tot = float(((prices - prices.mean()) ** 2).sum())

        if ss_tot == 0:
            return 1.0 if ss_res == 0 else 0.0
        return 1 - (ss_res / ss_tot)
|
||||||
16
model.py
16
model.py
@@ -3,8 +3,12 @@ import numpy as np
|
|||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import csv
|
import csv
|
||||||
|
|
||||||
|
from confidence import Confidence
|
||||||
|
|
||||||
|
data_path="datasets/data.csv"
|
||||||
|
|
||||||
class Model:
|
class Model:
|
||||||
def __init__(self, data_name="data.csv", learning_rate=0.01, epochs=2000):
|
def __init__(self, data_name=data_path, learning_rate=0.01, epochs=2000):
|
||||||
with open(data_name, 'r') as file:
|
with open(data_name, 'r') as file:
|
||||||
self.data = pd.read_csv(file)
|
self.data = pd.read_csv(file)
|
||||||
self.km = [self.data["km"][i] for i in range(len(self.data))]
|
self.km = [self.data["km"][i] for i in range(len(self.data))]
|
||||||
@@ -61,6 +65,8 @@ class Model:
|
|||||||
|
|
||||||
writer.writerows(data)
|
writer.writerows(data)
|
||||||
def visualize(self):
|
def visualize(self):
|
||||||
|
confidence = Confidence(data_path=data_path)
|
||||||
|
|
||||||
km_range = np.linspace(min(self.data["km"]), max(self.data["km"]), 100)
|
km_range = np.linspace(min(self.data["km"]), max(self.data["km"]), 100)
|
||||||
predicted_prices = self.theta0 + (self.theta1 * (km_range - self.km_min) / (self.km_max - self.km_min))
|
predicted_prices = self.theta0 + (self.theta1 * (km_range - self.km_min) / (self.km_max - self.km_min))
|
||||||
fig, (regression, cost) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
|
fig, (regression, cost) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
|
||||||
@@ -73,11 +79,11 @@ class Model:
|
|||||||
regression.set_title("Linear Regression")
|
regression.set_title("Linear Regression")
|
||||||
regression.legend()
|
regression.legend()
|
||||||
regression.grid(True)
|
regression.grid(True)
|
||||||
|
regression.text(0.05, 0.95, f"Confidence: {confidence.get_confidence()}", transform=regression.transAxes, fontsize=8, verticalalignment='top', horizontalalignment='left', color="black")
|
||||||
cost.plot(range(1, len(self.cost_history) + 1), self.cost_history, color='green')
|
cost.plot(range(1, len(self.cost_history) + 1), self.cost_history, color='green')
|
||||||
cost.set_title("Coût vs Itérations")
|
cost.set_title("Cost vs Epochs")
|
||||||
cost.set_xlabel("Itérations")
|
cost.set_xlabel("Epochs")
|
||||||
cost.set_ylabel("Coût")
|
cost.set_ylabel("Cost")
|
||||||
cost.grid(True)
|
cost.grid(True)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user