44 lines
1.5 KiB
Python
44 lines
1.5 KiB
Python
import pandas as pd
|
|
'''
Class Confidence:
    Calculate the confidence using the R-Square score
    https://en.wikipedia.org/wiki/Coefficient_of_determination
'''
|
|
|
|
class Confidence:
    """Score a linear price model with the coefficient of determination (R²).

    The model is ``price = theta0 + theta1 * mileage``. The coefficients are
    read from the ``theta0`` / ``theta1`` columns of a CSV file, and the
    reference observations from the ``km`` / ``price`` columns of another.

    Loading is best-effort: when a file cannot be read, a warning is printed
    and the corresponding attribute keeps its default (``theta0 = theta1 = 0``,
    ``data = []``).
    """

    def __init__(self, thetas_path="thetas.csv", data_path="datasets/data.csv"):
        """Store both paths and try to load the coefficients and the dataset.

        Args:
            thetas_path: CSV file holding the ``theta0`` and ``theta1`` columns.
            data_path: CSV file holding the ``km`` and ``price`` columns.
        """
        self.theta_path = thetas_path
        self.data_path = data_path
        # Defaults kept when the matching file cannot be loaded.
        self.data = []
        self.theta0 = 0
        self.theta1 = 0
        self.get_thetas()
        self.get_data()

    def get_thetas(self):
        """Load ``theta0`` and ``theta1`` from the first row of the thetas file.

        On any read/parse problem a warning is printed and both coefficients
        are left untouched.
        """
        try:
            with open(self.theta_path, 'r') as file:
                thetas = pd.read_csv(file)
            # Read both values before assigning so that a missing second
            # column cannot leave the model half-updated.
            theta0 = float(thetas["theta0"].iloc[0])
            theta1 = float(thetas["theta1"].iloc[0])
        except (OSError, ValueError, KeyError, IndexError):
            # ValueError also covers pandas' EmptyDataError / ParserError.
            print("! Warning, no trained model has been found")
            return
        self.theta0 = theta0
        self.theta1 = theta1

    def get_data(self):
        """Load the dataset into ``self.data`` as a DataFrame.

        On any read/parse problem a warning is printed and ``self.data`` is
        left untouched.
        """
        try:
            with open(self.data_path, 'r') as file:
                self.data = pd.read_csv(file)
        except (OSError, ValueError):
            # ValueError also covers pandas' EmptyDataError / ParserError.
            print("! Warning, no data has been found")

    def estimate_price(self, mileage):
        """Return the model's predicted price for ``mileage``."""
        return self.theta0 + (self.theta1 * mileage)

    def get_confidence(self):
        """Return the R² score of the model over the loaded dataset.

        ``R² = 1 - SS_res / SS_tot``. When every price is identical
        (``SS_tot == 0``) the ratio is undefined; in that case ``1.0`` is
        returned for a perfect fit and ``0.0`` otherwise.

        Returns:
            The R² score as a float.

        Raises:
            ValueError: if no observations are loaded.
            KeyError: if the dataset lacks a ``km`` or ``price`` column.
        """
        # len() works for both the default list and a DataFrame, whereas a
        # DataFrame's truth value is ambiguous.
        if len(self.data) == 0:
            raise ValueError("no data available to compute the confidence")

        prices = self.data["price"]
        predicted = self.estimate_price(self.data["km"])

        ss_tot = float(((prices - prices.mean()) ** 2).sum())
        ss_res = float(((prices - predicted) ** 2).sum())

        if ss_tot == 0:
            return 1.0 if ss_res == 0 else 0.0
        return 1 - (ss_res / ss_tot)