# Origin: patch cced4eb99458e9ff0c9c78469605b88b8701c5fa
#   From: Namonay, Sun, 4 May 2025 21:22:30 +0200
#   Subject: [PATCH] add: Initial commit
#   Files: data.csv (new, 25 lines), model.py (new, 74 lines)
#
# data.csv as shipped in that patch (read by Model() by default):
#   km,price
#   240000,3650
#   139800,3800
#   150500,4400
#   185530,4450
#   176000,5250
#   114800,5350
#   166800,5800
#   89000,5990
#   144500,5999
#   84000,6200
#   82029,6390
#   63060,6390
#   74000,6600
#   97500,6800
#   67000,6800
#   76025,6900
#   48235,6900
#   93000,6990
#   60949,7490
#   65674,7555
#   54000,7990
#   68500,7990
#   22899,7990
#   61789,8290
"""Single-feature linear regression (price vs. mileage) by gradient descent."""

import numpy as np
import pandas as pd


class Model:
    """Fit ``price = theta0 + theta1 * normalized_km`` with gradient descent.

    Mileage is min-max scaled before training; ``estimate_price`` applies the
    same scaling to its argument, so callers always work in raw kilometres.
    """

    def __init__(self, data_name="data.csv", learning_rate=0.01, epochs=2000):
        """Load the dataset and prepare the normalized training inputs.

        Args:
            data_name: Path (or file-like object) of a CSV with ``km`` and
                ``price`` columns.
            learning_rate: Step size applied to the summed gradients.
            epochs: Number of gradient-descent iterations run by ``train``.

        Raises:
            KeyError: If the ``km`` or ``price`` column is missing.
            ValueError: If the CSV contains no data rows.
        """
        self.data = pd.read_csv(data_name)
        self.km = self.data["km"].to_numpy(dtype=float)
        self.price = self.data["price"].to_numpy(dtype=float)
        self.m = len(self.data)
        if self.m == 0:
            raise ValueError("dataset contains no rows; cannot train a model")
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.theta0 = 0
        self.theta1 = 0
        self.km_min = 0
        self.km_max = 0
        self.cost_history = []
        self.normalize_values()

    def _normalize(self, mileage):
        """Min-max scale ``mileage`` (scalar or array) with the stored bounds."""
        span = self.km_max - self.km_min
        if span == 0:
            # Every training mileage is identical: the feature carries no
            # information, so map it to 0 instead of dividing by zero.
            return mileage * 0.0
        return (mileage - self.km_min) / span

    def __estimate_price(self, mileage):
        """Apply the linear hypothesis to an already-normalized mileage."""
        return self.theta0 + (self.theta1 * mileage)

    def estimate_price(self, mileage):
        """Return the predicted price for a raw mileage in kilometres."""
        return self.__estimate_price(self._normalize(mileage))

    def normalize_values(self):
        """Compute the mileage bounds and store the scaled mileages in ``km``.

        The bounds are always derived from the raw ``km`` column of
        ``self.data``, so calling this method again does not re-scale values
        that were already normalized.
        """
        raw_km = self.data["km"].to_numpy(dtype=float)
        self.km_min = float(np.min(raw_km))
        self.km_max = float(np.max(raw_km))
        self.km = self._normalize(raw_km)

    def calculate_thetas(self):
        """Compute one gradient evaluation and record the current cost.

        Returns:
            A ``(grad_theta0, grad_theta1)`` tuple. Both are sums over the
            samples (not averages), so the effective step size taken by
            ``train`` grows with the number of rows.
        """
        km = np.asarray(self.km, dtype=float)
        price = np.asarray(self.price, dtype=float)
        errors = self.__estimate_price(km) - price
        grad_theta0 = float(np.sum(errors))
        grad_theta1 = float(np.sum(errors * km))
        # Half mean squared error, logged once per call for the cost plot.
        self.cost_history.append(float(np.sum(errors ** 2)) / (2 * self.m))
        return (grad_theta0, grad_theta1)

    def train(self):
        """Run ``epochs`` simultaneous updates of ``theta0`` and ``theta1``."""
        for _ in range(self.epochs):
            grad_theta0, grad_theta1 = self.calculate_thetas()
            self.theta0 -= self.learning_rate * grad_theta0
            self.theta1 -= self.learning_rate * grad_theta1

    def visualize(self):
        """Plot the data with the fitted line, and the cost per iteration."""
        # Imported here so training and prediction work without matplotlib
        # (or a display backend) being available.
        import matplotlib.pyplot as plt

        raw_km = self.data["km"]
        km_range = np.linspace(min(raw_km), max(raw_km), 100)
        predicted_prices = self.estimate_price(km_range)
        fig, (regression, cost) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

        regression.scatter(raw_km, self.data["price"], color='blue', label='Données')
        regression.plot(km_range, predicted_prices, color='red', label='Regression line')
        regression.set_xlabel("Kilometers (km)")
        regression.set_ylabel("Price (Euros)")
        regression.set_title("Linear Regression")
        regression.legend()
        regression.grid(True)

        cost.plot(range(1, len(self.cost_history) + 1), self.cost_history, color='green')
        cost.set_title("Coût vs Itérations")
        cost.set_xlabel("Itérations")
        cost.set_ylabel("Coût")
        cost.grid(True)
        plt.show()


if __name__ == "__main__":
    first_model = Model()
    first_model.train()
    first_model.visualize()