add: Initial commit

This commit is contained in:
2025-05-04 21:22:30 +02:00
commit cced4eb994
2 changed files with 99 additions and 0 deletions

25
data.csv Normal file
View File

@@ -0,0 +1,25 @@
km,price
240000,3650
139800,3800
150500,4400
185530,4450
176000,5250
114800,5350
166800,5800
89000,5990
144500,5999
84000,6200
82029,6390
63060,6390
74000,6600
97500,6800
67000,6800
76025,6900
48235,6900
93000,6990
60949,7490
65674,7555
54000,7990
68500,7990
22899,7990
61789,8290
1 km price
2 240000 3650
3 139800 3800
4 150500 4400
5 185530 4450
6 176000 5250
7 114800 5350
8 166800 5800
9 89000 5990
10 144500 5999
11 84000 6200
12 82029 6390
13 63060 6390
14 74000 6600
15 97500 6800
16 67000 6800
17 76025 6900
18 48235 6900
19 93000 6990
20 60949 7490
21 65674 7555
22 54000 7990
23 68500 7990
24 22899 7990
25 61789 8290

74
model.py Normal file
View File

@@ -0,0 +1,74 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
class Model:
    """Univariate linear regression of price on mileage, fitted by gradient descent.

    The model is ``price = theta0 + theta1 * x`` where ``x`` is the mileage
    min-max scaled with the bounds found in the training file. The raw
    DataFrame is kept in ``data``; ``km`` holds the scaled mileages and
    ``price`` the raw prices. ``cost_history`` receives one entry (half the
    mean squared error) per call to :meth:`calculate_thetas`.
    """

    def __init__(self, data_name="data.csv", learning_rate=0.01, epochs=2000):
        """Load the dataset and scale the mileages.

        Args:
            data_name: Path of a CSV file with ``km`` and ``price`` columns.
            learning_rate: Factor applied to the summed gradients in
                :meth:`train`.
            epochs: Number of gradient-descent iterations run by :meth:`train`.

        Raises:
            ValueError: If the file contains a header but no data rows.
        """
        with open(data_name, "r") as file:
            self.data = pd.read_csv(file)
        self.km = self.data["km"].tolist()
        self.price = self.data["price"].tolist()
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.theta0 = 0
        self.theta1 = 0
        self.km_min = 0
        self.km_max = 0
        self.cost_history = []
        self.m = len(self.data)
        if self.m == 0:
            # Fail early with a readable message instead of an opaque
            # reduction error from NumPy or a division by zero later on.
            raise ValueError(f"{data_name!r} contains no data rows")
        self.normalize_values()

    def _km_span(self):
        """Return ``km_max - km_min``, or 1 when that range is zero.

        A zero range happens when every mileage is identical; dividing by it
        would turn every scaled value, and then both thetas, into NaN.
        """
        span = self.km_max - self.km_min
        return span if span != 0 else 1

    def __estimate_price(self, mileage):
        """Evaluate the line on an already scaled mileage (scalar or array)."""
        return self.theta0 + (self.theta1 * mileage)

    def estimate_price(self, mileage):
        """Return the predicted price for a raw mileage (scalar or array).

        The mileage is scaled with the same bounds as the training data before
        the line is evaluated.
        """
        return self.__estimate_price((mileage - self.km_min) / self._km_span())

    def normalize_values(self):
        """Record the mileage bounds and min-max scale ``self.km`` in place."""
        self.km_min = np.min(self.km)
        self.km_max = np.max(self.km)
        span = self._km_span()
        # Slice assignment keeps the same list object, as the original
        # element-by-element update did.
        self.km[:] = [(value - self.km_min) / span for value in self.km]

    def calculate_thetas(self):
        """Compute the gradients for the current thetas and log the cost.

        Appends ``sum(error ** 2) / (2 * m)`` to ``cost_history``.

        Returns:
            A ``(d_theta0, d_theta1)`` tuple. Both values are sums over the
            samples, not means: they are not divided by ``m``.
        """
        km = np.asarray(self.km, dtype=float)
        price = np.asarray(self.price, dtype=float)
        errors = self.__estimate_price(km) - price
        self.cost_history.append(float(np.dot(errors, errors)) / (2 * self.m))
        return (float(errors.sum()), float(np.dot(errors, km)))

    def train(self):
        """Run ``epochs`` gradient-descent steps, updating both thetas together."""
        for _ in range(self.epochs):
            tmp_t0, tmp_t1 = self.calculate_thetas()
            self.theta0 -= self.learning_rate * tmp_t0
            self.theta1 -= self.learning_rate * tmp_t1

    def visualize(self):
        """Show the data with the fitted line next to the cost history."""
        raw_km = self.data["km"]
        km_range = np.linspace(min(raw_km), max(raw_km), 100)
        # Reuse the public predictor so the plot and predictions share one
        # scaling formula.
        predicted_prices = self.estimate_price(km_range)
        _fig, (regression, cost) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
        regression.scatter(raw_km, self.data["price"], color='blue', label='Données')
        regression.plot(km_range, predicted_prices, color='red', label='Regression line')
        regression.set_xlabel("Kilometers (km)")
        regression.set_ylabel("Price (Euros)")
        regression.set_title("Linear Regression")
        regression.legend()
        regression.grid(True)
        cost.plot(range(1, len(self.cost_history) + 1), self.cost_history, color='green')
        cost.set_title("Coût vs Itérations")
        cost.set_xlabel("Itérations")
        cost.set_ylabel("Coût")
        cost.grid(True)
        plt.show()
# Script body: build a model from the default "data.csv", fit it, then display
# the regression line and the cost curve.
# NOTE(review): these statements also run whenever this module is imported;
# consider an `if __name__ == "__main__":` guard — confirm no caller relies on
# the import-time training or on the module-level `first_model` name.
first_model = Model()
first_model.train()
first_model.visualize()