-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstreamlit_app.py
210 lines (187 loc) · 8.41 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import streamlit as st
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow_decision_forests as tfdf
# Page-level Streamlit configuration: browser-tab title and wide layout.
# menu_items is passed explicitly as None.
st.set_page_config(page_title="Jeston Lewis | Capstone", layout="wide", menu_items=None)
@st.cache_data
def build_data(path):
    """Read the CSV at *path* and clean its ``position`` column.

    Rows with a missing ``position`` are discarded and the remaining values
    are cast to ``int``. The function is wrapped in ``st.cache_data``.

    Args:
        path: Location of the CSV file, handed directly to ``pd.read_csv``.

    Returns:
        The cleaned ``pandas.DataFrame`` (original row index is kept).
    """
    raw_frame = pd.read_csv(path)
    with_position = raw_frame.dropna(subset="position")
    return with_position.astype({"position": "int"})
@st.cache_resource
def load_saved_model(path):
    """Best-effort load of a previously saved Keras model.

    Args:
        path: Model location, passed unchanged to
            ``tf.keras.models.load_model``.

    Returns:
        The loaded model, or ``None`` when loading raised an ordinary
        exception (so the caller can fall back to training a new model).
    """
    import logging

    try:
        return tf.keras.models.load_model(path)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate; record why the
        # load failed instead of discarding the reason silently.
        logging.getLogger(__name__).warning(
            "Could not load a saved model from %s", path, exc_info=True
        )
        return None
@st.cache_resource
def build_model(local_training):
    """Fit a TF-DF random forest on *local_training* and save it to ``./model``.

    Only the columns listed in the module-level ``predictors`` are used;
    ``position`` is declared as the label column. The function is wrapped in
    ``st.cache_resource``.

    Args:
        local_training: DataFrame containing every column in ``predictors``.

    Returns:
        The fitted ``tfdf.keras.RandomForestModel``.
    """
    feature_frame = local_training[predictors]
    training_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(feature_frame, label="position")
    forest = tfdf.keras.RandomForestModel()
    forest.fit(training_dataset)
    forest.save("./model")
    return forest
def make_prediction(local_model, local_data):
    """Run *local_model* over *local_data* and attach the outputs to the rows.

    Args:
        local_model: Object exposing ``predict``; it receives a dataset built
            from the ``predictors`` columns of *local_data*.
        local_data: DataFrame containing every column in ``predictors``.

    Returns:
        A ``(table, predictions)`` tuple: ``table`` is *local_data* merged
        with a DataFrame of the predictions, and ``predictions`` is the raw
        value returned by ``local_model.predict``.
    """
    feature_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(local_data[predictors])
    predictions = local_model.predict(feature_dataset)
    # The index values of local_data are supplied as the merge key for both
    # frames, pairing each input row with the prediction row at the same
    # position.
    combined = local_data.merge(pd.DataFrame(predictions), on=local_data.index)
    return combined, predictions
def make_form_prediction(driver_choice, circuit_choice, starting_choice, local_data, local_circuits,
                         local_model=None):
    """Predict finishing-position outputs for one driver/circuit/start choice.

    A single-row feature frame is assembled from the first row of
    *local_data* matching the driver, the first row of *local_circuits*
    matching the circuit, and the chosen starting position; the model is then
    run on that row.

    Args:
        driver_choice: Value to match against ``local_data["driverRef"]``.
        circuit_choice: Value to match against ``local_circuits["circuitRef"]``.
        starting_choice: Starting grid slot; used for the ``grid`` feature.
        local_data: DataFrame with ``driverRef``, ``driver_code``,
            ``constructor_code``, ``grid_rolling``, ``position_rolling`` and
            ``pos_delta_rolling`` columns.
        local_circuits: DataFrame with ``circuitRef`` and ``circuit_code``.
        local_model: Optional object exposing ``predict``. When omitted, the
            module-level ``model`` is used, which keeps existing callers
            working.

    Returns:
        A one-row DataFrame with ``driverRef``, ``circuit_choice``, ``grid``
        and the prediction columns labelled 1 through 20 (callers render
        these as percentages).

    Raises:
        IndexError: If no row matches the requested driver or circuit.
    """
    if local_model is None:
        local_model = model

    # Compute each lookup once instead of re-filtering for every feature.
    driver_rows = local_data.loc[local_data["driverRef"] == driver_choice]
    if driver_rows.empty:
        raise IndexError(f"No rows found for driver {driver_choice!r}")
    circuit_rows = local_circuits.loc[local_circuits["circuitRef"] == circuit_choice]
    if circuit_rows.empty:
        raise IndexError(f"No rows found for circuit {circuit_choice!r}")

    def first_driver_value(column):
        # Column-wise access keeps each feature's own dtype.
        return driver_rows[column].values[0]

    # NOTE(review): pos_delta is fed the starting position unchanged, exactly
    # as before - confirm this is the intended feature value.
    pos_delta = starting_choice
    driver_df = pd.DataFrame(
        {
            "driver_code": [first_driver_value("driver_code")],
            "constructor_code": [first_driver_value("constructor_code")],
            "circuit_code": [circuit_rows["circuit_code"].values[0]],
            "grid_rolling": [first_driver_value("grid_rolling")],
            "position_rolling": [first_driver_value("position_rolling")],
            "pos_delta_rolling": [first_driver_value("pos_delta_rolling")],
            "grid": [starting_choice],
            "pos_delta": [pos_delta],
        }
    )
    driver_ds = tfdf.keras.pd_dataframe_to_tf_dataset(driver_df)
    driver_prediction_df = pd.DataFrame(local_model.predict(driver_ds))
    # The feature row's index values act as the merge key for both frames.
    driver_full = pd.merge(driver_df, driver_prediction_df, on=driver_df.index)
    driver_full["driverRef"] = driver_choice
    driver_full["circuit_choice"] = circuit_choice
    return driver_full[["driverRef", "circuit_choice", "grid", *range(1, 21)]]
# Data prep
data = build_data("./data/final_rolling.csv")
hamilton_result = data[data["driverRef"].isin(["Hamilton"])]
# Seasons before 2022 are used for training; 2022 onwards is held out.
training = data[data["year"] < 2022]
test = data[data["year"] >= 2022]
# Rows from the most recent year present in the data.
current_season = data[data["year"] == data["year"].max()]
circuits24 = pd.read_csv("./data/circuit24.csv")
# Take (circuit_code, circuitRef) pairs from the same rows so each code stays
# attached to its own reference. Two separate unique() calls only line up
# when the columns map strictly one-to-one.
circuits = data[["circuit_code", "circuitRef"]].drop_duplicates().reset_index(drop=True)
# Columns handed to the model; "position" doubles as the training label.
predictors = [
    "grid", "position", "pos_delta", "driver_code", "constructor_code", "circuit_code", "grid_rolling",
    "position_rolling", "pos_delta_rolling",
]
# Labels of the prediction columns shown in the result tables (1 through 20).
preds = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
]
# Build model, inspector, and visualization
# Look the saved model up once and train a fresh one only when that fails.
# NOTE(review): this path names the saved_model.pb file, while build_model
# saves to "./model" - confirm tf.keras.models.load_model accepts the file
# path rather than the directory. If a model does load from disk, also
# confirm the loaded object exposes make_inspector().
model = load_saved_model("./model/saved_model.pb")
if model is None:
    model = build_model(training)
inspector = model.make_inspector()
# Evaluator
evaluation = inspector.evaluation()
# Accuracy reported by the inspector, expressed as a percentage.
eval_perc = evaluation.accuracy * 100
# Single race table
full_table, predictions = make_prediction(model, test)
# Rows of the race with the highest raceId, ordered by starting grid slot.
# Built with non-mutating calls so no slice of full_table is modified in
# place.
single = (
    full_table.loc[
        full_table["raceId"] == full_table["raceId"].max(),
        ["driverRef", "grid", "circuitRef", *range(1, 21)],
    ]
    .sort_values(by="grid")
    .astype({"grid": int})
)
# Data visualization
# Scatter of position delta against grid slot for the training rows
# (regression line disabled).
lmplot = sns.lmplot(data=training, x="pos_delta", y="grid", fit_reg=False)
# Collapse each prediction row to the index of its largest value, then
# tabulate those against the actual positions in the test rows.
one_prediction = tf.argmax(predictions, axis=-1)
cm = tf.math.confusion_matrix(
    labels=test["position"],
    predictions=one_prediction,
)
cm_df = pd.DataFrame(cm)
# FRONTEND
st.title("Jeston Lewis - Capstone Project")
predictor, analysis = st.tabs(["Predictor", "Analysis"])
# Predictor
predictor.title("Race predictor")
predictor.write("1 - Select a driver")
predictor.write("2 - Select a track")
predictor.write("3 - Select a starting position")
predictor.write("4 - Press 'Predict'")
with predictor.form("Predict a winner"):
    f_driver_choice = st.selectbox(
        "Driver",
        current_season["driverRef"].unique(),
        index=None,
        placeholder="Choose a driver",
    )
    f_circuit_choice = st.selectbox(
        "Circuit",
        circuits24,
        index=None,
        placeholder="Choose a circuit",
    )
    f_starting_choice = st.selectbox(
        "Start",
        current_season["grid"].unique(),
        index=None,
        placeholder="Choose a starting position",
    )
    submit = st.form_submit_button("Predict")
# Only react once the form has been submitted. The previous
# "submit and a or b or c" test bound as "(submit and a) or b or c", so the
# reminder appeared on every render with an empty circuit or start box.
if submit:
    if f_driver_choice is None or f_circuit_choice is None or f_starting_choice is None:
        predictor.write("Please select all options.")
    else:
        prediction = make_form_prediction(
            driver_choice=f_driver_choice,
            circuit_choice=f_circuit_choice,
            starting_choice=f_starting_choice,
            # Feature values are taken from the latest race of the current season.
            local_data=current_season[current_season["raceId"] == current_season["raceId"].max()],
            local_circuits=circuits,
        )
        # Show every prediction column as a percentage and highlight the
        # largest value in the row.
        predictor.dataframe(
            prediction.style.format(
                {position: "{:.2%}" for position in preds}
            ).highlight_max(axis=1, subset=preds),
            use_container_width=True,
            hide_index=True,
        )
# Analysis
analysis.title("Visualizations of the data")
analysis.header("Description of the training data")
analysis.dataframe(training.describe(), use_container_width=True)

# Two side-by-side panels: the grid/position-delta scatter and Hamilton's wins.
container1 = analysis.container()
col1, col2 = container1.columns(2)
col1.header("Grid vs Position Delta")
col1.write("Starting position versus the change in position over the course of a race")
col1.pyplot(lmplot.fig)
col2.header("Wins per year")
col2.write("Wins for Lewis Hamilton by year")
col2.bar_chart(
    hamilton_result[hamilton_result["position"].isin([1])],
    x="year",
    y="position",
)

# Prediction table for the single race held in `single`.
container2 = analysis.container()
container2.title("Single race prediction")
container2.write(f"Test accuracy - {eval_perc:.2f}%")
container2.write(
    "The table below shows the likelihood of each driver achieving a specific finishing position giving their "
    "starting position or grid. For instance, the person in first at the beginning of the race (Leclerc), has a "
    "1.67% chance of winning the race."
)
container2.write(
    "The highlighted percentage next to each driver shows the predicted likelihood of him finishing in the "
    "position indicated by the column name. Hamilton has a 83% chance of finishing in 1st."
)
# Every prediction column is rendered as a percentage; the largest value in
# each row is highlighted.
container2.dataframe(
    single.style.format(
        {position: "{:.2%}" for position in preds}
    ).highlight_max(axis=1, subset=preds),
    use_container_width=True,
    hide_index=True,
    height=738,
)

# Model visualizations: a pre-rendered tree image and the confusion matrix.
container3 = analysis.container()
container3.title("Visualizations of the model")
container3.header("Single tree plot")
container3.image("tree.svg", use_column_width=True)
container3.header("Confusion matrix")
container3.dataframe(
    cm_df.style.background_gradient(cmap="coolwarm"),
    use_container_width=False,
    height=772,
)