Commit 70ae2468 authored by Tobias Rothlin

Run Logs Added

parent 269e8a35
%% Cell type:code id:initial_id tags:
``` python
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
```
%% Cell type:markdown id:883dd0fb6eae6601 tags:
# Load data
%% Cell type:code id:f64fc415ab307f7d tags:
``` python
data_state_space_controller = pd.read_csv("RunLog_Controller_2024-06-18-13-19-46.csv")
data_fine_tuning = pd.read_csv("RunLog_Model_FineTune_2024-06-18-13-22-43.csv")
data_model = pd.read_csv("RunLog_Model_2024-06-18-13-27-50.csv")
```
%% Cell type:code id:2d48576cf1434bf0 tags:
``` python
data_state_space_controller
```
%% Output
idx ball_position ball_target angle
0 109 0.000615 0.241154 88.099012
1 110 0.000809 0.241198 46.679006
2 111 0.022139 0.243308 46.820461
3 112 0.015005 0.242331 3.236380
4 113 0.011516 0.241988 -0.194204
.. ... ... ... ...
981 1090 0.291940 0.241148 -0.283710
982 1091 0.291934 0.241210 -0.306506
983 1092 0.291973 0.241160 -0.377769
984 1093 0.291960 0.241285 -0.332947
985 1094 0.292012 0.241241 -0.405627
[986 rows x 4 columns]
%% Cell type:code id:882f18697bd5d5b tags:
``` python
data_fine_tuning
```
%% Output
idx ball_position ball_target angle
0 725 -0.000434 0.241048 105.562370
1 726 -0.000758 0.241055 6.232263
2 727 0.009969 0.242119 4.324735
3 728 0.034017 0.244397 -3.267518
4 729 0.003438 0.241229 -7.334972
.. ... ... ... ...
707 1432 0.208759 0.241148 0.251702
708 1433 0.208752 0.241148 0.241532
709 1434 0.208771 0.241142 0.236939
710 1435 0.208752 0.241136 0.240235
711 1436 0.208732 0.241129 0.237969
[712 rows x 4 columns]
%% Cell type:code id:f5402a2a5f9bd009 tags:
``` python
data_model
```
%% Output
idx ball_position ball_target angle
0 273 -0.000492 0.241030 1.493593
1 274 -0.000583 0.241061 1.574569
2 275 0.021737 0.243283 1.571033
3 276 0.032198 0.244204 1.526294
4 277 0.019957 0.242853 1.503148
.. ... ... ... ...
660 933 0.368281 0.241198 0.065720
661 934 0.368242 0.241179 0.065709
662 935 0.368242 0.241217 0.065690
663 936 0.368248 0.241192 0.065701
664 937 0.368210 0.241179 0.065887
[665 rows x 4 columns]
%% Cell type:code id:740f6b660085f414 tags:
``` python
```
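%% Cell type:markdown tags:
# Compare the runs

The three run logs share the same columns (`idx`, `ball_position`, `ball_target`, `angle`), so they can be overlaid directly. The cell below is a minimal plotting sketch using the Plotly imports from the first cell; the trace names and layout choices are illustrative and not part of the original notebook.
%% Cell type:code tags:
``` python
# Overlay the logged ball position and its target over the sample index for each run.
runs = {
    "State-space controller": data_state_space_controller,
    "Fine-tuned model": data_fine_tuning,
    "Model": data_model,
}

fig = go.Figure()
for name, df in runs.items():
    # Logged ball position per sample
    fig.add_trace(go.Scatter(x=df["idx"], y=df["ball_position"],
                             mode="lines", name=f"{name}: position"))
    # Target position for the same samples (roughly constant around 0.24)
    fig.add_trace(go.Scatter(x=df["idx"], y=df["ball_target"],
                             mode="lines", line=dict(dash="dash"),
                             name=f"{name}: target"))

fig.update_layout(xaxis_title="idx", yaxis_title="ball position",
                  title="Ball position vs. target per run log")
fig.show()
```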
@@ -165,6 +165,7 @@ class MainWindow(QMainWindow):
if self.log_file:
self.log_file.write(f"{self.idx},{ball_position},{ball_target},{angle}\n")
# idx, ball_position, ball_target, angle
self.local_data_handler.add_element("SimulatedPosition", simulated_position[0][0] * 100) # cm
self.local_data_handler.add_element("Angle", self.output_angle) # Degree
@@ -18,9 +18,10 @@ class Actor(nn.Module):
def init_weights(self):
nn.init.xavier_uniform_(self.fc1.weight)
nn.init.zeros_(self.fc1.bias)
weight_FC1 = self.fc1.weight.data
bias_FC1 = self.fc1.bias.data
self.fc1.weight.data = weight_FC1
self.fc1.bias.data = bias_FC1
def forward(self, x):
x = x.view(x.size(0), -1)
@@ -77,6 +77,11 @@ class DDPGagent:
target_param.data.copy_(param.data * self.tau + target_param.data * (1.0 - self.tau))
def save_models(self):
self.actor.save("actor.pt")
self.critic.save("critic.pt")
\ No newline at end of file
def save_models(self,isCheckpoint=False):
if isCheckpoint:
self.actor.save("actor_checkpoint.pt")
self.critic.save("critic_checkpoint.pt")
else:
self.actor.save("actor.pt")
self.critic.save("critic.pt")
\ No newline at end of file
@@ -9,7 +9,7 @@ class Enviroment:
self.plant = Plant(path="../Simulation/Plant.json")
self.observer = Observer(path="../Simulation/Observer.json")
self.itteration = 0
self.iteration = 0
self.internal_velocity_falloff = 0.0
self.target_position = target_position
@@ -28,25 +28,39 @@ class Enviroment:
def get_velocity_cutoff(self,update_falloff = True):
if update_falloff:
error = 0.55 - self.internal_velocity_falloff
self.internal_velocity_falloff += error * 0.02
self.internal_velocity_falloff += error * 0.08
return 0.6 - self.internal_velocity_falloff
def reset_state(self,x0):
start_angle = 0.0
random_start_position = 0.0
start_velocity = 0.0
self.target_position = 0.25
x0 = np.array([[start_velocity],[random_start_position],[start_angle]])
self.plant.reset_state(x0)
self.observer.reset_state(x0)
self.iteration = 0
velocity = self.plant.X[0][0]
position = self.plant.X[1][0]
angle = self.plant.X[2][0]
target_position = self.target_position
error = position - target_position
return np.array([[velocity],[position],[angle],[target_position]])
return np.array([[velocity],[position],[angle],[error]])
def get_output(self,u,dt = 0.02):
position = self.plant.get_output(u,dt,ignore_max_min=False)
position = self.plant.get_output(u,dt,ignore_max_min=True)
state = self.observer.get_estimate(u,position,dt)
state_true = self.plant.X
return state
return state_true
def step(self,action):
@@ -56,15 +70,16 @@ class Enviroment:
position = output[1][0]
angle = output[2][0]
target_position = self.target_position
error = position - target_position
new_state = np.array([[velocity],[position],[angle],[target_position]])
new_state = np.array([[velocity],[position],[angle],[error]])
reward = self.calculate_reward(new_state)
done = self.check_done(new_state)
self.previous_state = np.array([[velocity],[position],[angle],[target_position]])
self.previous_state = np.array([[velocity],[position],[angle],[error]])
self.itteration += 1
self.iteration += 1
return new_state,reward,done,{}
def calculate_reward(self,new_state):
@@ -80,7 +95,7 @@ class Enviroment:
did_move_closer = 1 if current_error_in_position < previous_error_in_position else -1
reward = did_move_closer * 1 - current_error_in_position * 0.0 - np.abs(current_angle)*0.0 - np.abs(current_velocity)*0.0
reward = did_move_closer * 1.0 - current_error_in_position * 1.0 - np.abs(current_angle)*0.0 - np.abs(current_velocity)*0.01 - self.iteration * 0.0
return reward
@@ -91,7 +106,7 @@ class Enviroment:
angle = new_state[2][0]
target_position = new_state[3][0]
velocity_cutoff = self.get_velocity_cutoff(update_falloff=False)
velocity_cutoff = 0.01
if np.abs(position - target_position) < 0.05 and np.abs(velocity) < velocity_cutoff:
return True
else:
@@ -107,7 +122,7 @@ if __name__ == '__main__':
angle_list = []
for i in range(400):
env_output = env.get_output(np.array([[5.0]]),dt=0.02)
env_output = env.get_output(np.array([[-5.0]]),dt=0.02)
speed = env_output[0][0]
position = env_output[1][0]
angle = env_output[2][0]
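The Enviroment hunks above swap the absolute `target_position` in the observation for the position error and reshape the reward around that error. The following sketch restates one step of that logic in isolation with made-up state values; `previous_error` and the assumption that `current_error_in_position` is an absolute error are illustrative, not taken from the repository.

``` python
import numpy as np

# Illustrative plant state after one step: velocity, position, angle, plus the fixed target.
velocity, position, angle = 0.05, 0.20, 1.5
target_position = 0.25

# New observation layout from the diff: the agent now sees the error directly.
error = position - target_position
state = np.array([[velocity], [position], [angle], [error]])

# Reward terms mirroring the updated calculate_reward (weights as in the diff).
previous_error = 0.07               # assumed absolute error from the previous step
current_error = abs(error)          # assumed to be an absolute error
did_move_closer = 1 if current_error < previous_error else -1
iteration = 10                      # current step count; its weight is 0.0 in the diff
reward = (did_move_closer * 1.0
          - current_error * 1.0
          - abs(angle) * 0.0
          - abs(velocity) * 0.01
          - iteration * 0.0)
```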
@@ -14,25 +14,42 @@ import mlflow
from dotenv import load_dotenv
import os
ACTOR_LEARNING_RATE = 1e-3
CRITIC_LEARNING_RATE = 1e-3
GAMMA = 0.99
TAU = 1e-2
ACTOR_LEARNING_RATE = 1e-4
CRITIC_LEARNING_RATE = 1e-2
GAMMA = 0.99 # This is the discount factor used in the update rule for the Q-values in reinforcement learning.
# It determines how much importance we want to give to future rewards.
# A high value for gamma (close to 1) will make the agent consider future rewards significantly,
# whereas a low value makes the agent more focused on immediate rewards.
TAU = 0.01 # This is used for the soft update of the target networks.
# In DDPG, there are two sets of networks - the actor-critic networks and the target networks.
# The target networks are copies of the actor-critic networks and are used to generate the target-Q
# values to compute the loss for updating the actor-critic networks. However, instead of copying the
# weights of the actor-critic networks to the target networks directly, a soft update strategy is used
# where the target networks are slowly updated towards the actor-critic networks.
# The parameter tau controls the rate of this update.
# A small tau means that the target networks are updated slowly,
# whereas a large tau means that the target networks are updated more quickly towards the actor-critic networks.
MAX_MEMORY_SIZE = 50000
BATCH_SIZE = 256
DISCRIPTION_REWARD_FUNCTION = "reward = did_move_closer * 10 - current_error_in_position * 10 - abs(current_angle) - abs(current_velocity)*2"
env = Enviroment()
agent = DDPGagent(env, actor_learning_rate=ACTOR_LEARNING_RATE, critic_learning_rate=CRITIC_LEARNING_RATE, gamma=GAMMA, tau=TAU, max_memory_size=MAX_MEMORY_SIZE)
noise = OUNoise(env.action_space)
batch_size = 128
batch_size = BATCH_SIZE
rewards = []
avg_rewards = []
steps_list = []
load_dotenv()
mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI"))
mlflow.set_experiment("DRF_DDPG")
mlflow.set_experiment("DRF_DDPG_Sem")
with mlflow.start_run():
mlflow.log_param("ACTOR_LEARNING_RATE", ACTOR_LEARNING_RATE)
@@ -41,15 +58,16 @@ with mlflow.start_run():
mlflow.log_param("TAU", TAU)
mlflow.log_param("MAX_MEMORY_SIZE", MAX_MEMORY_SIZE)
mlflow.log_param("DISCRIPTION_REWARD_FUNCTION", DISCRIPTION_REWARD_FUNCTION)
mlflow.log_param("BATCH_SIZE", BATCH_SIZE)
for episode in range(200):
for episode in range(1000):
state = env.reset_state(x0=np.array([[0.0], [0.0], [0.0]]))
noise.reset()
episode_reward = 0
velocity_cutoff = env.get_velocity_cutoff(update_falloff=True)
mlflow.log_metric("velocity_cutoff", velocity_cutoff)
for step in tqdm(range(1000),total=1000,desc=f"Training Model Episode:{episode}"):
for step in tqdm(range(500),total=500,desc=f"Training Model Episode:{episode}"):
action = agent.get_action(state)
action = noise.get_action(action, step)[0]
new_state, reward, done, _ = env.step(action)
@@ -64,13 +82,14 @@ with mlflow.start_run():
if episode > 0:
mean_reward = np.mean(rewards[-10:])
else:
mean_reward = 0
mean_reward = episode_reward
if done:
print(f"Step: {step}, Episode: {episode}, episode reward: {episode_reward}, mean reward: {mean_reward}")
steps_list.append(step)
break
agent.save_models(isCheckpoint=True)
rewards.append(episode_reward)
avg_rewards.append(mean_reward)
mlflow.log_metric("episode_reward", episode_reward)
@@ -124,9 +143,10 @@ with mlflow.start_run():
state = torch.FloatTensor(state).unsqueeze(0)
action = actor(state).detach().numpy()[0, 0]
state, reward, done, _ = env.step(action)
speed = state[0][0]
position = state[1][0]
angle = state[2][0]
true_state = env.plant.X
speed = true_state[0][0]
position = true_state[1][0]
angle = true_state[2][0]
speed_list.append(speed)
position_list.append(position)
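The GAMMA and TAU comments added above describe the discount factor and the soft update of the target networks. The sketch below restates both outside the training loop so the two hyperparameters can be read in isolation; the helper functions and the `nn.Linear` stand-ins for the actor/critic networks are illustrative, not part of the repository.

``` python
import torch.nn as nn

GAMMA = 0.99  # discount factor from the training script above
TAU = 0.01    # soft-update rate from the training script above

def discounted_return(rewards, gamma=GAMMA):
    """Sum of gamma**t * r_t; shows how gamma weights future rewards."""
    return sum((gamma ** t) * r for t, r in enumerate(rewards))

# With gamma = 0.99 a reward 100 steps ahead still retains ~37% of its value;
# a much smaller gamma would make the agent focus almost only on immediate rewards.
print(discounted_return([1.0] * 100))   # ~63.4

def soft_update(target_net, source_net, tau=TAU):
    """Move each target parameter a fraction tau towards the source parameter,
    matching the parameter copy shown in the DDPGagent hunk above."""
    for target_param, param in zip(target_net.parameters(), source_net.parameters()):
        target_param.data.copy_(param.data * tau + target_param.data * (1.0 - tau))

# Stand-ins for the real networks: the state is 4-dimensional, the action is scalar.
actor = nn.Linear(4, 1)
actor_target = nn.Linear(4, 1)
soft_update(actor_target, actor)  # target weights move 1% of the way towards the actor's
```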