Commit 70ae2468 authored by Tobias Rothlin

Run Logs Added

parent 269e8a35
%% Cell type:code id:initial_id tags:
``` python
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
```
%% Cell type:markdown id:883dd0fb6eae6601 tags:
# Load data
%% Cell type:code id:f64fc415ab307f7d tags:
``` python
data_state_space_controller = pd.read_csv("RunLog_Controller_2024-06-18-13-19-46.csv")
data_fine_tuning = pd.read_csv("RunLog_Model_FineTune_2024-06-18-13-22-43.csv")
data_model = pd.read_csv("RunLog_Model_2024-06-18-13-27-50.csv")
```
%% Cell type:code id:2d48576cf1434bf0 tags:
``` python
data_state_space_controller
```
%% Output
idx ball_position ball_target angle
0 109 0.000615 0.241154 88.099012
1 110 0.000809 0.241198 46.679006
2 111 0.022139 0.243308 46.820461
3 112 0.015005 0.242331 3.236380
4 113 0.011516 0.241988 -0.194204
.. ... ... ... ...
981 1090 0.291940 0.241148 -0.283710
982 1091 0.291934 0.241210 -0.306506
983 1092 0.291973 0.241160 -0.377769
984 1093 0.291960 0.241285 -0.332947
985 1094 0.292012 0.241241 -0.405627
[986 rows x 4 columns]
%% Cell type:code id:882f18697bd5d5b tags:
``` python
data_fine_tuning
```
%% Output
idx ball_position ball_target angle
0 725 -0.000434 0.241048 105.562370
1 726 -0.000758 0.241055 6.232263
2 727 0.009969 0.242119 4.324735
3 728 0.034017 0.244397 -3.267518
4 729 0.003438 0.241229 -7.334972
.. ... ... ... ...
707 1432 0.208759 0.241148 0.251702
708 1433 0.208752 0.241148 0.241532
709 1434 0.208771 0.241142 0.236939
710 1435 0.208752 0.241136 0.240235
711 1436 0.208732 0.241129 0.237969
[712 rows x 4 columns]
%% Cell type:code id:f5402a2a5f9bd009 tags:
``` python
data_model
```
%% Output
idx ball_position ball_target angle
0 273 -0.000492 0.241030 1.493593
1 274 -0.000583 0.241061 1.574569
2 275 0.021737 0.243283 1.571033
3 276 0.032198 0.244204 1.526294
4 277 0.019957 0.242853 1.503148
.. ... ... ... ...
660 933 0.368281 0.241198 0.065720
661 934 0.368242 0.241179 0.065709
662 935 0.368242 0.241217 0.065690
663 936 0.368248 0.241192 0.065701
664 937 0.368210 0.241179 0.065887
[665 rows x 4 columns]
%% Cell type:code id:740f6b660085f414 tags:
``` python
```
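%% Cell type:markdown tags:
# Compare the runs

The three run logs share the same columns (`idx`, `ball_position`, `ball_target`, `angle`), so they can be overlaid directly. The cell below is a minimal plotting sketch using the Plotly imports from the first cell; the trace names and layout choices are illustrative and not part of the original notebook.
%% Cell type:code tags:
``` python
# Overlay the logged ball position and its target over the sample index for each run.
runs = {
    "State-space controller": data_state_space_controller,
    "Fine-tuned model": data_fine_tuning,
    "Model": data_model,
}

fig = go.Figure()
for name, df in runs.items():
    # Logged ball position per sample
    fig.add_trace(go.Scatter(x=df["idx"], y=df["ball_position"],
                             mode="lines", name=f"{name}: position"))
    # Target position for the same samples (roughly constant around 0.24)
    fig.add_trace(go.Scatter(x=df["idx"], y=df["ball_target"],
                             mode="lines", line=dict(dash="dash"),
                             name=f"{name}: target"))

fig.update_layout(xaxis_title="idx", yaxis_title="ball position",
                  title="Ball position vs. target per run log")
fig.show()
```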
@@ -165,6 +165,7 @@ class MainWindow(QMainWindow):
if self.log_file:
self.log_file.write(f"{self.idx},{ball_position},{ball_target},{angle}\n")
# idx, ball_position, ball_target, angle
self.local_data_handler.add_element("SimulatedPosition", simulated_position[0][0] * 100) # cm
self.local_data_handler.add_element("Angle", self.output_angle) # Degree
@@ -18,9 +18,10 @@ class Actor(nn.Module):
def init_weights(self):
nn.init.xavier_uniform_(self.fc1.weight)
nn.init.zeros_(self.fc1.bias)
weight_FC1 = self.fc1.weight.data
bias_FC1 = self.fc1.bias.data
self.fc1.weight.data = weight_FC1
self.fc1.bias.data = bias_FC1
def forward(self, x):
x = x.view(x.size(0), -1)
@@ -77,6 +77,11 @@ class DDPGagent:
target_param.data.copy_(param.data * self.tau + target_param.data * (1.0 - self.tau))
def save_models(self):
self.actor.save("actor.pt")
self.critic.save("critic.pt")
\ No newline at end of file
def save_models(self,isCheckpoint=False):
if isCheckpoint:
self.actor.save("actor_checkpoint.pt")
self.critic.save("critic_checkpoint.pt")
else:
self.actor.save("actor.pt")
self.critic.save("critic.pt")
\ No newline at end of file
@@ -9,7 +9,7 @@ class Enviroment:
self.plant = Plant(path="../Simulation/Plant.json")
self.observer = Observer(path="../Simulation/Observer.json")
self.itteration = 0
self.iteration = 0
self.internal_velocity_falloff = 0.0
self.target_position = target_position
@@ -28,25 +28,39 @@ class Enviroment:
def get_velocity_cutoff(self,update_falloff = True):
if update_falloff:
error = 0.55 - self.internal_velocity_falloff
self.internal_velocity_falloff += error * 0.02
self.internal_velocity_falloff += error * 0.08
return 0.6 - self.internal_velocity_falloff
def reset_state(self,x0):
start_angle = 0.0
random_start_position = 0.0
start_velocity = 0.0
self.target_position = 0.25
x0 = np.array([[start_velocity],[random_start_position],[start_angle]])
self.plant.reset_state(x0)
self.observer.reset_state(x0)
self.iteration = 0
velocity = self.plant.X[0][0]
position = self.plant.X[1][0]
angle = self.plant.X[2][0]
target_position = self.target_position
error = position - target_position
return np.array([[velocity],[position],[angle],[target_position]])
return np.array([[velocity],[position],[angle],[error]])
def get_output(self,u,dt = 0.02):
position = self.plant.get_output(u,dt,ignore_max_min=False)
position = self.plant.get_output(u,dt,ignore_max_min=True)
state = self.observer.get_estimate(u,position,dt)
state_true = self.plant.X
return state
return state_true
def step(self,action):
@@ -56,15 +70,16 @@ class Enviroment:
position = output[1][0]
angle = output[2][0]
target_position = self.target_position
error = position - target_position
new_state = np.array([[velocity],[position],[angle],[target_position]])
new_state = np.array([[velocity],[position],[angle],[error]])
reward = self.calculate_reward(new_state)
done = self.check_done(new_state)
self.previous_state = np.array([[velocity],[position],[angle],[target_position]])
self.previous_state = np.array([[velocity],[position],[angle],[error]])
self.itteration += 1
self.iteration += 1
return new_state,reward,done,{}
def calculate_reward(self,new_state):
@@ -80,7 +95,7 @@ class Enviroment:
did_move_closer = 1 if current_error_in_position < previous_error_in_position else -1
reward = did_move_closer * 1 - current_error_in_position * 0.0 - np.abs(current_angle)*0.0 - np.abs(current_velocity)*0.0
reward = did_move_closer * 1.0 - current_error_in_position * 1.0 - np.abs(current_angle)*0.0 - np.abs(current_velocity)*0.01 - self.iteration * 0.0
return reward
@@ -91,7 +106,7 @@ class Enviroment:
angle = new_state[2][0]
target_position = new_state[3][0]
velocity_cutoff = self.get_velocity_cutoff(update_falloff=False)
velocity_cutoff = 0.01
if np.abs(position - target_position) < 0.05 and np.abs(velocity) < velocity_cutoff:
return True
else:
@@ -107,7 +122,7 @@ if __name__ == '__main__':
angle_list = []
for i in range(400):
env_output = env.get_output(np.array([[5.0]]),dt=0.02)
env_output = env.get_output(np.array([[-5.0]]),dt=0.02)
speed = env_output[0][0]
position = env_output[1][0]
angle = env_output[2][0]
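The Enviroment hunks above swap the absolute `target_position` in the observation for the position error and reshape the reward around that error. The following sketch restates one step of that logic in isolation with made-up state values; `previous_error` and the assumption that `current_error_in_position` is an absolute error are illustrative, not taken from the repository.

``` python
import numpy as np

# Illustrative plant state after one step: velocity, position, angle, plus the fixed target.
velocity, position, angle = 0.05, 0.20, 1.5
target_position = 0.25

# New observation layout from the diff: the agent now sees the error directly.
error = position - target_position
state = np.array([[velocity], [position], [angle], [error]])

# Reward terms mirroring the updated calculate_reward (weights as in the diff).
previous_error = 0.07               # assumed absolute error from the previous step
current_error = abs(error)          # assumed to be an absolute error
did_move_closer = 1 if current_error < previous_error else -1
iteration = 10                      # current step count; its weight is 0.0 in the diff
reward = (did_move_closer * 1.0
          - current_error * 1.0
          - abs(angle) * 0.0
          - abs(velocity) * 0.01
          - iteration * 0.0)
```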
@@ -14,25 +14,42 @@ import mlflow
from dotenv import load_dotenv
import os
ACTOR_LEARNING_RATE = 1e-3
CRITIC_LEARNING_RATE = 1e-3
GAMMA = 0.99
TAU = 1e-2
ACTOR_LEARNING_RATE = 1e-4
CRITIC_LEARNING_RATE = 1e-2
GAMMA = 0.99 # This is the discount factor used in the update rule for the Q-values in reinforcement learning.
# It determines how much importance we want to give to future rewards.
# A high value for gamma (close to 1) will make the agent consider future rewards significantly,
# whereas a low value makes the agent more focused on immediate rewards.
TAU = 0.01 # This is used for the soft update of the target networks.
# In DDPG, there are two sets of networks - the actor-critic networks and the target networks.
# The target networks are copies of the actor-critic networks and are used to generate the target-Q
# values to compute the loss for updating the actor-critic networks. However, instead of copying the
# weights of the actor-critic networks to the target networks directly, a soft update strategy is used
# where the target networks are slowly updated towards the actor-critic networks.
# The parameter tau controls the rate of this update.
# A small tau means that the target networks are updated slowly,
# whereas a large tau means that the target networks are updated more quickly towards the actor-critic networks.
MAX_MEMORY_SIZE = 50000
BATCH_SIZE = 256
DISCRIPTION_REWARD_FUNCTION = "reward = did_move_closer * 10 - current_error_in_position * 10 - abs(current_angle) - abs(current_velocity)*2"
env = Enviroment()
agent = DDPGagent(env, actor_learning_rate=ACTOR_LEARNING_RATE, critic_learning_rate=CRITIC_LEARNING_RATE, gamma=GAMMA, tau=TAU, max_memory_size=MAX_MEMORY_SIZE)
noise = OUNoise(env.action_space)
batch_size = 128
batch_size = BATCH_SIZE
rewards = []
avg_rewards = []
steps_list = []
load_dotenv()
mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI"))
mlflow.set_experiment("DRF_DDPG")
mlflow.set_experiment("DRF_DDPG_Sem")
with mlflow.start_run():
mlflow.log_param("ACTOR_LEARNING_RATE", ACTOR_LEARNING_RATE)
@@ -41,15 +58,16 @@ with mlflow.start_run():
mlflow.log_param("TAU", TAU)
mlflow.log_param("MAX_MEMORY_SIZE", MAX_MEMORY_SIZE)
mlflow.log_param("DISCRIPTION_REWARD_FUNCTION", DISCRIPTION_REWARD_FUNCTION)
mlflow.log_param("BATCH_SIZE", BATCH_SIZE)
for episode in range(200):
for episode in range(1000):
state = env.reset_state(x0=np.array([[0.0], [0.0], [0.0]]))
noise.reset()
episode_reward = 0
velocity_cutoff = env.get_velocity_cutoff(update_falloff=True)
mlflow.log_metric("velocity_cutoff", velocity_cutoff)
for step in tqdm(range(1000),total=1000,desc=f"Training Model Episode:{episode}"):
for step in tqdm(range(500),total=500,desc=f"Training Model Episode:{episode}"):
action = agent.get_action(state)
action = noise.get_action(action, step)[0]
new_state, reward, done, _ = env.step(action)
@@ -64,13 +82,14 @@ with mlflow.start_run():
if episode > 0:
mean_reward = np.mean(rewards[-10:])
else:
mean_reward = 0
mean_reward = episode_reward
if done:
print(f"Step: {step}, Episode: {episode}, episode reward: {episode_reward}, mean reward: {mean_reward}")
steps_list.append(step)
break
agent.save_models(isCheckpoint=True)
rewards.append(episode_reward)
avg_rewards.append(mean_reward)
mlflow.log_metric("episode_reward", episode_reward)
@@ -124,9 +143,10 @@ with mlflow.start_run():
state = torch.FloatTensor(state).unsqueeze(0)
action = actor(state).detach().numpy()[0, 0]
state, reward, done, _ = env.step(action)
speed = state[0][0]
position = state[1][0]
angle = state[2][0]
true_state = env.plant.X
speed = true_state[0][0]
position = true_state[1][0]
angle = true_state[2][0]
speed_list.append(speed)
position_list.append(position)
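The GAMMA and TAU comments added above describe the discount factor and the soft update of the target networks. The sketch below restates both outside the training loop so the two hyperparameters can be read in isolation; the helper functions and the `nn.Linear` stand-ins for the actor/critic networks are illustrative, not part of the repository.

``` python
import torch.nn as nn

GAMMA = 0.99  # discount factor from the training script above
TAU = 0.01    # soft-update rate from the training script above

def discounted_return(rewards, gamma=GAMMA):
    """Sum of gamma**t * r_t; shows how gamma weights future rewards."""
    return sum((gamma ** t) * r for t, r in enumerate(rewards))

# With gamma = 0.99 a reward 100 steps ahead still retains ~37% of its value;
# a much smaller gamma would make the agent focus almost only on immediate rewards.
print(discounted_return([1.0] * 100))   # ~63.4

def soft_update(target_net, source_net, tau=TAU):
    """Move each target parameter a fraction tau towards the source parameter,
    matching the parameter copy shown in the DDPGagent hunk above."""
    for target_param, param in zip(target_net.parameters(), source_net.parameters()):
        target_param.data.copy_(param.data * tau + target_param.data * (1.0 - tau))

# Stand-ins for the real networks: the state is 4-dimensional, the action is scalar.
actor = nn.Linear(4, 1)
actor_target = nn.Linear(4, 1)
soft_update(actor_target, actor)  # target weights move 1% of the way towards the actor's
```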