class QuadXPoleWaypointsEnv(QuadXBaseEnv):
    """QuadX Pole Waypoints Environment.

-    Actions are vp, vq, vr, T, ie: angular rates and thrust.
+    Actions are direct motor PWM commands because any underlying controller introduces too much control latency.

    The target is to get to a set of `[x, y, z]` waypoints in space without dropping the pole.

    Args:
@@ -37,11 +37,11 @@ def __init__(
        sparse_reward: bool = False,
        num_targets: int = 4,
        goal_reach_distance: float = 0.2,
-        flight_mode: int = 0,
+        flight_mode: int = -1,
        flight_dome_size: float = 10.0,
-        max_duration_seconds: float = 60.0,
+        max_duration_seconds: float = 20.0,
        angle_representation: Literal["euler", "quaternion"] = "quaternion",
-        agent_hz: int = 30,
+        agent_hz: int = 40,
        render_mode: None | Literal["human", "rgb_array"] = None,
        render_resolution: tuple[int, int] = (480, 480),
    ):
@@ -57,12 +57,11 @@ def __init__(
            max_duration_seconds (float): maximum simulation time of the environment.
            angle_representation (Literal["euler", "quaternion"]): can be "euler" or "quaternion".
            agent_hz (int): looprate of the agent to environment interaction.
-            render_mode (None | Literal["human", "rgb_array"]): render_mode
+            render_mode (None | Literal["human", "rgb_array"]): render_mode.
            render_resolution (tuple[int, int]): render_resolution.

        """
        super().__init__(
-            start_pos=np.array([[0.0, 0.0, 1.0]]),
            flight_mode=flight_mode,
            flight_dome_size=flight_dome_size,
            max_duration_seconds=max_duration_seconds,
@@ -126,7 +125,12 @@ def reset(

        """
        super().begin_reset(
-            seed, options, drone_options={"drone_model": "primitive_drone"}
+            seed,
+            options,
+            drone_options={
+                "drone_model": "primitive_drone",
+                "camera_position_offset": np.array([-3.0, 0.0, 1.0]),
+            },
        )

        # spawn in a pole
@@ -135,7 +139,6 @@ def reset(
        # init some other metadata
        self.waypoints.reset(self.env, self.np_random)
        self.info["num_targets_reached"] = 0
-        self.distance_to_immediate = np.inf

        super().end_reset()

@@ -162,10 +165,10 @@ def compute_state(self) -> None:
        """
        # compute attitude of self
        ang_vel, ang_pos, lin_vel, lin_pos, quaternion = super().compute_attitude()
+        aux_state = super().compute_auxiliary()
        rotation = (
            np.array(self.env.getMatrixFromQuaternion(quaternion)).reshape(3, 3).T
        )
-        aux_state = super().compute_auxiliary()

        # compute the pole's states
        (
@@ -210,36 +213,34 @@ def compute_state(self) -> None:
                pole_bot_pos,
                pole_top_vel,
                pole_bot_vel,
-            ]
+            ],
+            axis=-1,
        )

-        new_state["target_deltas"] = self.waypoints.distance_to_target(
+        new_state["target_deltas"] = self.waypoints.distance_to_targets(
            ang_pos, lin_pos, quaternion
        )
-        self.distance_to_immediate = float(
-            np.linalg.norm(new_state["target_deltas"][0])
-        )

        self.state: dict[Literal["attitude", "target_deltas"], np.ndarray] = new_state

    def compute_term_trunc_reward(self) -> None:
-        """Computes the termination, trunction, and reward of the current timestep."""
+        """Computes the termination, truncation, and reward of the current timestep."""
        super().compute_base_term_trunc_reward()

        # bonus reward if we are not sparse
        if not self.sparse_reward:
-            self.reward += max(3.0 * self.waypoints.progress_to_target(), 0.0)
-            self.reward += 0.1 / self.distance_to_immediate
-            self.reward -= self.pole.leaningness
+            self.reward += max(15.0 * self.waypoints.progress_to_next_target, 0.0)
+            self.reward += 0.5 / self.waypoints.distance_to_next_target
+            self.reward += (0.5 - self.pole.leaningness)

        # target reached
-        if self.waypoints.target_reached():
-            self.reward = 100.0
+        if self.waypoints.target_reached:
+            self.reward = 300.0

        # advance the targets
        self.waypoints.advance_targets()

        # update infos and dones
-        self.truncation |= self.waypoints.all_targets_reached()
-        self.info["env_complete"] = self.waypoints.all_targets_reached()
-        self.info["num_targets_reached"] = self.waypoints.num_targets_reached()
+        self.truncation |= self.waypoints.all_targets_reached
+        self.info["env_complete"] = self.waypoints.all_targets_reached
+        self.info["num_targets_reached"] = self.waypoints.num_targets_reached
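For context, below is a minimal usage sketch of the environment after this change. The import path and the Gymnasium-style reset()/step() loop are assumptions not established by this diff; the constructor arguments, the new defaults (direct motor PWM actions via flight_mode=-1, agent_hz=40, max_duration_seconds=20.0), and the info keys are the ones shown above.

# Minimal usage sketch; module path and Gymnasium API usage are assumed, not taken from this diff.
from PyFlyt.gym_envs.quadx_envs.quadx_pole_waypoints_env import QuadXPoleWaypointsEnv

env = QuadXPoleWaypointsEnv(
    sparse_reward=False,   # dense shaping: progress, proximity, and pole-uprightness terms
    num_targets=4,
    goal_reach_distance=0.2,
)

obs, info = env.reset(seed=42)
terminated = truncated = False
episode_return = 0.0

while not (terminated or truncated):
    # Random motor PWM commands drawn from the action space; replace with a trained policy.
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    episode_return += reward

print(episode_return, info["num_targets_reached"], info["env_complete"])
env.close()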