muSim / Git / Diff of /SAC/reward_function

Models:
JoelW/
muSim
Downloads: 1
Diff of /SAC/reward_function_specs.py [000000] .. [9f010e]
Switch to side-by-side view

--- a
+++ b/SAC/reward_function_specs.py
@@ -0,0 +1,51 @@
+import numpy as np
+from . import kinematics_preprocessing_specs
+
+# The reward is a function of the sensory feedback (index into it to get the desired state features),
+# the current simulator state (which also avoids indexing the sensory feedback when there are no delays)
+# and the current action a_t.
+
+# Tracking specification; reward_function reads entry[0] as the tracked body name and entry[1] as its target body name.
+musculo_tracking = kinematics_preprocessing_specs.musculo_tracking
+
+# Whether or not to apply the minimum muscle effort constraint
+min_muscle_constraint = False
+# Scaler used to weight the muscle effort cost
+muscle_cost_scaler = 1/50
+
+# The reward scaling factor exponentially scales the distance between the body/end-effector and the target it has to track.
+# For a smaller threshold, use a higher reward scaling factor.
+reward_scaling_factor = 1000
+
+# The threshold crossing penalty is returned as the reward if any body/end-effector xyz position goes outside the thresholding region
+threshold_crossing_penalty = -5
+
+def reward_function(state_td, sim_state, action_t, threshold):
+    """Compute the tracking reward for the current simulator state.
+
+    For every entry of ``musculo_tracking`` the per-coordinate absolute distance
+    between the tracked body (``entry[0]``) and its target body (``entry[1]``)
+    is measured. If any of those distances exceeds ``threshold`` the function
+    returns ``threshold_crossing_penalty`` immediately. Otherwise the reward is
+    the sum over all coordinates of ``1 / reward_scaling_factor ** distance``,
+    reduced by the muscle effort cost when ``min_muscle_constraint`` is enabled.
+
+    Args:
+        state_td: Sensory feedback; not used by this implementation.
+        sim_state: Simulator object exposing ``data.get_body_xpos(name)``.
+        action_t: Current action; only used for the muscle effort cost.
+        threshold: Upper bound compared element-wise (``>``) against the
+            per-coordinate distances.
+
+    Returns:
+        ``threshold_crossing_penalty`` if any coordinate distance exceeds
+        ``threshold``; otherwise the summed exponential reward (minus the
+        muscle effort cost if enabled). With an empty ``musculo_tracking``
+        the summed reward is ``0.0``.
+    """
+    xyz_coord_dists = []
+
+    for musculo_body_tracking in musculo_tracking:
+        musculo_body = sim_state.data.get_body_xpos(musculo_body_tracking[0]).flat.copy()
+        musculo_target = sim_state.data.get_body_xpos(musculo_body_tracking[1]).flat.copy()
+
+        # np.abs already yields an ndarray, so it can be compared directly
+        current_dists = np.abs(musculo_body - musculo_target)
+
+        # If any body goes out of the movement thresholding region, return the penalty right away
+        if (current_dists > threshold).any():
+            return threshold_crossing_penalty
+
+        # Grow the accumulator in place instead of rebuilding the list on every iteration
+        xyz_coord_dists.extend(current_dists)
+
+    xyz_coord_dists = np.array(xyz_coord_dists)
+    # Exponential reward scaling: each coordinate contributes 1 at zero distance and decays with distance
+    reward = np.sum(1/(reward_scaling_factor**xyz_coord_dists))
+
+    if min_muscle_constraint:
+        reward = reward - muscle_effort_cost(action_t)
+
+    return reward
+
+def muscle_effort_cost(action_t):
+    """Return the muscle effort cost of an action.
+
+    The cost is ``muscle_cost_scaler`` multiplied by the sum of the absolute
+    values of ``action_t``.
+    """
+    absolute_effort = np.abs(action_t)
+    return muscle_cost_scaler * np.sum(absolute_effort)
\ No newline at end of file