Diff of /SAC/TR_Algorithm.py [000000] .. [9f010e]

Switch to side-by-side view

--- a
+++ b/SAC/TR_Algorithm.py
@@ -0,0 +1,117 @@
+import numpy as np 
+import matplotlib.pyplot as plt 
+import time
+
+## ------- Functions related to finite differencing -------------
+
+#Parameters for the optimization algorithm
+delta_step= 1e-4
+
+def gradient_fd(f, x):
+  num_variables = len(x)
+  out = [ ]
+
+  f_0 = f(x)
+  delta_x = delta_step
+
+  for i in range(num_variables):
+    x_copy = np.copy(x)
+    x_copy[i] = x_copy[i] + delta_x
+    f_delta = f(x_copy)
+    out.append( (f_delta - f_0) / delta_x )
+
+  return np.array(out)
+
+def hessian_fd(f, x):
+  num_variables = len(x)
+  out = [ ]
+
+  f_0 = gradient_fd(f, x)
+  delta_x = delta_step
+
+  for i in range(num_variables):
+    x_copy = np.copy(x)
+    x_copy[i] = x_copy[i] + delta_x
+    f_delta = gradient_fd(f, x_copy)
+    out.append( (f_delta - f_0) / delta_x )
+
+  return np.array(out)
+
+def fs(x):
+  return f_obj(x)
+
+def Dfs(x):
+  return gradient_fd(fs, x)
+
+def Hfs(x):
+  return hessian_fd(fs, x)
+
+
+#maps the intial state to the final state that minimizes the obj func
+def TR_Algorithm(obj_func, initial_state, env):
+
+  #Parameters for the optimization algorithm
+  tol= 5e-04
+  r0= 0.001
+  max_iters = 10000
+
+
+  global f_obj
+  f_obj = obj_func
+  
+  x0 = initial_state
+
+  iter_t = 0
+
+  #whether the optimization was successful or not
+  success = False
+  cum_loss = 0
+  while(1):
+
+      pks = -1*r0*Dfs(x0).T/np.linalg.norm(Dfs(x0).T)
+
+      if Dfs(x0)@Hfs(x0)@Dfs(x0).T <= 0:
+          tk = 1
+      else:
+          thresh = np.linalg.norm(Dfs(x0))**3 / (  r0*Dfs(x0)@Hfs(x0)@Dfs(x0).T )
+          tk = min(1, thresh)
+
+      pkc = tk*pks
+
+      x1 = x0 + pkc
+
+      #Advance the simulator by one step as well
+      env.set_state_musculo(x1)
+      env.render()
+
+      x0 = x1
+      r0 = r0
+
+      # if np.linalg.norm(Dfs(x0)) < tol and np.linalg.norm(fs(x0)) < tol:
+      if np.linalg.norm(fs(x0)) < tol:
+
+        #Return the final qpos and qvel
+
+        state_opt = env.sim.data.qpos.flat.copy()
+        state_opt_musculo = env.get_musculo_state()
+        final_loss = np.linalg.norm(fs(x0))
+        success = True
+
+        return state_opt, state_opt_musculo, final_loss, cum_loss, success
+
+      iter_t += 1
+      #add to the cumulative loss
+      cum_loss += np.linalg.norm(fs(x0))
+
+      if iter_t >= max_iters:
+        print('Max iters achieved for the IK algorithm')
+        print('Try the following: 1. Decrease tol (vars) in IK algorithm')
+        print('2. Change other parameters including r0, delta step')
+        print('3. Use MuJoCo to provide a better initial pose')
+        
+        state_opt = env.sim.data.qpos.flat.copy()
+        state_opt_musculo = env.get_musculo_state()
+        final_loss = np.linalg.norm(fs(x0))
+        success = False
+
+        return state_opt, state_opt_musculo, final_loss, cum_loss, success
\ No newline at end of file