#!/usr/bin/env python3
"""Command-line script that runs PPO2 training on a single Gym environment.

Arguments are read with ``mujoco_arg_parser`` (``env``, ``num_timesteps`` and
``seed`` are the fields used here).
"""
import argparse
from baselines.common.cmd_util import mujoco_arg_parser
from baselines import bench, logger


def train(env_id, num_timesteps, seed):
    """Run ``ppo2.learn`` with an ``MlpPolicy`` on the environment ``env_id``.

    Args:
        env_id: Identifier passed to ``gym.make`` to build the environment.
        num_timesteps: Forwarded to ``ppo2.learn`` as ``total_timesteps``.
        seed: Value passed to ``set_global_seeds`` before learning starts.

    The TensorFlow session is opened as a context manager so that it is
    exited and released when training finishes or raises, instead of being
    left entered for the remainder of the process.
    """
    # Heavy dependencies are imported lazily so that importing this module
    # (e.g. only to reach ``main``) does not require them up front.
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import MlpPolicy
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

    # Restrict TensorFlow to a single thread for both intra- and inter-op
    # parallelism.
    ncpu = 1
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)

    def make_env():
        """Build the Gym environment wrapped in a ``bench.Monitor``."""
        env = gym.make(env_id)
        # The monitor is given the logger's current directory.
        env = bench.Monitor(env, logger.get_dir())
        return env

    # ``with`` replaces the former bare ``__enter__()`` call: the session is
    # still entered before the environment is built, but now it is also
    # exited once learning is done.
    with tf.Session(config=config):
        env = DummyVecEnv([make_env])
        env = VecNormalize(env)

        set_global_seeds(seed)
        ppo2.learn(policy=MlpPolicy, env=env, nsteps=2048, nminibatches=32,
                   lam=0.95, gamma=0.99, noptepochs=10, log_interval=1,
                   ent_coef=0.0,
                   lr=3e-4,
                   cliprange=0.2,
                   total_timesteps=num_timesteps)


def main():
    """Parse command-line arguments, configure the logger and start training."""
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)


if __name__ == '__main__':
    main()