From d5751ab59aed744e41ad8af265da81089eb0c58b Mon Sep 17 00:00:00 2001
From: Quan Vuong <quan.vuong@nyu.edu>
Date: Sat, 11 May 2019 23:07:21 +0100
Subject: [PATCH] Set the seeds for both training and eval environments

---
 examples/development/main.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/examples/development/main.py b/examples/development/main.py
index 8510769d2..b70b0cefa 100644
--- a/examples/development/main.py
+++ b/examples/development/main.py
@@ -46,6 +46,14 @@ def _build(self):
             get_environment_from_params(environment_params['evaluation'])
             if 'evaluation' in environment_params
             else training_environment)
+        
+        seed = variant['run_params']['seed']
+        
+        training_environment.seed(seed)
+        
+        # Use a different seed for the evaluation env so that evaluation does not
+        # just reward memorized action sequences for initial states seen in training.
+        evaluation_environment.seed(seed + 10)
 
         replay_pool = self.replay_pool = (
             get_replay_pool_from_variant(variant, training_environment))