When I run the following code,
agent = Agent.create( agent=dict(type='trpo'), environment=setup_env(recording=False), batch_size=1, learning_rate=1e-3, memory=50000, max_episode_timesteps=timesteps, exploration=dict( type='decaying', unit='timesteps', decay='exponential', initial_value=0.2, decay_steps=50000, decay_rate=0.5, num_steps=timesteps ), parallel_interactions=num_parallel, )
it reports an error:
`ValueError Traceback (most recent call last)
Cell In[7], line 1
----> 1 agent = Agent.create(
2 agent=dict(type='trpo'),
3 environment=setup_env(recording=False),
4 batch_size=1,
5 learning_rate=1e-3,
6 memory=50000,
7 max_episode_timesteps=timesteps,
8 exploration=dict(
9 type='decaying', unit='timesteps', decay='exponential',
10 initial_value=0.2, decay_steps=50000, decay_rate=0.5,
11 num_steps=timesteps
12 ),
13 parallel_interactions=num_parallel,
14 )
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/agents/agent.py:97, in Agent.create(agent, environment, **kwargs)
94 kwargs = dict(agent)
95 agent = kwargs.pop('agent', kwargs.pop('type', 'default'))
---> 97 return Agent.create(agent=agent, environment=environment, **kwargs)
99 elif isinstance(agent, str):
100 if os.path.isfile(agent):
101 # JSON file specification
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/agents/agent.py:116, in Agent.create(agent, environment, **kwargs)
113 elif agent in tensorforce.agents.agents:
114 # Keyword specification
115 agent = tensorforce.agents.agents[agent]
--> 116 return Agent.create(agent=agent, environment=environment, **kwargs)
118 else:
119 raise TensorforceError.value(name='Agent.create', argument='agent', dtype=agent)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/agents/agent.py:89, in Agent.create(agent, environment, **kwargs)
87 agent = agent(**kwargs)
88 assert isinstance(agent, Agent)
---> 89 return Agent.create(agent=agent, environment=environment)
91 elif isinstance(agent, dict):
92 # Dictionary specification
93 agent.update(kwargs)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/agents/agent.py:66, in Agent.create(agent, environment, **kwargs)
64 agent.reset()
65 else:
---> 66 agent.initialize()
68 return agent
70 elif isinstance(agent, type) and issubclass(agent, Agent):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/agents/agent.py:357, in Agent.initialize(self)
354 if not hasattr(self, 'model'):
355 raise TensorforceError(message="Missing agent attribute model.")
--> 357 self.model.initialize()
359 self.internals_spec = self.model.internals_spec
360 self.auxiliaries_spec = self.model.auxiliaries_spec
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/model.py:287, in Model.initialize(self)
284 # This is unreachable?
285 quit()
--> 287 super().initialize()
289 # If we are a global model -> return here.
290 # Saving, syncing, finalizing graph, session is done by local replica model.
291 # if self.execution_spec is not None and self.execution_type == "distributed" and not self.is_local_model:
(...)
295 # Creates the tf.compat.v1.train.Saver object and stores it in self.saver.
296 # if self.execution_spec is None or self.execution_type == "single":
297 if True:
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:546, in Module.initialize(self)
543 if not callable(api_function):
544 raise TensorforceError.unexpected()
--> 546 function = self.create_api_function(
547 name='{}.{}'.format(self.name, function_name), api_function=api_function
548 )
550 setattr(self, function_name, function)
552 if self.summarizer_spec is not None:
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:597, in Module.create_api_function(self, name, api_function)
594 Module.scope_stack.append(scope)
595 scope.enter()
--> 597 results = api_function()
598 assert all(x.name.endswith('-output:0') for x in util.flatten(xs=results))
599 self.output_tensors[name[name.index('.') + 1:]] = [
600 x.name[len(name) + 1: -9] for x in util.flatten(xs=results)
601 ]
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/model.py:1404, in Model.api_observe(self)
1398 actions[name] = self.actions_buffer[name][parallel[0], :buffer_index[0]]
1400 reward = self.add_summary(
1401 label=('raw-reward', 'rewards'), name='raw-reward', tensor=reward
1402 )
-> 1404 is_updated = self.core_observe(
1405 states=states, internals=internals, auxiliaries=auxiliaries, actions=actions,
1406 terminal=terminal, reward=reward
1407 )
1409 # Reset buffer index
1410 with tf.control_dependencies(control_inputs=(is_updated,)):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/tensorforce.py:586, in TensorforceModel.tf_core_observe(self, states, internals, auxiliaries, actions, terminal, reward)
583 with tf.control_dependencies(control_inputs=(assignment,)):
584 return self.core_update()
--> 586 is_updated = self.cond(
587 pred=is_frequency, true_fn=perform_update, false_fn=util.no_operation
588 )
590 return is_updated
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:687, in Module.cond(self, pred, true_fn, false_fn)
684 return result
686 Module.cond_counter += 1
--> 687 x = tf.cond(pred=pred, true_fn=true_fn_wrapper, false_fn=false_fn_wrapper)
688 Module.cond_counter -= 1
689 return x
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:1392, in cond_for_tf_v2(pred, true_fn, false_fn, name)
1321 @tf_export("cond", v1=[])
1322 def cond_for_tf_v2(pred, true_fn=None, false_fn=None, name=None):
1323 """Return true_fn() if the predicate pred is true else false_fn().
1324
1325 true_fn and false_fn both return lists of output tensors. true_fn and
(...)
1390
1391 """
-> 1392 return cond(pred, true_fn=true_fn, false_fn=false_fn, strict=True, name=name)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py:507, in deprecated_args..deprecated_wrapper..new_func(*args, **kwargs)
499 _PRINTED_WARNING[(func, arg_name)] = True
500 logging.warning(
501 'From %s: calling %s (from %s) with %s is deprecated and will '
502 'be removed %s.\nInstructions for updating:\n%s',
(...)
505 'in a future version' if date is None else ('after %s' % date),
506 instructions)
--> 507 return func(*args, **kwargs)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:1227, in cond(pred, true_fn, false_fn, strict, name, fn1, fn2)
1225 try:
1226 context_t.Enter()
-> 1227 orig_res_t, res_t = context_t.BuildCondBranch(true_fn)
1228 if orig_res_t is None:
1229 raise ValueError("true_fn must have a return value.")
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:1064, in CondContext.BuildCondBranch(self, fn)
1062 """Add the subgraph defined by fn() to the graph."""
1063 pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
-> 1064 original_result = fn()
1065 post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
1066 if len(post_summaries) > len(pre_summaries):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:673, in Module.cond..true_fn_wrapper()
671 for scope in Module.scope_stack:
672 scope.enter()
--> 673 result = true_fn()
674 for scope in reversed(Module.scope_stack):
675 scope.exit(None, None, None)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/tensorforce.py:584, in TensorforceModel.tf_core_observe..perform_update()
582 assignment = self.last_update.assign(value=unit, read_value=False)
583 with tf.control_dependencies(control_inputs=(assignment,)):
--> 584 return self.core_update()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/tensorforce.py:664, in TensorforceModel.tf_core_update(self)
661 indices = self.memory.retrieve_episodes(n=batch_size)
663 # Optimization
--> 664 optimized = self.optimize(indices=indices)
666 # Increment update
667 with tf.control_dependencies(control_inputs=(optimized,)):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/tensorforce.py:778, in TensorforceModel.tf_optimize(self, indices)
776 # Optimization
777 with tf.control_dependencies(control_inputs=dependencies):
--> 778 optimized = self.optimizer.minimize(
779 variables=variables, arguments=arguments, fn_loss=fn_loss,
780 fn_kl_divergence=fn_kl_divergence, global_variables=global_variables, **kwargs
781 )
783 with tf.control_dependencies(control_inputs=(optimized,)):
784 # Loss summaries
785 if self.is_summary_logged(label=('loss', 'objective-loss', 'losses')):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/optimizer.py:54, in Optimizer.tf_minimize(self, variables, **kwargs)
51 if any(variable.dtype != util.tf_dtype(dtype='float') for variable in variables):
52 raise TensorforceError.unexpected()
---> 54 deltas = self.step(variables=variables, **kwargs)
56 update_norm = tf.linalg.global_norm(t_list=deltas)
57 deltas = self.add_summary(
58 label='update-norm', name='update-norm', tensor=update_norm, pass_tensors=deltas
59 )
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/optimizing_step.py:73, in OptimizingStep.tf_step(self, variables, arguments, fn_loss, fn_reference, **kwargs)
70 loss_before = -fn_loss(**augmented_arguments)
72 with tf.control_dependencies(control_inputs=(loss_before,)):
---> 73 deltas = self.optimizer.step(
74 variables=variables, arguments=arguments, fn_loss=fn_loss, # no reference here?
75 return_estimated_improvement=True, **kwargs
76 )
78 if isinstance(deltas, tuple):
79 # If 'return_estimated_improvement' argument exists.
80 if len(deltas) != 2:
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/natural_gradient.py:109, in NaturalGradient.tf_step(self, variables, arguments, fn_loss, fn_kl_divergence, return_estimated_improvement, **kwargs)
104 loss_gradients = tf.gradients(ys=loss, xs=variables)
106 # Solve the following system for delta' via the conjugate gradient solver.
107 # [delta' * F] * delta' = -grad(loss)
108 # --> delta' (= lambda * delta)
--> 109 deltas = self.solver.solve(
110 fn_x=fisher_matrix_product, x_init=None, b=[-grad for grad in loss_gradients]
111 )
113 # delta' * F
114 delta_fisher_matrix_product = fisher_matrix_product(deltas=deltas)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/solvers/conjugate_gradient.py:83, in ConjugateGradient.tf_solve(self, fn_x, x_init, b)
71 def tf_solve(self, fn_x, x_init, b):
72 """
73 Iteratively solves the system of linear equations $A x = b$.
74
(...)
81 A solution $x$ to the problem as given by the solver.
82 """
---> 83 return super().tf_solve(fn_x, x_init, b)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/solvers/iterative.py:77, in Iterative.tf_solve(self, fn_x, x_init, *args)
74 else:
75 # TensorFlow while loop
76 max_iterations = self.max_iterations.value()
---> 77 args = self.while_loop(
78 cond=self.next_step, body=self.step, loop_vars=args, back_prop=False,
79 maximum_iterations=max_iterations
80 )
82 solution = self.end(*args)
84 return solution
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:698, in Module.while_loop(self, cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, maximum_iterations)
696 if maximum_iterations is not None and maximum_iterations.dtype is not tf.int32:
697 maximum_iterations = tf.dtypes.cast(x=maximum_iterations, dtype=tf.int32)
--> 698 x = tf.while_loop(
699 cond=cond, body=body, loop_vars=loop_vars, shape_invariants=shape_invariants,
700 parallel_iterations=parallel_iterations, back_prop=back_prop,
701 swap_memory=swap_memory, maximum_iterations=maximum_iterations
702 )
703 Module.while_counter -= 1
704 return x
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py:574, in deprecated_arg_values..deprecated_wrapper..new_func(*args, **kwargs)
567 _PRINTED_WARNING[(func, arg_name)] = True
568 logging.warning(
569 'From %s: calling %s (from %s) with %s=%s is deprecated and '
570 'will be removed %s.\nInstructions for updating:\n%s',
571 _call_location(), decorator_utils.get_qualified_name(func),
572 func.module, arg_name, arg_value, 'in a future version'
573 if date is None else ('after %s' % date), instructions)
--> 574 return func(*args, **kwargs)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:2481, in while_loop_v2(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, maximum_iterations, name)
2305 @tf_export("while_loop", v1=[])
2306 @deprecation.deprecated_arg_values(
2307 None,
(...)
2322 maximum_iterations=None,
2323 name=None):
2324 """Repeat body while the condition cond is true.
2325
2326 cond is a callable returning a boolean scalar tensor. body is a callable
(...)
2479
2480 """
-> 2481 return while_loop(
2482 cond=cond,
2483 body=body,
2484 loop_vars=loop_vars,
2485 shape_invariants=shape_invariants,
2486 parallel_iterations=parallel_iterations,
2487 back_prop=back_prop,
2488 swap_memory=swap_memory,
2489 name=name,
2490 maximum_iterations=maximum_iterations,
2491 return_same_structure=True)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:2765, in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name, maximum_iterations, return_same_structure)
2763 if loop_context.outer_context is None:
2764 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
-> 2765 result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants,
2766 return_same_structure)
2767 if maximum_iterations is not None:
2768 return result[1]
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:2247, in WhileContext.BuildLoop(self, pred, body, loop_vars, shape_invariants, return_same_structure)
2243 # _BuildLoop calls _update_input in several places. _mutation_lock()
2244 # ensures a Session.run call cannot occur between creating and mutating
2245 # new ops.
2246 with ops.get_default_graph()._mutation_lock(): # pylint: disable=protected-access
-> 2247 original_body_result, exit_vars = self._BuildLoop(
2248 pred, body, original_loop_vars, loop_vars, shape_invariants)
2249 finally:
2250 self.Exit()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:2173, in WhileContext._BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2168 packed_vars_for_body = nest.pack_sequence_as(
2169 structure=original_loop_vars,
2170 flat_sequence=vars_for_body_with_tensor_arrays,
2171 expand_composites=True)
2172 pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
-> 2173 body_result = body(*packed_vars_for_body)
2174 post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
2175 if not nest.is_sequence_or_composite(body_result):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:2718, in while_loop..(i, lv)
2715 loop_vars = (counter, loop_vars)
2716 cond = lambda i, lv: ( # pylint: disable=g-long-lambda
2717 math_ops.logical_and(i < maximum_iterations, orig_cond(*lv)))
-> 2718 body = lambda i, lv: (i + 1, orig_body(*lv))
2719 try_to_pack = False
2721 if executing_eagerly:
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/solvers/conjugate_gradient.py:100, in ConjugateGradient.tf_step(self, x, conjugate, residual, squared_residual)
86 """
87 Iteration loop body of the conjugate gradient algorithm.
88
(...)
96 Updated arguments for next iteration.
97 """
99 # Ac := A * c_t
--> 100 A_conjugate = self.fn_x(conjugate)
102 # TODO: reference?
103 damping = self.damping.value()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/natural_gradient.py:92, in NaturalGradient.tf_step..fisher_matrix_product(deltas)
86 delta_kldiv_grads = tf.add_n(inputs=[
87 tf.reduce_sum(input_tensor=(delta * grad))
88 for delta, grad in zip(deltas, kldiv_grads)
89 ])
91 # [delta' * F] = grad(delta' * grad(kldiv))
---> 92 delta_kldiv_grads2 = tf.gradients(ys=delta_kldiv_grads, xs=variables)
93 assert sum(grad is None for grad in delta_kldiv_grads2) == num_grad_none
94 return [
95 tf.zeros_like(input=var) if grad is None else tf.convert_to_tensor(value=grad)
96 for grad, var in zip(delta_kldiv_grads2, variables)
97 ]
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/gradients_impl.py:299, in gradients_v2(ys, xs, grad_ys, name, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients)
294 # Creating the gradient graph for control flow mutates Operations.
295 # _mutation_lock ensures a Session.run call cannot occur between creating and
296 # mutating new ops.
297 # pylint: disable=protected-access
298 with ops.get_default_graph()._mutation_lock():
--> 299 return gradients_util._GradientsHelper(
300 ys, xs, grad_ys, name, True, gate_gradients,
301 aggregation_method, stop_gradients,
302 unconnected_gradients)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/gradients_util.py:638, in _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients, src_graph)
627 # NOTE(skyewm): We don't support computing gradients wrt a loop variable
628 # unless it's within the context of a single iteration (i.e. the
629 # gradient is wrt to the loop parameter in the body function, not wrt or
630 # through the initial value). This means if we're in a while loop
631 # context, we should never see a switch node from this context.
632 # pylint: disable=protected-access
633 if (control_flow_util.IsSwitch(op) and
634 op._control_flow_context is not None and
635 op._control_flow_context.IsWhileContext() and
636 op._control_flow_context ==
637 ops.get_default_graph()._get_control_flow_context()):
--> 638 _RaiseNoGradWrtInitialLoopValError(op, from_ops, xs_set)
639 # pylint: enable=protected-access
641 if (grad_fn or is_func_call) and has_out_grads:
642 # NOTE: If _AggregatedGrads didn't compute a value for the i'th
643 # output, it means that the cost does not depend on output[i],
644 # therefore dC/doutput[i] is 0.
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/gradients_util.py:374, in _RaiseNoGradWrtInitialLoopValError(op, from_ops, xs_set)
372 queue.extend(t.op for t in _NonEagerInputs(curr_op, xs_set))
373 assert target_op
--> 374 raise ValueError(
375 "Cannot compute gradient inside while loop with respect to op '%s'. "
376 "We do not support taking the gradient wrt or through the initial value "
377 "of a loop variable. Gradients can be computed through loop invariants "
378 "or wrt the input parameters to the loop body."
379 % target_op.name)
ValueError: Cannot compute gradient inside while loop with respect to op 'agent/policy/policy-network/TS-dense0/bias'. We do not support taking the gradient wrt or through the initial value of a loop variable. Gradients can be computed through loop invariants or wrt the input parameters to the loop body.`
or
AssertionError: Do not use tf.reset_default_graph() to clear nested graphs. If you need a cleared graph, exit the nesting and create a new graph.
I am using Python 3.8, TensorFlow 2.2.0, and Tensorforce 0.5.5.
I would be grateful if someone could offer some advice and help.
When I run the following code,
agent = Agent.create( agent=dict(type='trpo'), environment=setup_env(recording=False), batch_size=1, learning_rate=1e-3, memory=50000, max_episode_timesteps=timesteps, exploration=dict( type='decaying', unit='timesteps', decay='exponential', initial_value=0.2, decay_steps=50000, decay_rate=0.5, num_steps=timesteps ), parallel_interactions=num_parallel, )
it reports an error:
`ValueError Traceback (most recent call last)
Cell In[7], line 1
----> 1 agent = Agent.create(
2 agent=dict(type='trpo'),
3 environment=setup_env(recording=False),
4 batch_size=1,
5 learning_rate=1e-3,
6 memory=50000,
7 max_episode_timesteps=timesteps,
8 exploration=dict(
9 type='decaying', unit='timesteps', decay='exponential',
10 initial_value=0.2, decay_steps=50000, decay_rate=0.5,
11 num_steps=timesteps
12 ),
13 parallel_interactions=num_parallel,
14 )
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/agents/agent.py:97, in Agent.create(agent, environment, **kwargs)
94 kwargs = dict(agent)
95 agent = kwargs.pop('agent', kwargs.pop('type', 'default'))
---> 97 return Agent.create(agent=agent, environment=environment, **kwargs)
99 elif isinstance(agent, str):
100 if os.path.isfile(agent):
101 # JSON file specification
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/agents/agent.py:116, in Agent.create(agent, environment, **kwargs)
113 elif agent in tensorforce.agents.agents:
114 # Keyword specification
115 agent = tensorforce.agents.agents[agent]
--> 116 return Agent.create(agent=agent, environment=environment, **kwargs)
118 else:
119 raise TensorforceError.value(name='Agent.create', argument='agent', dtype=agent)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/agents/agent.py:89, in Agent.create(agent, environment, **kwargs)
87 agent = agent(**kwargs)
88 assert isinstance(agent, Agent)
---> 89 return Agent.create(agent=agent, environment=environment)
91 elif isinstance(agent, dict):
92 # Dictionary specification
93 agent.update(kwargs)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/agents/agent.py:66, in Agent.create(agent, environment, **kwargs)
64 agent.reset()
65 else:
---> 66 agent.initialize()
68 return agent
70 elif isinstance(agent, type) and issubclass(agent, Agent):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/agents/agent.py:357, in Agent.initialize(self)
354 if not hasattr(self, 'model'):
355 raise TensorforceError(message="Missing agent attribute model.")
--> 357 self.model.initialize()
359 self.internals_spec = self.model.internals_spec
360 self.auxiliaries_spec = self.model.auxiliaries_spec
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/model.py:287, in Model.initialize(self)
284 # This is unreachable?
285 quit()
--> 287 super().initialize()
289 # If we are a global model -> return here.
290 # Saving, syncing, finalizing graph, session is done by local replica model.
291 # if self.execution_spec is not None and self.execution_type == "distributed" and not self.is_local_model:
(...)
295 # Creates the tf.compat.v1.train.Saver object and stores it in self.saver.
296 # if self.execution_spec is None or self.execution_type == "single":
297 if True:
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:546, in Module.initialize(self)
543 if not callable(api_function):
544 raise TensorforceError.unexpected()
--> 546 function = self.create_api_function(
547 name='{}.{}'.format(self.name, function_name), api_function=api_function
548 )
550 setattr(self, function_name, function)
552 if self.summarizer_spec is not None:
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:597, in Module.create_api_function(self, name, api_function)
594 Module.scope_stack.append(scope)
595 scope.enter()
--> 597 results = api_function()
598 assert all(x.name.endswith('-output:0') for x in util.flatten(xs=results))
599 self.output_tensors[name[name.index('.') + 1:]] = [
600 x.name[len(name) + 1: -9] for x in util.flatten(xs=results)
601 ]
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/model.py:1404, in Model.api_observe(self)
1398 actions[name] = self.actions_buffer[name][parallel[0], :buffer_index[0]]
1400 reward = self.add_summary(
1401 label=('raw-reward', 'rewards'), name='raw-reward', tensor=reward
1402 )
-> 1404 is_updated = self.core_observe(
1405 states=states, internals=internals, auxiliaries=auxiliaries, actions=actions,
1406 terminal=terminal, reward=reward
1407 )
1409 # Reset buffer index
1410 with tf.control_dependencies(control_inputs=(is_updated,)):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/tensorforce.py:586, in TensorforceModel.tf_core_observe(self, states, internals, auxiliaries, actions, terminal, reward)
583 with tf.control_dependencies(control_inputs=(assignment,)):
584 return self.core_update()
--> 586 is_updated = self.cond(
587 pred=is_frequency, true_fn=perform_update, false_fn=util.no_operation
588 )
590 return is_updated
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:687, in Module.cond(self, pred, true_fn, false_fn)
684 return result
686 Module.cond_counter += 1
--> 687 x = tf.cond(pred=pred, true_fn=true_fn_wrapper, false_fn=false_fn_wrapper)
688 Module.cond_counter -= 1
689 return x
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:1392, in cond_for_tf_v2(pred, true_fn, false_fn, name)
1321 @tf_export("cond", v1=[])
1322 def cond_for_tf_v2(pred, true_fn=None, false_fn=None, name=None):
1323 """Return true_fn() if the predicate pred is true else false_fn().
1324
1325 true_fn and false_fn both return lists of output tensors. true_fn and
(...)
1390
1391 """
-> 1392 return cond(pred, true_fn=true_fn, false_fn=false_fn, strict=True, name=name)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py:507, in deprecated_args..deprecated_wrapper..new_func(*args, **kwargs)
499 _PRINTED_WARNING[(func, arg_name)] = True
500 logging.warning(
501 'From %s: calling %s (from %s) with %s is deprecated and will '
502 'be removed %s.\nInstructions for updating:\n%s',
(...)
505 'in a future version' if date is None else ('after %s' % date),
506 instructions)
--> 507 return func(*args, **kwargs)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:1227, in cond(pred, true_fn, false_fn, strict, name, fn1, fn2)
1225 try:
1226 context_t.Enter()
-> 1227 orig_res_t, res_t = context_t.BuildCondBranch(true_fn)
1228 if orig_res_t is None:
1229 raise ValueError("true_fn must have a return value.")
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:1064, in CondContext.BuildCondBranch(self, fn)
1062 """Add the subgraph defined by fn() to the graph."""
1063 pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
-> 1064 original_result = fn()
1065 post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
1066 if len(post_summaries) > len(pre_summaries):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:673, in Module.cond..true_fn_wrapper()
671 for scope in Module.scope_stack:
672 scope.enter()
--> 673 result = true_fn()
674 for scope in reversed(Module.scope_stack):
675 scope.exit(None, None, None)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/tensorforce.py:584, in TensorforceModel.tf_core_observe..perform_update()
582 assignment = self.last_update.assign(value=unit, read_value=False)
583 with tf.control_dependencies(control_inputs=(assignment,)):
--> 584 return self.core_update()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/tensorforce.py:664, in TensorforceModel.tf_core_update(self)
661 indices = self.memory.retrieve_episodes(n=batch_size)
663 # Optimization
--> 664 optimized = self.optimize(indices=indices)
666 # Increment update
667 with tf.control_dependencies(control_inputs=(optimized,)):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function..fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/models/tensorforce.py:778, in TensorforceModel.tf_optimize(self, indices)
776 # Optimization
777 with tf.control_dependencies(control_inputs=dependencies):
--> 778 optimized = self.optimizer.minimize(
779 variables=variables, arguments=arguments, fn_loss=fn_loss,
780 fn_kl_divergence=fn_kl_divergence, global_variables=global_variables, **kwargs
781 )
783 with tf.control_dependencies(control_inputs=(optimized,)):
784 # Loss summaries
785 if self.is_summary_logged(label=('loss', 'objective-loss', 'losses')):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function.<locals>.fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/optimizer.py:54, in Optimizer.tf_minimize(self, variables, **kwargs)
51 if any(variable.dtype != util.tf_dtype(dtype='float') for variable in variables):
52 raise TensorforceError.unexpected()
---> 54 deltas = self.step(variables=variables, **kwargs)
56 update_norm = tf.linalg.global_norm(t_list=deltas)
57 deltas = self.add_summary(
58 label='update-norm', name='update-norm', tensor=update_norm, pass_tensors=deltas
59 )
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function.<locals>.fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/optimizing_step.py:73, in OptimizingStep.tf_step(self, variables, arguments, fn_loss, fn_reference, **kwargs)
70 loss_before = -fn_loss(**augmented_arguments)
72 with tf.control_dependencies(control_inputs=(loss_before,)):
---> 73 deltas = self.optimizer.step(
74 variables=variables, arguments=arguments, fn_loss=fn_loss, # no reference here?
75 return_estimated_improvement=True, **kwargs
76 )
78 if isinstance(deltas, tuple):
79 # If 'return_estimated_improvement' argument exists.
80 if len(deltas) != 2:
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function.<locals>.fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/natural_gradient.py:109, in NaturalGradient.tf_step(self, variables, arguments, fn_loss, fn_kl_divergence, return_estimated_improvement, **kwargs)
104 loss_gradients = tf.gradients(ys=loss, xs=variables)
106 # Solve the following system for delta' via the conjugate gradient solver.
107 # [delta' * F] * delta' = -grad(loss)
108 # --> delta' (= lambda * delta)
--> 109 deltas = self.solver.solve(
110 fn_x=fisher_matrix_product, x_init=None, b=[-grad for grad in loss_gradients]
111 )
113 # delta' * F
114 delta_fisher_matrix_product = fisher_matrix_product(deltas=deltas)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function.<locals>.fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/solvers/conjugate_gradient.py:83, in ConjugateGradient.tf_solve(self, fn_x, x_init, b)
71 def tf_solve(self, fn_x, x_init, b):
72 """
73 Iteratively solves the system of linear equations $A x = b$.
74
(...)
81 A solution $x$ to the problem as given by the solver.
82 """
---> 83 return super().tf_solve(fn_x, x_init, b)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/solvers/iterative.py:77, in Iterative.tf_solve(self, fn_x, x_init, *args)
74 else:
75 # TensorFlow while loop
76 max_iterations = self.max_iterations.value()
---> 77 args = self.while_loop(
78 cond=self.next_step, body=self.step, loop_vars=args, back_prop=False,
79 maximum_iterations=max_iterations
80 )
82 solution = self.end(*args)
84 return solution
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:698, in Module.while_loop(self, cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, maximum_iterations)
696 if maximum_iterations is not None and maximum_iterations.dtype is not tf.int32:
697 maximum_iterations = tf.dtypes.cast(x=maximum_iterations, dtype=tf.int32)
--> 698 x = tf.while_loop(
699 cond=cond, body=body, loop_vars=loop_vars, shape_invariants=shape_invariants,
700 parallel_iterations=parallel_iterations, back_prop=back_prop,
701 swap_memory=swap_memory, maximum_iterations=maximum_iterations
702 )
703 Module.while_counter -= 1
704 return x
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py:574, in deprecated_arg_values.<locals>.deprecated_wrapper.<locals>.new_func(*args, **kwargs)
567 _PRINTED_WARNING[(func, arg_name)] = True
568 logging.warning(
569 'From %s: calling %s (from %s) with %s=%s is deprecated and '
570 'will be removed %s.\nInstructions for updating:\n%s',
571 _call_location(), decorator_utils.get_qualified_name(func),
572 func.__module__, arg_name, arg_value, 'in a future version'
573 if date is None else ('after %s' % date), instructions)
--> 574 return func(*args, **kwargs)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:2481, in while_loop_v2(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, maximum_iterations, name)
2305 @tf_export("while_loop", v1=[])
2306 @deprecation.deprecated_arg_values(
2307 None,
(...)
2322 maximum_iterations=None,
2323 name=None):
2324 """Repeat `body` while the condition `cond` is true.
2325
2326 `cond` is a callable returning a boolean scalar tensor. `body` is a callable
(...)
2479
2480 """
-> 2481 return while_loop(
2482 cond=cond,
2483 body=body,
2484 loop_vars=loop_vars,
2485 shape_invariants=shape_invariants,
2486 parallel_iterations=parallel_iterations,
2487 back_prop=back_prop,
2488 swap_memory=swap_memory,
2489 name=name,
2490 maximum_iterations=maximum_iterations,
2491 return_same_structure=True)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:2765, in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name, maximum_iterations, return_same_structure)
2763 if loop_context.outer_context is None:
2764 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
-> 2765 result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants,
2766 return_same_structure)
2767 if maximum_iterations is not None:
2768 return result[1]
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:2247, in WhileContext.BuildLoop(self, pred, body, loop_vars, shape_invariants, return_same_structure)
2243 # _BuildLoop calls _update_input in several places. _mutation_lock()
2244 # ensures a Session.run call cannot occur between creating and mutating
2245 # new ops.
2246 with ops.get_default_graph()._mutation_lock(): # pylint: disable=protected-access
-> 2247 original_body_result, exit_vars = self._BuildLoop(
2248 pred, body, original_loop_vars, loop_vars, shape_invariants)
2249 finally:
2250 self.Exit()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:2173, in WhileContext._BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2168 packed_vars_for_body = nest.pack_sequence_as(
2169 structure=original_loop_vars,
2170 flat_sequence=vars_for_body_with_tensor_arrays,
2171 expand_composites=True)
2172 pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
-> 2173 body_result = body(*packed_vars_for_body)
2174 post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
2175 if not nest.is_sequence_or_composite(body_result):
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/control_flow_ops.py:2718, in while_loop.<locals>.<lambda>(i, lv)
2715 loop_vars = (counter, loop_vars)
2716 cond = lambda i, lv: ( # pylint: disable=g-long-lambda
2717 math_ops.logical_and(i < maximum_iterations, orig_cond(*lv)))
-> 2718 body = lambda i, lv: (i + 1, orig_body(*lv))
2719 try_to_pack = False
2721 if executing_eagerly:
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/module.py:571, in Module.create_tf_function.<locals>.fn(*args, **kwargs)
569 Module.scope_stack.append(scope)
570 scope.enter()
--> 571 results = tf_function(*args, **kwargs)
572 scope.exit(None, None, None)
573 Module.scope_stack.pop()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/solvers/conjugate_gradient.py:100, in ConjugateGradient.tf_step(self, x, conjugate, residual, squared_residual)
86 """
87 Iteration loop body of the conjugate gradient algorithm.
88
(...)
96 Updated arguments for next iteration.
97 """
99 # Ac := A * c_t
--> 100 A_conjugate = self.fn_x(conjugate)
102 # TODO: reference?
103 damping = self.damping.value()
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorforce/core/optimizers/natural_gradient.py:92, in NaturalGradient.tf_step.<locals>.fisher_matrix_product(deltas)
86 delta_kldiv_grads = tf.add_n(inputs=[
87 tf.reduce_sum(input_tensor=(delta * grad))
88 for delta, grad in zip(deltas, kldiv_grads)
89 ])
91 # [delta' * F] = grad(delta' * grad(kldiv))
---> 92 delta_kldiv_grads2 = tf.gradients(ys=delta_kldiv_grads, xs=variables)
93 assert sum(grad is None for grad in delta_kldiv_grads2) == num_grad_none
94 return [
95 tf.zeros_like(input=var) if grad is None else tf.convert_to_tensor(value=grad)
96 for grad, var in zip(delta_kldiv_grads2, variables)
97 ]
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/gradients_impl.py:299, in gradients_v2(ys, xs, grad_ys, name, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients)
294 # Creating the gradient graph for control flow mutates Operations.
295 # _mutation_lock ensures a Session.run call cannot occur between creating and
296 # mutating new ops.
297 # pylint: disable=protected-access
298 with ops.get_default_graph()._mutation_lock():
--> 299 return gradients_util._GradientsHelper(
300 ys, xs, grad_ys, name, True, gate_gradients,
301 aggregation_method, stop_gradients,
302 unconnected_gradients)
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/gradients_util.py:638, in _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients, src_graph)
627 # NOTE(skyewm): We don't support computing gradients wrt a loop variable
628 # unless it's within the context of a single iteration (i.e. the
629 # gradient is wrt to the loop parameter in the body function, not wrt or
630 # through the initial value). This means if we're in a while loop
631 # context, we should never see a switch node from this context.
632 # pylint: disable=protected-access
633 if (control_flow_util.IsSwitch(op) and
634 op._control_flow_context is not None and
635 op._control_flow_context.IsWhileContext() and
636 op._control_flow_context ==
637 ops.get_default_graph()._get_control_flow_context()):
--> 638 _RaiseNoGradWrtInitialLoopValError(op, from_ops, xs_set)
639 # pylint: enable=protected-access
641 if (grad_fn or is_func_call) and has_out_grads:
642 # NOTE: If _AggregatedGrads didn't compute a value for the i'th
643 # output, it means that the cost does not depend on output[i],
644 # therefore dC/doutput[i] is 0.
File ~/anaconda3/envs/catgym/lib/python3.8/site-packages/tensorflow/python/ops/gradients_util.py:374, in _RaiseNoGradWrtInitialLoopValError(op, from_ops, xs_set)
372 queue.extend(t.op for t in _NonEagerInputs(curr_op, xs_set))
373 assert target_op
--> 374 raise ValueError(
375 "Cannot compute gradient inside while loop with respect to op '%s'. "
376 "We do not support taking the gradient wrt or through the initial value "
377 "of a loop variable. Gradients can be computed through loop invariants "
378 "or wrt the input parameters to the loop body."
379 % target_op.name)
ValueError: Cannot compute gradient inside while loop with respect to op 'agent/policy/policy-network/TS-dense0/bias'. We do not support taking the gradient wrt or through the initial value of a loop variable. Gradients can be computed through loop invariants or wrt the input parameters to the loop body.`
Or it reports the following error instead:
AssertionError: Do not use tf.reset_default_graph() to clear nested graphs. If you need a cleared graph, exit the nesting and create a new graph.
I am using Python 3.8, TensorFlow 2.2.0, and Tensorforce 0.5.5.
I would be grateful if someone could offer some advice and help.