I’m working on creating a deep learning model using TensorFlow that learns from collected game data, but I keep getting an error that I can’t figure out. Here’s what I’m trying to do:
import gym
import tensorflow as tf
import numpy as np
game_env = 'CartPole-v0'
environment = gym.make(game_env)
input_data = []
target_data = []
best_score = 1
for episode in range(100):
state = environment.reset()
for step in range(100):
environment.render()
move = environment.action_space.sample()
state, score, finished, details = environment.step(move)
if score >= best_score:
input_data.append(state)
target_data.append(move)
best_score = score
if finished:
break
layer1_size = 50
layer2_size = 50
input_size = 4
output_size = 1
input_placeholder = tf.placeholder("float")
output_placeholder = tf.placeholder("float")
def create_network(input_data):
network_weights = {
'layer1': tf.Variable(tf.random_normal([input_size, layer1_size])),
'layer2': tf.Variable(tf.random_normal([layer1_size, layer2_size])),
'output': tf.Variable(tf.random_normal([layer2_size, output_size]))
}
network_biases = {
'bias1': tf.Variable(tf.random_normal([layer1_size])),
'bias2': tf.Variable(tf.random_normal([layer2_size])),
'output': tf.Variable(tf.random_normal([output_size]))
}
first_layer = tf.add(tf.matmul(input_data, network_weights['layer1']), network_biases['bias1'])
first_layer = tf.nn.relu(first_layer)
second_layer = tf.add(tf.matmul(first_layer, network_weights['layer2']), network_biases['bias2'])
second_layer = tf.nn.relu(second_layer)
final_output = tf.matmul(second_layer, network_weights['output']) + network_biases['output']
return final_output
model_output = create_network(input_placeholder)
cost_function = tf.reduce_sum(tf.square(model_output - output_placeholder))
training_step = tf.train.GradientDescentOptimizer(0.001).minimize(cost_function)
initializer = tf.global_variables_initializer()
session = tf.Session()
session.run(initializer)
for iteration in range(100):
session.run(training_step, {input_placeholder: input_data, output_placeholder: target_data})
with tf.Session() as test_session:
test_session.run(initializer)
test_env = gym.make(game_env)
current_state = test_env.reset
for time_step in range(1000):
test_env.render()
predicted_action = np.around(create_network(current_state))
current_state, reward, done, info = test_env.step(predicted_action)
if done:
break
The error I’m getting is:
TypeError: Expected binary or unicode string, got <bound method Env.reset of >
I think there might be something wrong with how I’m handling the environment state or passing data to the network. Can someone help me understand what’s causing this issue? Thanks for any help!