## Specification
| Import | `from free_range_zoo.envs import wildfire_v0` |
|---|---|
| Actions | Discrete & Stochastic |
| Observations | Discrete and fully observed with private observations |
| Parallel API | Yes |
| Manual Control | No |
| Agent Names | [\(firefighter_0\), …, \(firefighter_n\)] |
| # Agents | [0, \(n_{firefighters}\)] |
| Action Shape | (\(envs\), 2) |
| Action Values | [\(fight_0\), …, \(fight_{tasks}\), \(noop\) (-1)] |
| Observation Shape | TensorDict: { |
| Observation Values | self: |
## Usage
### Parallel API
# Example: drive the wildfire environment through the Parallel API, where all
# agents submit their actions together on every call to `env.step`.
import logging

import torch

from free_range_zoo.envs import wildfire_v0

main_logger = logging.getLogger(__name__)

# Initialize and reset environment to initial state
env = wildfire_v0.parallel_env(render_mode="human")
observations, infos = env.reset()

# Initialize agents and give initial observations.
# This must be a mapping of agent name -> policy object: the loop below looks
# policies up by name and iterates with `.items()`. Populate it with one
# policy (exposing `act()` and `observe()`) per name in `env.agents`.
agents = {}

cumulative_rewards = {agent: 0 for agent in env.agents}

current_step = 0
while not torch.all(env.finished):
    agent_actions = {
        agent_name: torch.stack([agents[agent_name].act()])
        for agent_name in env.agents
    }  # Policy action determination here

    observations, rewards, terminations, truncations, infos = env.step(agent_actions)
    # Convert each agent's reward to a plain Python number for logging and accumulation
    rewards = {agent_name: rewards[agent_name].item() for agent_name in env.agents}

    for agent_name, agent in agents.items():
        agent.observe(observations[agent_name][0])  # Policy observation processing here
        cumulative_rewards[agent_name] += rewards[agent_name]

    main_logger.info(f"Step {current_step}: {rewards}")
    current_step += 1

env.close()
### AEC API
# Example: drive the wildfire environment through the AEC API, where agents
# are stepped one at a time via `env.agent_iter()` / `env.last()`.
import logging

import torch

from free_range_zoo.envs import wildfire_v0

main_logger = logging.getLogger(__name__)

# Initialize and reset environment to initial state
# NOTE(review): this section demonstrates the AEC loop (`agent_iter()` /
# `last()`), yet the environment is constructed with `parallel_env`, exactly
# as in the Parallel API example -- confirm which constructor (and which
# `reset()` return shape) the AEC API expects.
env = wildfire_v0.parallel_env(render_mode="human")
observations, infos = env.reset()

# Initialize agents and give initial observations
agents = []

cumulative_rewards = {agent: 0 for agent in env.agents}

current_step = 0
while not torch.all(env.finished):
    for agent in env.agent_iter():
        observations, rewards, terminations, truncations, infos = env.last()

        # Policy action determination here
        action = env.action_space(agent).sample()

        env.step(action)

    # Convert each agent's reward to a plain Python number
    rewards = {agent_name: rewards[agent_name].item() for agent_name in env.agents}

    # Credit every agent. Indexing with the variable left over from the
    # `agent_iter()` loop would only ever update the last agent iterated.
    for agent_name, reward in rewards.items():
        cumulative_rewards[agent_name] += reward

    current_step += 1
    main_logger.info(f"Step {current_step}: {rewards}")

env.close()