Specification


Import

from free_range_zoo.envs import wildfire_v0

Actions

Discrete & Stochastic

Observations

Discrete and fully observed, with private observations

Parallel API

Yes

Manual Control

No

Agent Names

[\(firefighter_0\), …, \(firefighter_n\)]

# Agents

[0, \(n_{firefighters}\)]

Action Shape

(\(envs\), 2)

Action Values

[\(fight_0\), …, \(fight_{tasks}\), \(noop\) (-1)]

Observation Shape

TensorDict: {
self: \(<ypos, xpos, power, suppressant, range, capacity, equipment>\)
others: \(<ypos, xpos, [power], [suppressant], [range], [capacity], [equipment]>\)
tasks: \(<ypos, xpos, fire\_level, intensity>\)
batch_size: \(num\_envs\) }

Observation Values

self:
\(ypos\): \([0, max_y]\)
\(xpos\): \([0, max_x]\)
\(power\): \([0, max_{effective\_power}]\)
\(suppressant\): \([0, max_{effective\_suppressant}]\)
\(range\): \([0, max_{effective\_range}]\)
\(capacity\): \([0, max_{effective\_capacity}]\)
\(equipment\): \([0, max_{equipment\_state}]\)
others:
\(ypos\): \([0, max_y]\)
\(xpos\): \([0, max_x]\)
\([power]\): included if observe_other_power
\([suppressant]\): included if observe_other_suppressant
\([range]\): included if observe_other_range
\([capacity]\): included if observe_other_capacity
\([equipment]\): included if observe_other_equipment
tasks:
\(ypos\): \([0, max_y]\)
\(xpos\): \([0, max_x]\)
\(fire\_level\): \([0, max_{fire\_level}]\)
\(intensity\): \([0, num_{fire\_states}]\)


Usage

Parallel API

import logging

import torch

from free_range_zoo.envs import wildfire_v0

main_logger = logging.getLogger(__name__)

# Initialize and reset environment to initial state
env = wildfire_v0.parallel_env(render_mode="human")
observations, infos = env.reset()

# Initialize agents and give initial observations.
# `agents` maps each agent name to a policy object exposing `act()` and
# `observe()`. It must be a dict (not a list) because it is indexed by agent
# name and iterated with `.items()` below. Populate it with one entry per
# name in `env.agents` before running the loop.
agents = {}

cumulative_rewards = {agent: 0 for agent in env.agents}

current_step = 0
while not torch.all(env.finished):
    agent_actions = {
        agent_name: torch.stack([agents[agent_name].act()])
        for agent_name in env.agents
    }  # Policy action determination here

    observations, rewards, terminations, truncations, infos = env.step(agent_actions)
    # `.item()` only works on one-element tensors, so this presumably assumes
    # a single batched environment -- TODO confirm against the env config.
    rewards = {agent_name: rewards[agent_name].item() for agent_name in env.agents}

    for agent_name, agent in agents.items():
        agent.observe(observations[agent_name][0])  # Policy observation processing here
        cumulative_rewards[agent_name] += rewards[agent_name]

    main_logger.info(f"Step {current_step}: {rewards}")
    current_step += 1

env.close()

AEC API

import logging

import torch

from free_range_zoo.envs import wildfire_v0

main_logger = logging.getLogger(__name__)

# Initialize and reset environment to initial state.
# The AEC interface (`agent_iter()` / `last()`) is used below, so the
# environment is built with `env(...)` rather than `parallel_env(...)`.
env = wildfire_v0.env(render_mode="human")
# The return value of reset() is not needed here: each agent's observation is
# read from `env.last()` inside the loop.
env.reset()

cumulative_rewards = {agent: 0 for agent in env.agents}

current_step = 0
while not torch.all(env.finished):
    # Rewards seen during this pass, keyed by agent name.
    step_rewards = {}

    for agent in env.agent_iter():
        # NOTE(review): following the PettingZoo AEC convention, `last()` is
        # expected to return values for the currently selected agent only --
        # confirm against the free_range_zoo implementation.
        observation, reward, termination, truncation, info = env.last()

        # `.item()` only works on one-element tensors, so this presumably
        # assumes a single batched environment -- TODO confirm.
        step_rewards[agent] = reward.item()
        cumulative_rewards[agent] += step_rewards[agent]

        # Policy action determination here
        action = env.action_space(agent).sample()

        env.step(action)

    main_logger.info(f"Step {current_step}: {step_rewards}")
    current_step += 1

env.close()

Configuration


API