Specification


Import

from free_range_zoo.envs import rideshare_v0

Actions

Discrete & Deterministic

Observations

Discrete and fully observed with private observations

Parallel API

Yes

Manual Control

No

Agent Names

[\(driver\)_0, … , \(driver\)_n]

# Agents

\(n\)

Action Shape

(\(envs\), 2)

Action Values

[\([accept (0)|pick (1)|drop (2)]\_0\), …, \([accept (0)|pick (1)|drop (2)]\_{tasks}\), \(noop\) (-1)]

Observation Shape

TensorDict: {
self: \(<y, x, num_{accepted}, num_{riding}>\)
others: \(<y, x, num_{accepted}, num_{riding}>\)
tasks: \(<y, x, y_{dest}, x_{dest}, accepted\_by, riding\_by, fare, entered\_step>\)
batch_size: { \(num\_envs\) }
}

Observation Values

self:
\(y\): \([0, max_y]\)
\(x\): \([0, max_x]\)
\(num\_accepted\): \([0, pooling\_limit]\)
\(num\_riding\): \([0, pooling\_limit]\)
others:
\(y\): \([0, max_y]\)
\(x\): \([0, max_x]\)
\(num\_accepted\): \([0, pooling\_limit]\)
\(num\_riding\): \([0, pooling\_limit]\)
tasks:
\(y\): \([0, max_y]\)
\(x\): \([0, max_x]\)
\(y_{dest}\): \([0, max_y]\)
\(x_{dest}\): \([0, max_x]\)
\(riding\_by\): \([0, num_{agents}]\)
\(accepted\_by\): \([0, num_{agents}]\)
\(fare\): \([0, max_{fare}]\)
\(entered\_step\): \([0, max_{steps}]\)


Usage

Parallel API

import logging

import torch

from free_range_zoo.envs import rideshare_v0

main_logger = logging.getLogger(__name__)

# Initialize and reset environment to initial state
env = rideshare_v0.parallel_env(render_mode="human")
observations, infos = env.reset()

# Initialize agents and give initial observations.
# `agents` maps each agent name to a policy object exposing `act()` and
# `observe(observation)`; it must be a dict because the loop below indexes it
# by agent name and iterates `agents.items()`. Populate it with your own
# policies, e.g. {agent_name: MyPolicy(...) for agent_name in env.agents}.
agents = {}

cumulative_rewards = {agent: 0 for agent in env.agents}

current_step = 0
while not torch.all(env.finished):
    agent_actions = {
        agent_name: torch.stack([agents[agent_name].act()])
        for agent_name in env.agents
    }  # Policy action determination here

    observations, rewards, terminations, truncations, infos = env.step(agent_actions)
    # `.item()` only works on single-element tensors, so this example presumably
    # assumes a single batched environment — TODO confirm for multi-env runs.
    rewards = {agent_name: rewards[agent_name].item() for agent_name in env.agents}

    for agent_name, agent in agents.items():
        agent.observe(observations[agent_name][0])  # Policy observation processing here
        cumulative_rewards[agent_name] += rewards[agent_name]

    main_logger.info(f"Step {current_step}: {rewards}")
    current_step += 1

env.close()

AEC API

import logging

import torch

from free_range_zoo.envs import rideshare_v0

main_logger = logging.getLogger(__name__)

# Initialize and reset environment to initial state.
# NOTE(review): the loop below uses the AEC-style `agent_iter()` / `last()`
# calls, so the AEC constructor is used here rather than `parallel_env`.
# Assumes the module exports `env` following the PettingZoo convention — confirm.
env = rideshare_v0.env(render_mode="human")
observations, infos = env.reset()

# Initialize agents and give initial observations.
# Map each agent name to its policy object here; this example samples random
# actions from the action space instead, so the mapping is left empty.
agents = {}

cumulative_rewards = {agent: 0 for agent in env.agents}

current_step = 0
while not torch.all(env.finished):
    for agent in env.agent_iter():
        observations, rewards, terminations, truncations, infos = env.last()

        # Policy action determination here
        action = env.action_space(agent).sample()

        env.step(action)

    # `.item()` only works on single-element tensors, so this example presumably
    # assumes a single batched environment — TODO confirm for multi-env runs.
    rewards = {agent: rewards[agent].item() for agent in env.agents}

    # Credit every agent, not only the one left bound by the loop above.
    for agent_name, reward in rewards.items():
        cumulative_rewards[agent_name] += reward

    # Log before incrementing so step numbering starts at 0, matching the
    # Parallel API example.
    main_logger.info(f"Step {current_step}: {rewards}")
    current_step += 1

env.close()

Configuration

API