diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 08a8e33e72..7d1fffd938 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -56,9 +56,13 @@ repos:
           (?x)^(
             .*cs.meta|
             .*.css|
-            .*.meta
+            .*.meta|
+            .*.asset|
+            .*.prefab|
+            .*.unity|
+            .*.json
           )$
-        args: [--fix=lf]
+        args: [--fix=crlf]
       - id: trailing-whitespace
         name: trailing-whitespace-markdown
diff --git a/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb b/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb
index e5d3d45c8b..83aad09aba 100644
--- a/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb
+++ b/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb
@@ -161,8 +161,8 @@
     "from pathlib import Path\n",
     "from typing import Callable, Any\n",
     "\n",
-    "import gym\n",
-    "from gym import Env\n",
+    "import gymnasium as gym\n",
+    "from gymnasium import Env\n",
     "\n",
     "from stable_baselines3 import PPO\n",
     "from stable_baselines3.common.vec_env import VecMonitor, VecEnv, SubprocVecEnv\n",
diff --git a/docs/Python-Gym-API-Documentation.md b/docs/Python-Gym-API-Documentation.md
index b35771fc46..e92edce5e0 100644
--- a/docs/Python-Gym-API-Documentation.md
+++ b/docs/Python-Gym-API-Documentation.md
@@ -59,18 +59,22 @@ Environment initialization
 #### reset

 ```python
- | reset() -> Union[List[np.ndarray], np.ndarray]
+ | reset(*, seed: int | None = None, options: dict[str, Any] | None = None) -> Tuple[np.ndarray, Dict]
 ```

-Resets the state of the environment and returns an initial observation.
-Returns: observation (object/list): the initial observation of the
-space.
+Resets the state of the environment and returns an initial observation and info.
+
+**Returns**:
+
+- `observation` _object/list_ - the initial observation of the
+  space.
+- `info` _dict_ - contains auxiliary diagnostic information.

 #### step

 ```python
- | step(action: List[Any]) -> GymStepResult
+ | step(action: Any) -> GymStepResult
 ```

 Run one timestep of the environment's dynamics. When end of
@@ -86,14 +90,15 @@

-Accepts an action and returns a tuple (observation, reward, done, info).
+Accepts an action and returns a tuple (observation, reward, terminated, truncated, info).

 - `observation` _object/list_ - agent's observation of the current
   environment
   reward (float/list) : amount of reward returned after previous action
-- `done` _boolean/list_ - whether the episode has ended.
+- `terminated` _boolean/list_ - whether the episode has ended by termination.
+- `truncated` _boolean/list_ - whether the episode has ended by truncation.
 - `info` _dict_ - contains auxiliary diagnostic information.

 #### render

 ```python
- | render(mode="rgb_array")
+ | render()
 ```

 Return the latest visual observations.
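The regenerated reference above amounts to the standard Gymnasium contract: `reset()` now returns an `(observation, info)` pair and `step()` returns a `(observation, reward, terminated, truncated, info)` five-tuple. A minimal interaction loop against the updated wrapper might look like this sketch, where `./UnityEnv` is a hypothetical path to a built single-agent environment:

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper

# "./UnityEnv" is a placeholder; point it at any single-agent ML-Agents build.
unity_env = UnityEnvironment("./UnityEnv")
env = UnityToGymWrapper(unity_env)

observation, info = env.reset(seed=0)  # Gymnasium-style reset: (obs, info)
for _ in range(1000):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:  # an episode can now end either way
        observation, info = env.reset()
env.close()
```

Downstream trainers written against the old four-tuple API (such as the `baselines` example in the next file) may need further adaptation beyond the import swap shown below.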
diff --git a/docs/Python-Gym-API.md b/docs/Python-Gym-API.md
index 97869899ce..59ce44eeb6 100644
--- a/docs/Python-Gym-API.md
+++ b/docs/Python-Gym-API.md
@@ -93,7 +93,7 @@
 observation, a single discrete action and a single Agent in the scene.
 Add the following code to the `train_unity.py` file:
 ```python
-import gym
+import gymnasium as gym

 from baselines import deepq
 from baselines import logger
diff --git a/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py b/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py
index 4bb6fdf390..bccae65c0f 100644
--- a/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py
+++ b/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py
@@ -1,5 +1,5 @@
 from typing import Any, Optional
-from gym import error
+from gymnasium import error
 from mlagents_envs.base_env import BaseEnv
 from pettingzoo import AECEnv
diff --git a/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py b/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py
index df29a95c9a..3f0513ffb0 100644
--- a/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py
+++ b/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py
@@ -3,8 +3,8 @@
 import numpy as np
 from typing import Any, Dict, List, Optional, Tuple, Union

-import gym
-from gym import error, spaces
+import gymnasium as gym
+from gymnasium import error, spaces

 from mlagents_envs.base_env import ActionTuple, BaseEnv
 from mlagents_envs.base_env import DecisionSteps, TerminalSteps
@@ -20,7 +20,7 @@ class UnityGymException(error.Error):

 logger = logging_util.get_logger(__name__)

-GymStepResult = Tuple[np.ndarray, float, bool, Dict]
+GymStepResult = Tuple[np.ndarray, float, bool, bool, Dict]


 class UnityToGymWrapper(gym.Env):
@@ -151,11 +151,16 @@ def __init__(
         else:
             self._observation_space = list_spaces[0]  # only return the first one

-    def reset(self) -> Union[List[np.ndarray], np.ndarray]:
-        """Resets the state of the environment and returns an initial observation.
-        Returns: observation (object/list): the initial observation of the
+    def reset(
+        self, *, seed: int | None = None, options: dict[str, Any] | None = None
+    ) -> Tuple[np.ndarray, Dict]:
+        """Resets the state of the environment and returns an initial observation and info.
+        Returns:
+            observation (object/list): the initial observation of the
         space.
+            info (dict): contains auxiliary diagnostic information.
         """
+        super().reset(seed=seed, options=options)
         self._env.reset()
         decision_step, _ = self._env.get_steps(self.name)
         n_agents = len(decision_step)
@@ -163,9 +168,9 @@
         self.game_over = False

         res: GymStepResult = self._single_step(decision_step)
-        return res[0]
+        return res[0], res[4]

-    def step(self, action: List[Any]) -> GymStepResult:
+    def step(self, action: Any) -> GymStepResult:
         """Run one timestep of the environment's dynamics. When end of
         episode is reached, you are responsible for calling `reset()` to
         reset this environment's state.
@@ -175,14 +180,15 @@
         Returns:
             observation (object/list): agent's observation of the current environment
             reward (float/list) : amount of reward returned after previous action
-            done (boolean/list): whether the episode has ended.
+            terminated (boolean/list): whether the episode has ended by termination.
+            truncated (boolean/list): whether the episode has ended by truncation.
             info (dict): contains auxiliary diagnostic information.
         """
         if self.game_over:
             raise UnityGymException(
                 "You are calling 'step()' even though this environment has already "
-                "returned done = True. You must always call 'reset()' once you "
-                "receive 'done = True'."
+                "returned `terminated` or `truncated` as True. You must always call "
+                "'reset()' once an episode has ended."
             )
         if self._flattener is not None:
             # Translate action into list
@@ -227,9 +233,19 @@ def _single_step(self, info: Union[DecisionSteps, TerminalSteps]) -> GymStepResult:
             visual_obs = self._get_vis_obs_list(info)
             self.visual_obs = self._preprocess_single(visual_obs[0][0])

-        done = isinstance(info, TerminalSteps)
+        if isinstance(info, TerminalSteps):
+            interrupted = bool(info.interrupted[0])
+            terminated, truncated = not interrupted, interrupted
+        else:
+            terminated, truncated = False, False

-        return (default_observation, info.reward[0], done, {"step": info})
+        return (
+            default_observation,
+            info.reward[0],
+            terminated,
+            truncated,
+            {"step": info},
+        )

     def _preprocess_single(self, single_visual_obs: np.ndarray) -> np.ndarray:
         if self.uint8_visual:
@@ -276,7 +292,7 @@ def _get_vec_obs_size(self) -> int:
                 result += obs_spec.shape[0]
         return result

-    def render(self, mode="rgb_array"):
+    def render(self):
         """
         Return the latest visual observations.
         Note that it will not render a new frame of the environment.
diff --git a/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py b/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py
index 09398d27fa..906905e83b 100644
--- a/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py
+++ b/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py
@@ -1,5 +1,5 @@
 from typing import Optional, Dict, Any, Tuple
-from gym import error
+from gymnasium import error
 from mlagents_envs.base_env import BaseEnv
 from pettingzoo import ParallelEnv
diff --git a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py
index 3457f18c88..c040050a2b 100644
--- a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py
+++ b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py
@@ -1,7 +1,7 @@
 import atexit
 from typing import Optional, List, Set, Dict, Any, Tuple
 import numpy as np
-from gym import error, spaces
+from gymnasium import error, spaces
 from mlagents_envs.base_env import BaseEnv, ActionTuple
 from mlagents_envs.envs.env_helpers import _agent_id_to_behavior, _unwrap_batch_steps
diff --git a/ml-agents-envs/setup.py b/ml-agents-envs/setup.py
index fcbee96151..bd40cb4c01 100644
--- a/ml-agents-envs/setup.py
+++ b/ml-agents-envs/setup.py
@@ -58,12 +58,12 @@ def run(self):
         "Pillow>=4.2.1",
         "protobuf>=3.6,<3.21",
         "pyyaml>=3.1.0",
-        "gym>=0.21.0",
-        "pettingzoo==1.15.0",
-        "numpy>=1.23.5,<1.24.0",
+        "gymnasium>=0.25.0",
+        "pettingzoo>=1.15.0",
+        "numpy>=1.23.5,<2.0",
         "filelock>=3.4.0",
     ],
-    python_requires=">=3.10.1,<=3.10.12",
+    python_requires=">=3.10,<4",
     # TODO: Remove this once mypy stops having spurious setuptools issues.
cmdclass={"verify": VerifyVersionCommand}, # type: ignore ) diff --git a/ml-agents-envs/tests/test_gym.py b/ml-agents-envs/tests/test_gym.py index 4fc2bf548c..21afdc0c9f 100644 --- a/ml-agents-envs/tests/test_gym.py +++ b/ml-agents-envs/tests/test_gym.py @@ -2,7 +2,7 @@ import pytest import numpy as np -from gym import spaces +from gymnasium import spaces from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper from mlagents_envs.base_env import ( diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index 43d468f2bc..8f767e23d0 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -12,7 +12,7 @@ UnityCommunicatorStoppedException, ) from multiprocessing import Process, Pipe, Queue -from multiprocessing.connection import Connection +from multiprocessing.connection import Connection, PipeConnection from queue import Empty as EmptyQueueException from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec from mlagents_envs import logging_util @@ -77,7 +77,7 @@ class StepResponse(NamedTuple): class UnityEnvWorker: - def __init__(self, process: Process, worker_id: int, conn: Connection): + def __init__(self, process: Process, worker_id: int, conn: PipeConnection): self.process = process self.worker_id = worker_id self.conn = conn diff --git a/utils/generate_markdown_docs.py b/utils/generate_markdown_docs.py index 7566b1bdc7..5ce432b3a2 100755 --- a/utils/generate_markdown_docs.py +++ b/utils/generate_markdown_docs.py @@ -6,7 +6,6 @@ import argparse import hashlib - # pydoc-markdown -I . -m module_name --render_toc > doc.md @@ -52,8 +51,8 @@ def remove_trailing_whitespace(filename): # compare source and destination and write only if changed if source_file != destination_file: num_changed += 1 - with open(filename, "wb") as f: - f.write(destination_file.encode()) + with open(filename, "w", newline="\r\n") as f: + f.write(destination_file) if __name__ == "__main__": @@ -84,7 +83,7 @@ def remove_trailing_whitespace(filename): for submodule in submodules: module_args.append("-m") module_args.append(f"{module_name}.{submodule}") - with open(output_file_name, "w") as output_file: + with open(output_file_name, "wb") as output_file: subprocess_args = [ "pydoc-markdown", "-I",