
Commit a13ebd3

Merge branch 'pytorch:main' into fix_wandb_logger
2 parents: 0f51e6b + 851a041

128 files changed: 23097 additions, 4095 deletions


.github/unittest/linux_libs/scripts_brax/environment.yml

Lines changed: 1 addition & 0 deletions

@@ -21,3 +21,4 @@ dependencies:
 - hydra-core
 - jax[cuda12]
 - brax
+- psutil
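
psutil exposes process and system metrics (memory, CPU) to the test scripts. The commit doesn't state why it was added here, so the snippet below is only an illustrative sketch of the kind of introspection it enables in CI, not code from the repository:

```python
# Illustrative only: report memory/CPU figures from inside a CI test job.
import os
import psutil

proc = psutil.Process(os.getpid())
print(f"RSS memory: {proc.memory_info().rss / 1e6:.1f} MB")
print(f"System memory in use: {psutil.virtual_memory().percent}%")
print(f"CPU count: {psutil.cpu_count()}")
```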

.github/unittest/linux_libs/scripts_brax/run_test.sh

Lines changed: 34 additions & 1 deletion

@@ -8,6 +8,13 @@ conda activate ./env
 
 export PYTORCH_TEST_WITH_SLOW='1'
 export LAZY_LEGACY_OP=False
+
+# Configure JAX for proper GPU initialization
+export XLA_PYTHON_CLIENT_PREALLOCATE=false
+export XLA_PYTHON_CLIENT_ALLOCATOR=platform
+export TF_FORCE_GPU_ALLOW_GROWTH=true
+export CUDA_VISIBLE_DEVICES=0
+
 python -m torch.utils.collect_env
 # Avoid error: "fatal: unsafe repository"
 git config --global --add safe.directory '*'
@@ -28,7 +35,33 @@ export MAGNUM_LOG=verbose MAGNUM_GPU_VALIDATION=ON
 # this workflow only tests the libs
 python -c "import brax"
 python -c "import brax.envs"
-python -c "import jax"
+
+# Initialize JAX with proper GPU configuration
+python -c "
+import jax
+import jax.numpy as jnp
+import os
+
+# Configure JAX for GPU
+os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = 'false'
+os.environ['XLA_PYTHON_CLIENT_ALLOCATOR'] = 'platform'
+
+# Test JAX GPU availability
+try:
+    devices = jax.devices()
+    print(f'JAX devices: {devices}')
+    if len(devices) > 1:
+        print('JAX GPU is available')
+    else:
+        print('JAX CPU only')
+except Exception as e:
+    print(f'JAX initialization error: {e}')
+    # Fallback to CPU
+    os.environ['JAX_PLATFORM_NAME'] = 'cpu'
+    jax.config.update('jax_platform_name', 'cpu')
+    print('Falling back to JAX CPU')
+"
+
 python3 -c 'import torch;t = torch.ones([2,2], device="cuda:0");print(t);print("tensor device:" + str(t.device))'
 
 python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 --capture no -k TestBrax --error-for-skips
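
One caveat in the committed check: `jax.devices()` returns the devices of the default backend, so a single-GPU host yields a list of length 1 and the `len(devices) > 1` branch would report "JAX CPU only". A platform-based check avoids that; here is a minimal sketch (relying only on the documented `Device.platform` attribute, not part of this commit):

```python
# Sketch: decide GPU vs CPU from each device's platform rather than the count.
import jax

devices = jax.devices()
print(f"JAX devices: {devices}")
# Older jaxlib builds report 'gpu'; newer ones report 'cuda' or 'rocm'.
if any(d.platform in ("gpu", "cuda", "rocm") for d in devices):
    print("JAX GPU is available")
else:
    print("JAX CPU only")
```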

.github/unittest/linux_libs/scripts_openx/environment.yml

Lines changed: 1 addition & 1 deletion

@@ -21,5 +21,5 @@ dependencies:
 - hydra-core
 - tqdm
 - h5py
-- datasets
+- datasets<4.0.0
 - pillow
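
The `<4.0.0` pin shields the OpenX tests from breaking changes in the `datasets` 4.x line. If a runtime guard is ever wanted as well, a minimal sketch (the `packaging` dependency is an assumption, not part of this environment file):

```python
# Sketch: verify the installed `datasets` version respects the <4.0.0 pin.
from importlib.metadata import version
from packaging.version import Version  # assumed available in the env

installed = Version(version("datasets"))
assert installed < Version("4.0.0"), f"datasets {installed} breaks the <4.0.0 pin"
print(f"datasets {installed}: OK")
```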

.github/unittest/windows_optdepts/scripts/unittest.sh

Lines changed: 34 additions & 3 deletions

@@ -14,6 +14,12 @@ env_dir="${root_dir}/env"
 
 cd "${root_dir}"
 
+echo "=== Starting Windows CI setup ==="
+echo "Current directory: $(pwd)"
+echo "Python version: $PYTHON_VERSION"
+echo "CU_VERSION: $CU_VERSION"
+echo "TORCH_VERSION: $TORCH_VERSION"
+
 eval "$($(which conda) shell.bash hook)" && set -x
 
 # Create test environment at ./env
@@ -28,11 +34,12 @@ echo $(which python)
 echo $(python --version)
 echo $(conda info -e)
 
-
+echo "=== Installing test dependencies ==="
 python -m pip install hypothesis future cloudpickle pytest pytest-cov pytest-mock pytest-instafail pytest-rerunfailures expecttest pyyaml scipy coverage
 
 # =================================== Install =================================================
 
+echo "=== Installing PyTorch and dependencies ==="
 
 # TODO, refactor the below logic to make it easy to understand how to get correct cuda_version.
 if [ "${CU_VERSION:-}" == cpu ] ; then
@@ -56,8 +63,8 @@ else
     cudatoolkit="${cuda_toolkit_pckg}=${version}"
 fi
 
-
 # submodules
+echo "=== Updating git submodules ==="
 git submodule sync && git submodule update --init --recursive
 python -m pip install "numpy<2.0"
 
@@ -92,6 +99,7 @@ fi
 #python -m pip install pip --upgrade
 
 # install tensordict
+echo "=== Installing tensordict ==="
 if [[ "$RELEASE" == 0 ]]; then
     conda install anaconda::cmake -y
 
@@ -103,11 +111,13 @@ else
 fi
 
 # smoke test
+echo "=== Testing tensordict import ==="
 python -c """
 from tensordict import TensorDict
 print('successfully imported tensordict')
 """
 
+echo "=== Setting up CUDA environment ==="
 source "$this_dir/set_cuda_envs.sh"
 
 printf "* Installing torchrl\n"
@@ -117,13 +127,15 @@ whatsinside=$(ls -rtlh ./torchrl)
 echo $whatsinside
 
 # smoke test
+echo "=== Testing torchrl import ==="
 python -c """
 from torchrl.data import ReplayBuffer
 print('successfully imported torchrl')
 """
 
 # =================================== Run =================================================
 
+echo "=== Setting up test environment ==="
 source "$this_dir/set_cuda_envs.sh"
 
 # we don't use torchsnapshot
@@ -132,5 +144,24 @@ export MAX_IDLE_COUNT=60
 export BATCHED_PIPE_TIMEOUT=60
 export LAZY_LEGACY_OP=False
 
+echo "=== Collecting environment info ==="
 python -m torch.utils.collect_env
-pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py
+
+echo "=== Starting pytest execution ==="
+echo "Current working directory: $(pwd)"
+echo "Python executable: $(which python)"
+echo "Pytest executable: $(which pytest)"
+
+# Create test-results directory if it doesn't exist
+mkdir -p test-results
+
+# Run pytest with explicit error handling
+set +e  # Don't exit on error for pytest
+pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py --ignore test/llm
+PYTEST_EXIT_CODE=$?
+set -e  # Re-enable exit on error
+
+echo "=== Pytest completed with exit code: $PYTEST_EXIT_CODE ==="
+
+# Exit with pytest's exit code
+exit $PYTEST_EXIT_CODE
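
The `set +e` / `set -e` bracket is the shell idiom for letting a failing command run to completion under `set -e` semantics while still capturing and propagating its status. The same pattern for a Python driver, should one ever replace the shell wrapper, looks like this (a sketch; the flags mirror the command above):

```python
# Sketch: run pytest without aborting the wrapper, then forward its exit code.
import subprocess
import sys
from pathlib import Path

Path("test-results").mkdir(exist_ok=True)  # mirror `mkdir -p test-results`
result = subprocess.run(
    ["pytest", "--junitxml=test-results/junit.xml", "-v", "--durations", "200"],
    check=False,  # like `set +e`: a failing run doesn't raise
)
print(f"=== Pytest completed with exit code: {result.returncode} ===")
sys.exit(result.returncode)  # like `exit $PYTEST_EXIT_CODE`
```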

.github/workflows/benchmarks_pr.yml

Lines changed: 3 additions & 1 deletion

@@ -14,7 +14,9 @@ jobs:
 
   benchmark_cpu:
     name: CPU Pytest benchmark
-    runs-on: linux.g5.4xlarge.nvidia.cpu
+    runs-on: linux.4xlarge
+    # Disabling job since it hasn't worked for months
+    if: false
     defaults:
       run:
         shell: bash -l {0}

.github/workflows/test-linux-libs.yml

Lines changed: 33 additions & 33 deletions

@@ -21,39 +21,39 @@ permissions:
 
 jobs:
 
-  unittests-atari-dqn:
-    strategy:
-      matrix:
-        python_version: ["3.10"]
-        cuda_arch_version: ["12.8"]
-    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Data') }}
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    with:
-      repository: pytorch/rl
-      runner: "linux.g5.4xlarge.nvidia.gpu"
-      docker-image: "nvidia/cuda:12.4.0-devel-ubuntu22.04"
-      timeout: 120
-      script: |
-        if [[ "${{ github.ref }}" =~ release/* ]]; then
-          export RELEASE=1
-          export TORCH_VERSION=stable
-        else
-          export RELEASE=0
-          export TORCH_VERSION=nightly
-        fi
-
-        set -euo pipefail
-        export PYTHON_VERSION="3.10"
-        export CU_VERSION="cu128"
-        export TAR_OPTIONS="--no-same-owner"
-        export UPLOAD_CHANNEL="nightly"
-        export TF_CPP_MIN_LOG_LEVEL=0
-        export TD_GET_DEFAULTS_TO_NONE=1
-
-        bash .github/unittest/linux_libs/scripts_ataridqn/setup_env.sh
-        bash .github/unittest/linux_libs/scripts_ataridqn/install.sh
-        bash .github/unittest/linux_libs/scripts_ataridqn/run_test.sh
-        bash .github/unittest/linux_libs/scripts_ataridqn/post_process.sh
+  # unittests-atari-dqn:
+  #   strategy:
+  #     matrix:
+  #       python_version: ["3.10"]
+  #       cuda_arch_version: ["12.8"]
+  #   if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Data') }}
+  #   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+  #   with:
+  #     repository: pytorch/rl
+  #     runner: "linux.g5.4xlarge.nvidia.gpu"
+  #     docker-image: "nvidia/cuda:12.4.0-devel-ubuntu22.04"
+  #     timeout: 120
+  #     script: |
+  #       if [[ "${{ github.ref }}" =~ release/* ]]; then
+  #         export RELEASE=1
+  #         export TORCH_VERSION=stable
+  #       else
+  #         export RELEASE=0
+  #         export TORCH_VERSION=nightly
+  #       fi
+
+  #       set -euo pipefail
+  #       export PYTHON_VERSION="3.10"
+  #       export CU_VERSION="cu128"
+  #       export TAR_OPTIONS="--no-same-owner"
+  #       export UPLOAD_CHANNEL="nightly"
+  #       export TF_CPP_MIN_LOG_LEVEL=0
+  #       export TD_GET_DEFAULTS_TO_NONE=1
+
+  #       bash .github/unittest/linux_libs/scripts_ataridqn/setup_env.sh
+  #       bash .github/unittest/linux_libs/scripts_ataridqn/install.sh
+  #       bash .github/unittest/linux_libs/scripts_ataridqn/run_test.sh
+  #       bash .github/unittest/linux_libs/scripts_ataridqn/post_process.sh
 
   unittests-brax:
     strategy:

.gitignore

Lines changed: 1 addition & 0 deletions

@@ -44,6 +44,7 @@ htmlcov/
 .coverage
 .coverage.*
 .cache
+.neptune
 nosetests.xml
 coverage.xml
 *.cover

README.md

Lines changed: 97 additions & 0 deletions

@@ -23,6 +23,57 @@
 
 **TorchRL** is an open-source Reinforcement Learning (RL) library for PyTorch.
 
+## 🚀 What's New
+
+### LLM API - Complete Framework for Language Model Fine-tuning
+
+TorchRL now includes a comprehensive **LLM API** for post-training and fine-tuning of language models! This new framework provides everything you need for RLHF, supervised fine-tuning, and tool-augmented training:
+
+- 🤖 **Unified LLM Wrappers**: Seamless integration with Hugging Face models and vLLM inference engines - more to come!
+- 💬 **Conversation Management**: Advanced [`History`](torchrl/data/llm/history.py) class for multi-turn dialogue with automatic chat template detection
+- 🛠️ **Tool Integration**: [Built-in support](torchrl/envs/llm/transforms/) for Python code execution, function calling, and custom tool transforms
+- 🎯 **Specialized Objectives**: [GRPO](torchrl/objectives/llm/grpo.py) (Group Relative Policy Optimization) and [SFT](torchrl/objectives/llm/sft.py) loss functions optimized for language models
+- ⚡ **High-Performance Collectors**: [Async data collection](torchrl/collectors/llm/) with distributed training support
+- 🔄 **Flexible Environments**: Transform-based architecture for reward computation, data loading, and conversation augmentation
+
+The LLM API follows TorchRL's modular design principles, allowing you to mix and match components for your specific use case. Check out the [complete documentation](https://pytorch.org/rl/main/reference/llms.html) and [GRPO implementation example](https://github.com/pytorch/rl/tree/main/sota-implementations/grpo) to get started!
+
+<details>
+  <summary>Quick LLM API Example</summary>
+
+```python
+from torchrl.envs.llm import ChatEnv
+from torchrl.envs.llm.transforms import PythonInterpreter
+from torchrl.modules.llm import TransformersWrapper
+from torchrl.objectives.llm import GRPOLoss
+from torchrl.collectors.llm import LLMCollector
+
+# Create environment with Python tool execution
+env = ChatEnv(
+    tokenizer=tokenizer,
+    system_prompt="You are an assistant that can execute Python code.",
+    batch_size=[1]
+).append_transform(PythonInterpreter())
+
+# Wrap your language model
+llm = TransformersWrapper(
+    model=model,
+    tokenizer=tokenizer,
+    input_mode="history"
+)
+
+# Set up GRPO training
+loss_fn = GRPOLoss(llm, critic, gamma=0.99)
+collector = LLMCollector(env, llm, frames_per_batch=100)
+
+# Training loop
+for data in collector:
+    loss = loss_fn(data)
+    loss.backward()
+    optimizer.step()
+```
+
+</details>
+
 ## Key features
 
 - 🐍 **Python-first**: Designed with Python as the primary language for ease of use and flexibility
@@ -516,6 +567,39 @@ And it is `functorch` and `torch.compile` compatible!
 - various [recipes](https://github.com/pytorch/rl/blob/main/torchrl/trainers/helpers/models.py) to build models that
   correspond to the environment being deployed.
 
+- **LLM API**: Complete framework for language model fine-tuning with unified wrappers for Hugging Face and vLLM backends,
+  conversation management with automatic chat template detection, tool integration (Python execution, function calling),
+  specialized objectives (GRPO, SFT), and high-performance async collectors. Perfect for RLHF, supervised fine-tuning,
+  and tool-augmented training scenarios.
+  <details>
+    <summary>Code</summary>
+
+  ```python
+  from tensordict import TensorDict
+  from torchrl.envs.llm import ChatEnv
+  from torchrl.modules.llm import TransformersWrapper
+  from torchrl.envs.llm.transforms import PythonInterpreter
+
+  # Create environment with tool execution
+  env = ChatEnv(
+      tokenizer=tokenizer,
+      system_prompt="You can execute Python code.",
+      batch_size=[1]
+  ).append_transform(PythonInterpreter())
+
+  # Wrap language model for training
+  llm = TransformersWrapper(
+      model=model,
+      tokenizer=tokenizer,
+      input_mode="history"
+  )
+
+  # Multi-turn conversation with tool use
+  obs = env.reset(TensorDict({"query": "Calculate 2+2"}, batch_size=[1]))
+  llm_output = llm(obs)  # Generates response
+  obs = env.step(llm_output)  # Environment processes response
+  ```
+  </details>
+
 If you feel a feature is missing from the library, please submit an issue!
 If you would like to contribute to new features, check our [call for contributions](https://github.com/pytorch/rl/issues/509) and our [contribution](https://github.com/pytorch/rl/blob/main/CONTRIBUTING.md) page.

@@ -792,6 +876,18 @@ A series of [State-of-the-Art implementations](https://github.com/pytorch/rl/blo
   <td> NA
   </td>
   </tr>
+  <tr>
+  <td><a href="https://github.com/pytorch/rl/blob/main/sota-implementations/grpo">LLM API (GRPO)</a>
+  </td>
+  <td> NA
+  </td>
+  <td> +
+  </td>
+  <td> +
+  </td>
+  <td> NA
+  </td>
+  </tr>
 </table>
 
 ** The number indicates expected speed-up compared to eager mode when executed on CPU. Numbers may vary depending on
@@ -800,6 +896,7 @@ A series of [State-of-the-Art implementations](https://github.com/pytorch/rl/blo
 and many more to come!
 
 [Code examples](examples/) displaying toy code snippets and training scripts are also available
+- [LLM API & GRPO](sota-implementations/grpo) - Complete language model fine-tuning pipeline
 - [RLHF](examples/rlhf)
 - [Memory-mapped replay buffers](examples/torchrl_features)

docs/source/_static/img/llm-data.svg

Lines changed: 5 additions & 0 deletions

docs/source/_static/img/llm-env.png

577 KB
