distro: make liger-kernel optional. do not rely on requirement.txt during setup (#286)

0c32cf78 · HL · GitHub · c8b9c355 · 0c32cf78 · 0c32cf78
Unverified Commit 0c32cf78 authored Feb 16, 2025 by HL Committed by GitHub Feb 16, 2025
11 changed files
--- a/.github/workflows/e2e_gsm8k.yml
+++ b/.github/workflows/e2e_gsm8k.yml
@@ -37,7 +37,7 @@ jobs:
      - name: Install the current repository
        run: |
          pip3 install hf_transfer
-          pip3 install -e .[test]
+          pip3 install -e .[test,gpu]
      - name: Prepare gsm8k dataset
        run: |
          ray stop --force

--- a/.github/workflows/e2e_sft.yml
+++ b/.github/workflows/e2e_sft.yml
@@ -37,7 +37,7 @@ jobs:
      - name: Install the current repository
        run: |
          pip3 install hf_transfer
-          pip3 install -e .[test]
+          pip3 install -e .[test,gpu]
      - name: Prepare gsm8k dataset
        run: |
          ray stop --force

--- a/.github/workflows/sanity.yml
+++ b/.github/workflows/sanity.yml
@@ -40,3 +40,7 @@ jobs:
      - name: Run license test
        run: |
          python3 tests/sanity/check_license.py --directory .
+      - name: Run dependency test
+        run: |
+          pip install tomli
+          pytest -s -x tests/distro/requirements.py
--- a/docs/perf/perf_tuning.rst
+++ b/docs/perf/perf_tuning.rst
@@ -42,6 +42,8 @@ Below are key factors for tuning vLLM-based rollout. Before tuning, we recommend
 More tuning details such as dealing with Preemption and Chunked-prefill
 can be found in `vLLM official tuning guide <https://docs.vllm.ai/en/latest/performance/optimization.html>`_ 
+vLLM performance can be further improved by upgrading from v0.6.3 to v0.7. See https://github.com/volcengine/verl/blob/main/docs/README_vllm0.7.md for details on how to upgrade.
 Enable remove padding (sequence packing)
 -----------------------------------------
@@ -149,7 +151,7 @@ LigerKernel for SFT
 LigerKernel is a high-performance kernel for Supervised Fine-Tuning (SFT) that can improve training efficiency. To enable LigerKernel in your SFT training:
-1. In your SFT configuration file (e.g., ``verl/trainer/config/sft_trainer.yaml``), set the ``use_liger`` parameter:
+1. Install liger-kernel via ``pip3 install liger-kernel``. Then, in your SFT configuration file (e.g., ``verl/trainer/config/sft_trainer.yaml``), set the ``use_liger`` parameter:
   .. code-block:: yaml

--- a/docs/start/install.rst
+++ b/docs/start/install.rst
@@ -81,6 +81,8 @@ own post-training jobs.
 .. code:: bash
   # install verl together with some lightweight dependencies in setup.py
+   pip3 install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu124
+   pip3 install flash-attn --no-build-isolation
   git clone https://github.com/volcengine/verl.git
   cd verl
   pip3 install -e .

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,22 +36,25 @@ dependencies = [
    "dill",
    "hydra-core",
    "numpy",
-    "pybind11",
+    "pandas",
-    "ray>=2.38",
-    "tensordict",
-    "transformers<4.48",
-    "vllm<=0.6.3",
    "peft",
-    "liger-kernel",
+    "pyarrow>=15.0.0",
+    "pybind11",
    "pylatexenc",
+    "ray>=2.10",
+    "tensordict<0.6",
+    "transformers",
+    "vllm<=0.6.3.post1",
+    'wandb',
 ]
 # Optional dependencies (extras_require in setup.py)
 [project.optional-dependencies]
 test = [
-  "pytest", "yapf"
+  "pytest", "yapf", "py-spy",
 ]
 prime = ["pyext"]
+gpu = ["liger-kernel", "flash-attn"]
 # URLs
 [project.urls]
@@ -79,4 +82,4 @@ version = {file = "verl/version/version"}
 verl = [
  "version/*",
  "trainer/config/*.yaml"
 ]
\ No newline at end of file
--- a/requirements.txt
+++ b/requirements.txt
+# requirements.txt records the full set of dependencies for development
 accelerate
 codetiming
 datasets
 dill
 flash-attn
 hydra-core
+liger-kernel
 numpy
 pandas
 peft
 pyarrow>=15.0.0
 pybind11
-ray>=2.38
+pylatexenc
+ray
 tensordict<0.6
-transformers<4.48
+transformers
-vllm
+vllm==0.6.3.post1
 wandb
-liger-kernel
-pylatexenc
\ No newline at end of file
--- a/scripts/format.sh
+++ b/scripts/format.sh
 #!/bin/bash
 pip3 install --upgrade yapf
-yapf -ir -vv --style ./.style.yapf verl tests single_controller examples
+python3 -m yapf -ir -vv --style ./.style.yapf verl tests single_controller examples
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
@@ -21,14 +21,33 @@ version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))
 with open(os.path.join(version_folder, 'verl/version/version')) as f:
    __version__ = f.read().strip()
+install_requires = [
+  'accelerate',
+  'codetiming',
+  'datasets',
+  'dill',
+  'hydra-core',
+  'numpy',
+  'pandas',
+  'peft',
+  'pyarrow>=15.0.0',
+  'pybind11',
+  'pylatexenc',
+  'ray>=2.10',
+  'tensordict<0.6',
+  'transformers',
+  'vllm<=0.6.3.post1',
+  'wandb',
+]
-with open('requirements.txt') as f:
+TEST_REQUIRES = ['pytest', 'yapf', 'py-spy']
-    required = f.read().splitlines()
+PRIME_REQUIRES = ['pyext']
-    install_requires = [item.strip() for item in required if item.strip()[0] != '#']
+GPU_REQUIRES = ['liger-kernel', 'flash-attn']
 extras_require = {
-    'test': ['pytest', 'yapf'],
+  'test': TEST_REQUIRES,
-    'prime': ['pyext'],
+  'prime': PRIME_REQUIRES,
+  'gpu': GPU_REQUIRES,
 }
 from pathlib import Path

--- a/tests/distro/requirements.py
+++ b/tests/distro/requirements.py
+# Copyright 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import ast
+from pathlib import Path
+try:
+    import tomllib
+except ModuleNotFoundError:
+    import tomli as tomllib
+def extract_install_requires(setup_py_path):
+    """Extract the ``install_requires`` list from a setup.py file via AST parsing.
+
+    The file is parsed, never executed. The function looks for a call to a
+    bare name ``setup`` (attribute calls such as ``setuptools.setup(...)`` are
+    not matched), reads its ``install_requires`` keyword, and resolves it to a
+    list of constant values.
+
+    Args:
+        setup_py_path: Path to the setup.py file to inspect.
+
+    Returns:
+        A list holding the value of every constant element of the
+        ``install_requires`` list. Non-constant elements are silently skipped.
+
+    Raises:
+        ValueError: If no ``setup()`` call is found, the call has no
+            ``install_requires`` keyword, the keyword refers to a variable
+            that is never assigned or is assigned a non-list value, or the
+            keyword is neither a name nor a list literal.
+    """
+    with open(setup_py_path, "r") as f:
+        tree = ast.parse(f.read())
+    # Locate the setup() function call; the first match yielded by ast.walk wins
+    setup_call = None
+    for node in ast.walk(tree):
+        if (isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == "setup"):
+            setup_call = node
+            break
+    if not setup_call:
+        raise ValueError("setup() call not found in setup.py")
+    # Extract the install_requires keyword argument
+    install_requires = None
+    for keyword in setup_call.keywords:
+        if keyword.arg == "install_requires":
+            install_requires = keyword.value
+            break
+    if not install_requires:
+        raise ValueError("install_requires not specified in setup() call")
+    # Handle cases where install_requires is a variable or a direct list
+    if isinstance(install_requires, ast.Name):
+        # Variable reference: search the whole tree for an assignment to that name
+        var_name = install_requires.id
+        requires = []
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Assign):
+                for target in node.targets:
+                    if isinstance(target, ast.Name) and target.id == var_name:
+                        if isinstance(node.value, ast.List):
+                            # Keep only constant elements; anything else is ignored
+                            for element in node.value.elts:
+                                if isinstance(element, ast.Constant):
+                                    requires.append(element.value)
+                            # The first matching assignment decides the result
+                            return requires
+                        else:
+                            raise ValueError(f"install_requires references non-list variable {var_name}")
+        raise ValueError(f"Variable {var_name} not found in setup.py")
+    elif isinstance(install_requires, ast.List):
+        # Inline list literal passed directly to setup()
+        return [element.value for element in install_requires.elts if isinstance(element, ast.Constant)]
+    else:
+        raise ValueError("install_requires must be a list or variable referencing a list")
+def test_dependencies_consistent():
+    """Check that setup.py and pyproject.toml declare the same dependencies.
+
+    The ``install_requires`` list extracted from setup.py is compared with
+    ``project.dependencies`` from pyproject.toml using ``==``, so the entries
+    must agree in content and in order.
+    """
+    # Project root is three directory levels above this file (adjust if the test script moves)
+    project_root = Path(__file__).parent.parent.parent
+    # Extract dependencies from setup.py
+    setup_deps = extract_install_requires(project_root / "setup.py")
+    # Extract dependencies from pyproject.toml
+    pyproject_path = project_root / "pyproject.toml"
+    # The file is opened in binary mode before being handed to tomllib.load
+    with open(pyproject_path, "rb") as f:
+        pyproject_data = tomllib.load(f)
+    toml_deps = pyproject_data["project"]["dependencies"]
+    # Assert equality to ensure consistency
+    assert setup_deps == toml_deps, ("Please make sure dependencies in setup.py and pyproject.toml matches.\n"
+                                     f"setup.py: {setup_deps}\n"
+                                     f"pyproject.toml: {toml_deps}")
--- a/tests/sanity/check_license.py
+++ b/tests/sanity/check_license.py
@@ -13,7 +13,11 @@
 # limitations under the License.
 license_head_bytedance = "Copyright 2024 Bytedance Ltd. and/or its affiliates"
-license_head2_prime = "Copyright 2024 PRIME team and/or its affiliates"
+license_head_bytedance_25 = "Copyright 2025 Bytedance Ltd. and/or its affiliates"
+# Add custom license headers below
+license_head_prime = "Copyright 2024 PRIME team and/or its affiliates"
+license_headers = [license_head_bytedance, license_head_bytedance_25, license_head_prime]
 from pathlib import Path
 from argparse import ArgumentParser
@@ -32,5 +36,9 @@ if __name__ == '__main__':
        with open(path_in_str, 'r', encoding='utf-8') as f:
            file_content = f.read()
-            assert license_head_bytedance in file_content or \
+            has_license = False
-                license_head2_prime in file_content, f'file {path_in_str} does not contain license'
+            for lh in license_headers:
+                if lh in file_content:
+                    has_license = True
+                    break
+            assert has_license, f'file {path_in_str} does not contain license'