Unverified Commit 0c32cf78 by HL Committed by GitHub

distro: make liger-kernel optional. do not rely on requirement.txt during setup (#286)

parent c8b9c355
...@@ -37,7 +37,7 @@ jobs: ...@@ -37,7 +37,7 @@ jobs:
- name: Install the current repository - name: Install the current repository
run: | run: |
pip3 install hf_transfer pip3 install hf_transfer
pip3 install -e .[test] pip3 install -e .[test,gpu]
- name: Prepare gsm8k dataset - name: Prepare gsm8k dataset
run: | run: |
ray stop --force ray stop --force
......
...@@ -37,7 +37,7 @@ jobs: ...@@ -37,7 +37,7 @@ jobs:
- name: Install the current repository - name: Install the current repository
run: | run: |
pip3 install hf_transfer pip3 install hf_transfer
pip3 install -e .[test] pip3 install -e .[test,gpu]
- name: Prepare gsm8k dataset - name: Prepare gsm8k dataset
run: | run: |
ray stop --force ray stop --force
......
...@@ -40,3 +40,7 @@ jobs: ...@@ -40,3 +40,7 @@ jobs:
- name: Run license test - name: Run license test
run: | run: |
python3 tests/sanity/check_license.py --directory . python3 tests/sanity/check_license.py --directory .
- name: Run dependency test
run: |
pip install tomli
pytest -s -x tests/distro/requirements.py
...@@ -42,6 +42,8 @@ Below are key factors for tuning vLLM-based rollout. Before tuning, we recommend ...@@ -42,6 +42,8 @@ Below are key factors for tuning vLLM-based rollout. Before tuning, we recommend
More tuning details such as dealing with Preemption and Chunked-prefill More tuning details such as dealing with Preemption and Chunked-prefill
can be found in `vLLM official tuning guide <https://docs.vllm.ai/en/latest/performance/optimization.html>`_ can be found in `vLLM official tuning guide <https://docs.vllm.ai/en/latest/performance/optimization.html>`_
vLLM performance can be improved further by upgrading from v0.6.3 to v0.7. See https://github.com/volcengine/verl/blob/main/docs/README_vllm0.7.md for upgrade instructions.
Enable remove padding (sequence packing) Enable remove padding (sequence packing)
----------------------------------------- -----------------------------------------
...@@ -149,7 +151,7 @@ LigerKernel for SFT ...@@ -149,7 +151,7 @@ LigerKernel for SFT
LigerKernel is a high-performance kernel for Supervised Fine-Tuning (SFT) that can improve training efficiency. To enable LigerKernel in your SFT training: LigerKernel is a high-performance kernel for Supervised Fine-Tuning (SFT) that can improve training efficiency. To enable LigerKernel in your SFT training:
1. In your SFT configuration file (e.g., ``verl/trainer/config/sft_trainer.yaml``), set the ``use_liger`` parameter: 1. Install liger-kernel via ``pip3 install liger-kernel``. In your SFT configuration file (e.g., ``verl/trainer/config/sft_trainer.yaml``), set the ``use_liger`` parameter:
.. code-block:: yaml .. code-block:: yaml
......
...@@ -81,6 +81,8 @@ own post-training jobs. ...@@ -81,6 +81,8 @@ own post-training jobs.
.. code:: bash .. code:: bash
# install verl together with some lightweight dependencies in setup.py # install verl together with some lightweight dependencies in setup.py
pip3 install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu124
pip3 install flash-attn --no-build-isolation
git clone https://github.com/volcengine/verl.git git clone https://github.com/volcengine/verl.git
cd verl cd verl
pip3 install -e . pip3 install -e .
......
...@@ -36,22 +36,25 @@ dependencies = [ ...@@ -36,22 +36,25 @@ dependencies = [
"dill", "dill",
"hydra-core", "hydra-core",
"numpy", "numpy",
"pybind11", "pandas",
"ray>=2.38",
"tensordict",
"transformers<4.48",
"vllm<=0.6.3",
"peft", "peft",
"liger-kernel", "pyarrow>=15.0.0",
"pybind11",
"pylatexenc", "pylatexenc",
"ray>=2.10",
"tensordict<0.6",
"transformers",
"vllm<=0.6.3.post1",
'wandb',
] ]
# Optional dependencies (extras_require in setup.py) # Optional dependencies (extras_require in setup.py)
[project.optional-dependencies] [project.optional-dependencies]
test = [ test = [
"pytest", "yapf" "pytest", "yapf", "py-spy",
] ]
prime = ["pyext"] prime = ["pyext"]
gpu = ["liger-kernel", "flash-attn"]
# URLs # URLs
[project.urls] [project.urls]
...@@ -79,4 +82,4 @@ version = {file = "verl/version/version"} ...@@ -79,4 +82,4 @@ version = {file = "verl/version/version"}
verl = [ verl = [
"version/*", "version/*",
"trainer/config/*.yaml" "trainer/config/*.yaml"
] ]
\ No newline at end of file
#!/bin/bash #!/bin/bash
pip3 install --upgrade yapf pip3 install --upgrade yapf
yapf -ir -vv --style ./.style.yapf verl tests single_controller examples python3 -m yapf -ir -vv --style ./.style.yapf verl tests single_controller examples
\ No newline at end of file
...@@ -21,14 +21,33 @@ version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) ...@@ -21,14 +21,33 @@ version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))
with open(os.path.join(version_folder, 'verl/version/version')) as f: with open(os.path.join(version_folder, 'verl/version/version')) as f:
__version__ = f.read().strip() __version__ = f.read().strip()
install_requires = [
'accelerate',
'codetiming',
'datasets',
'dill',
'hydra-core',
'numpy',
'pandas',
'peft',
'pyarrow>=15.0.0',
'pybind11',
'pylatexenc',
'ray>=2.10',
'tensordict<0.6',
'transformers',
'vllm<=0.6.3.post1',
'wandb',
]
with open('requirements.txt') as f: TEST_REQUIRES = ['pytest', 'yapf', 'py-spy']
required = f.read().splitlines() PRIME_REQUIRES = ['pyext']
install_requires = [item.strip() for item in required if item.strip()[0] != '#'] GPU_REQUIRES = ['liger-kernel', 'flash-attn']
extras_require = { extras_require = {
'test': ['pytest', 'yapf'], 'test': TEST_REQUIRES,
'prime': ['pyext'], 'prime': PRIME_REQUIRES,
'gpu': GPU_REQUIRES,
} }
from pathlib import Path from pathlib import Path
......
# Copyright 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ast
from pathlib import Path
try:
import tomllib
except ModuleNotFoundError:
import tomli as tomllib
def _is_setup_call(node):
    """Return True if *node* is a ``setup(...)`` or ``setuptools.setup(...)`` call."""
    if not isinstance(node, ast.Call):
        return False
    func = node.func
    if isinstance(func, ast.Name):
        return func.id == "setup"
    # Also accept the qualified spelling used after ``import setuptools``.
    return (isinstance(func, ast.Attribute) and func.attr == "setup" and isinstance(func.value, ast.Name) and
            func.value.id == "setuptools")


def _constant_elements(list_node):
    """Return the values of the literal constants in an ``ast.List`` node.

    Elements that are not plain constants (names, calls, f-strings, ...) are
    skipped, because their value cannot be known without executing the file.
    """
    return [element.value for element in list_node.elts if isinstance(element, ast.Constant)]


def extract_install_requires(setup_py_path):
    """Extract the ``install_requires`` list from a ``setup.py`` using AST parsing.

    The file is parsed with :mod:`ast` and never executed. ``install_requires``
    may be passed to ``setup()`` either as a list literal or as a name that is
    assigned a list literal elsewhere in the file.

    Args:
        setup_py_path: Path to the ``setup.py`` file to inspect.

    Returns:
        A list with the constant elements of the ``install_requires`` list, in
        source order. Non-constant elements are skipped.

    Raises:
        ValueError: If no ``setup()`` call is found, if the call has no
            ``install_requires`` keyword, if the keyword is neither a list nor
            a name, or if the referenced name is missing or not bound to a
            list literal.
    """
    # Read raw bytes so ast.parse applies Python's own source-encoding rules
    # (UTF-8 by default, or a coding cookie) instead of the locale's encoding.
    with open(setup_py_path, "rb") as f:
        tree = ast.parse(f.read())

    # Locate the setup() function call.
    setup_call = next((node for node in ast.walk(tree) if _is_setup_call(node)), None)
    if setup_call is None:
        raise ValueError("setup() call not found in setup.py")

    # Extract the install_requires keyword argument.
    install_requires = next(
        (keyword.value for keyword in setup_call.keywords if keyword.arg == "install_requires"),
        None,
    )
    if install_requires is None:
        raise ValueError("install_requires not specified in setup() call")

    # Direct list literal: setup(install_requires=[...]).
    if isinstance(install_requires, ast.List):
        return _constant_elements(install_requires)
    if not isinstance(install_requires, ast.Name):
        raise ValueError("install_requires must be a list or variable referencing a list")

    # Variable reference: resolve it against the first matching assignment.
    var_name = install_requires.id
    for node in ast.walk(tree):
        if isinstance(node, ast.Assign):
            targets, value = node.targets, node.value
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            # Annotated form, e.g. ``install_requires: list = [...]``.
            targets, value = [node.target], node.value
        else:
            continue
        if any(isinstance(target, ast.Name) and target.id == var_name for target in targets):
            if not isinstance(value, ast.List):
                raise ValueError(f"install_requires references non-list variable {var_name}")
            return _constant_elements(value)
    raise ValueError(f"Variable {var_name} not found in setup.py")
def test_dependencies_consistent():
    """Verify that setup.py and pyproject.toml list identical dependencies.

    Compares the ``install_requires`` entries extracted from ``setup.py`` with
    ``project.dependencies`` in ``pyproject.toml``. The comparison is a plain
    list equality, so both content and order must agree.
    """
    # The repository root is resolved relative to this file
    # (adjust if the test script is moved to a different depth).
    repo_root = Path(__file__).parent.parent.parent

    # Dependencies as declared in setup.py.
    setup_deps = extract_install_requires(repo_root / "setup.py")

    # Dependencies as declared in pyproject.toml (tomllib requires binary mode).
    with (repo_root / "pyproject.toml").open("rb") as toml_file:
        toml_deps = tomllib.load(toml_file)["project"]["dependencies"]

    mismatch_report = ("Please make sure dependencies in setup.py and pyproject.toml matches.\n"
                       f"setup.py: {setup_deps}\n"
                       f"pyproject.toml: {toml_deps}")
    assert setup_deps == toml_deps, mismatch_report
...@@ -13,7 +13,11 @@ ...@@ -13,7 +13,11 @@
# limitations under the License. # limitations under the License.
license_head_bytedance = "Copyright 2024 Bytedance Ltd. and/or its affiliates" license_head_bytedance = "Copyright 2024 Bytedance Ltd. and/or its affiliates"
license_head2_prime = "Copyright 2024 PRIME team and/or its affiliates" license_head_bytedance_25 = "Copyright 2025 Bytedance Ltd. and/or its affiliates"
# Add custom license headers below
license_head_prime = "Copyright 2024 PRIME team and/or its affiliates"
license_headers = [license_head_bytedance, license_head_bytedance_25, license_head_prime]
from pathlib import Path from pathlib import Path
from argparse import ArgumentParser from argparse import ArgumentParser
...@@ -32,5 +36,9 @@ if __name__ == '__main__': ...@@ -32,5 +36,9 @@ if __name__ == '__main__':
with open(path_in_str, 'r', encoding='utf-8') as f: with open(path_in_str, 'r', encoding='utf-8') as f:
file_content = f.read() file_content = f.read()
assert license_head_bytedance in file_content or \ has_license = False
license_head2_prime in file_content, f'file {path_in_str} does not contain license' for lh in license_headers:
if lh in file_content:
has_license = True
break
assert has_license, f'file {path_in_str} does not contain license'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment