-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy path12_hello_world_bitsandbytes.py
More file actions
101 lines (77 loc) · 2.52 KB
/
12_hello_world_bitsandbytes.py
File metadata and controls
101 lines (77 loc) · 2.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
ToolBrain Training Example
This script demonstrates the new, ultra-simplified ToolBrain API:
1. Create a smolagent CodeAgent
2. Create brain with Brain() constructor (all parameters as keywords)
3. Train with explicit, self-documenting parameters
This script is similar to the standard hello-world example but running inference with bitsandsbytes to save GPU memory.
This example only run on machines with GPU supported
"""
from smolagents import tool, TransformersModel, CodeAgent
from transformers import BitsAndBytesConfig
from toolbrain import Brain
from toolbrain.rewards import reward_exact_match
# --- 1. Define Tools and Reward Function (User-defined) ---
@tool
def add(a: int, b: int) -> int:
    """
    Return the sum of two integers.

    Args:
        a (int): First addend.
        b (int): Second addend.

    Returns:
        int: The value of a + b.
    """
    total = a + b
    return total
@tool
def multiply(a: int, b: int) -> int:
    """
    Return the product of two integers.

    Args:
        a (int): First factor.
        b (int): Second factor.

    Returns:
        int: The value of a * b.
    """
    product = a * b
    return product
# --- 2. Prepare Training Data ---
# Each sample pairs a natural-language query with the exact answer string
# the agent is expected to produce (used by reward_exact_match below).
training_dataset = [
    {"query": "Use the add tool to calculate 5 + 7", "gold_answer": "12"},
    {"query": "What is 8 multiplied by 6?", "gold_answer": "48"},
    # Add more examples here
]
print("🧠 ToolBrain Training Example with Reinforcement Learning")
print("=" * 60)
# 0. set bitsandbytes config for low precision inference
nf4_config = BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4")
# 1. Create agent
model = TransformersModel(
model_id="Qwen/Qwen2.5-0.5B-Instruct", # use a bigger model for better results
max_new_tokens=128,
model_kwargs={"quantization_config": nf4_config},
)
agent = CodeAgent(
model=model,
tools=[add, multiply],
max_steps=1
)
print("✅ Agent created.")
# 2. Create Brain
# This is a simplified version of Brain with default parameters settings, for advanced parameter settings please
# refer to the documentation.
brain = Brain(
agent, # Agent instance
algorithm="GRPO", # Algorithm choice
# Customised reward function is defined here, we use a mocking reward function with value 1.0
# for an exact gold_answer match and 0 otherwise, llm as judge can be used for automatic reward
reward_func=reward_exact_match,
use_bitsandbytes=True
)
# 3. Train the agent with RL for 10 training GRPO steps
brain.train(training_dataset, num_iterations=10)