Skip to content

Commit 8e7451f

Browse files
authored
Merge pull request #48 from netoptimizer/hints_traits-001-basic
Hints: Add basic benchmark module for traits proposal
2 parents d9e7c81 + a1a0111 commit 8e7451f

3 files changed

Lines changed: 258 additions & 0 deletions

File tree

kernel/config.default

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,6 @@ CONFIG_PAGE_BULK_API=n
3535

3636
# New benchmarking modules for testing page_pool
3737
CONFIG_BENCH_PAGE_POOL=m
38+
39+
# Features under development
40+
# CONFIG_BENCH_TRAITS=m

kernel/lib/Kbuild

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,5 @@ obj-$(CONFIG_SKB_ARRAY_TESTS) += skb_array_parallel01.o
3131

3232
obj-$(CONFIG_BENCH_PAGE_POOL) += bench_page_pool_simple.o
3333
obj-$(CONFIG_BENCH_PAGE_POOL) += bench_page_pool_cross_cpu.o
34+
35+
obj-$(CONFIG_BENCH_TRAITS) += bench_traits_simple.o

kernel/lib/bench_traits_simple.c

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
/*
2+
* Benchmark module for traits - related to XDP-hints
3+
*
4+
* NOTICE: Compiling this depends on kernel changes under development
5+
* https://github.com/arthurfabre/linux/tree/afabre/traits-002-bounds-inline
6+
*/
7+
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8+
9+
#include <linux/module.h>
10+
#include <linux/time_bench.h>
11+
#include <net/xdp.h>
12+
#include <net/trait.h>
13+
#include <linux/mm.h>
14+
15+
/* Emit pr_info() messages on module load/unload when non-zero */
static int verbose=1;

/* Makes tests selectable. Useful for perf-record to analyze a single test.
 * Each bit in run_flags enables one test; bit positions follow enum
 * benchmark_bit below.
 * Hint: Bash shells support writing binary numbers like: $((2#101010))
 *
 * # perf record -g modprobe bench_traits_simple run_flags=$((2#10))
 */
static unsigned long run_flags = 0xFFFFFFFF;
module_param(run_flags, ulong, 0);
MODULE_PARM_DESC(run_flags, "Limit which bench test that runs");
/* Count the bit number from the enum */
enum benchmark_bit {
	bit_run_bench_baseline,
	bit_run_bench_func,
	bit_run_bench_func_ptr,
	bit_run_bench_trait_set,
	bit_run_bench_trait_get,
};
#define bit(b) (1 << (b))
#define enabled(b) ((run_flags & (bit(b))))

/* notice time_bench is limited to U32_MAX nr loops */
static unsigned long loops = 10000000;
module_param(loops, ulong, 0);
MODULE_PARM_DESC(loops, "Specify loops bench will run");

/* Keep the module loaded after benchmarks finish, so perf report can
 * resolve symbols inside this module (see module init return trick).
 */
static unsigned long stay_loaded = 0;
module_param(stay_loaded, ulong, 0);
MODULE_PARM_DESC(stay_loaded, "For perf report keep module loaded");
44+
45+
/* Timing at the nanosec level, we need to know the overhead
46+
* introduced by the for loop itself */
47+
static int time_bench_for_loop(
48+
struct time_bench_record *rec, void *data)
49+
{
50+
uint64_t loops_cnt = 0;
51+
int i;
52+
53+
time_bench_start(rec);
54+
/** Loop to measure **/
55+
for (i = 0; i < rec->loops; i++) {
56+
loops_cnt++;
57+
barrier(); /* avoid compiler to optimize this loop */
58+
}
59+
time_bench_stop(rec, loops_cnt);
60+
return loops_cnt;
61+
}
62+
63+
/* Call target for the function-call-cost benchmarks.  noinline plus the
 * volatile pointer store prevent the compiler from eliding the call.
 */
static void noinline measured_function(volatile int *var)
{
	*var = 1;
}
67+
static int time_func(
68+
struct time_bench_record *rec, void *data)
69+
{
70+
int i, tmp;
71+
uint64_t loops_cnt = 0;
72+
73+
time_bench_start(rec);
74+
/** Loop to measure **/
75+
for (i = 0; i < rec->loops; i++) {
76+
measured_function(&tmp);
77+
loops_cnt++;
78+
}
79+
time_bench_stop(rec, loops_cnt);
80+
return loops_cnt;
81+
}
82+
83+
/* Ops table used to force an indirect call in time_func_ptr() */
struct func_ptr_ops {
	void (*func)(volatile int *var);
	/* NOTE(review): func2 is never assigned or called here — presumably
	 * kept to mimic a multi-entry kernel ops struct; confirm.
	 */
	unsigned int (*func2)(unsigned int count);
};
static struct func_ptr_ops my_func_ptr __read_mostly = {
	.func = measured_function,
};
90+
static int time_func_ptr(
91+
struct time_bench_record *rec, void *data)
92+
{
93+
int i, tmp;
94+
uint64_t loops_cnt = 0;
95+
96+
time_bench_start(rec);
97+
/** Loop to measure **/
98+
for (i = 0; i < rec->loops; i++) {
99+
my_func_ptr.func(&tmp);
100+
loops_cnt++;
101+
}
102+
time_bench_stop(rec, loops_cnt);
103+
return loops_cnt;
104+
}
105+
106+
/* WORK AROUND for improper EXPORT_SYMBOL_GPL: the traits branch (see
 * header NOTICE) does not yet export these kfuncs to modules, so declare
 * the prototypes locally and rely on the symbols resolving at load time.
 */
int bpf_xdp_trait_set(const struct xdp_buff *xdp, u64 key,
		      const void *val, u64 val__sz, u64 flags);
int bpf_xdp_trait_get(const struct xdp_buff *xdp, u64 key,
		      void *val, u64 val__sz);
111+
112+
static int time_trait_set(struct time_bench_record *rec, void *data)
113+
{
114+
uint64_t loops_cnt = 0;
115+
int i;
116+
117+
u64 key = 1;
118+
u64 val = 42;
119+
120+
/* XDP create fake packet */
121+
gfp_t gfp_mask = (__GFP_ZERO);
122+
struct page *page;
123+
void *data_start;
124+
struct xdp_buff xdp_buff = {};
125+
struct xdp_buff *xdp = &xdp_buff;
126+
127+
page = alloc_page(gfp_mask);
128+
if (!page)
129+
return 0;
130+
131+
/* XDP setup fake packet */
132+
data_start = page_address(page);
133+
xdp_init_buff(xdp, PAGE_SIZE, NULL);
134+
xdp_prepare_buff(xdp, data_start, XDP_PACKET_HEADROOM, 1024, true);
135+
136+
time_bench_start(rec);
137+
/** Loop to measure **/
138+
for (i = 0; i < rec->loops; i++) {
139+
bpf_xdp_trait_set(xdp, key, &val, sizeof(val), 0);
140+
// bpf_xdp_trait_set(xdp, 2, &val, sizeof(val), 0);
141+
loops_cnt++;
142+
}
143+
time_bench_stop(rec, loops_cnt);
144+
145+
__free_page(page);
146+
147+
return loops_cnt;
148+
}
149+
150+
static int time_trait_get(struct time_bench_record *rec, void *data)
151+
{
152+
uint64_t loops_cnt = 0;
153+
int i;
154+
155+
u64 key = 1;
156+
u64 val = 42;
157+
u64 val2 = 0;
158+
159+
/* XDP create fake packet */
160+
gfp_t gfp_mask = (__GFP_ZERO);
161+
struct page *page;
162+
void *data_start;
163+
struct xdp_buff xdp_buff = {};
164+
struct xdp_buff *xdp = &xdp_buff;
165+
166+
page = alloc_page(gfp_mask);
167+
if (!page)
168+
return 0;
169+
170+
/* XDP setup fake packet */
171+
data_start = page_address(page);
172+
xdp_init_buff(xdp, PAGE_SIZE, NULL);
173+
xdp_prepare_buff(xdp, data_start, XDP_PACKET_HEADROOM, 1024, true);
174+
175+
bpf_xdp_trait_set(xdp, key, &val, sizeof(val), 0);
176+
177+
time_bench_start(rec);
178+
/** Loop to measure **/
179+
for (i = 0; i < rec->loops; i++) {
180+
bpf_xdp_trait_get(xdp, key, &val2, sizeof(val2));
181+
loops_cnt++;
182+
}
183+
time_bench_stop(rec, loops_cnt);
184+
185+
__free_page(page);
186+
187+
return loops_cnt;
188+
}
189+
190+
static int run_benchmark_tests(void)
191+
{
192+
uint32_t nr_loops = loops;
193+
194+
/* Baseline tests */
195+
if (enabled(bit_run_bench_baseline))
196+
time_bench_loop(nr_loops*10, 0,
197+
"for_loop", NULL, time_bench_for_loop);
198+
199+
/* cost for a local function call */
200+
if (enabled(bit_run_bench_func))
201+
time_bench_loop(loops, 0, "function_call_cost",
202+
NULL, time_func);
203+
204+
/* cost for a function pointer invocation (indirect call)
205+
* - likely side-channel mitigation overhead
206+
*/
207+
if (enabled(bit_run_bench_func_ptr))
208+
time_bench_loop(loops, 0, "func_ptr_call_cost",
209+
NULL, time_func_ptr);
210+
211+
if (enabled(bit_run_bench_trait_set)) {
212+
time_bench_loop(loops, 0, "trait_set",
213+
NULL, time_trait_set);
214+
}
215+
216+
if (enabled(bit_run_bench_trait_get)) {
217+
time_bench_loop(loops, 0, "trait_get",
218+
NULL, time_trait_get);
219+
}
220+
221+
return 0;
222+
}
223+
224+
/* Module init: validate the loops parameter, run the selected benchmarks,
 * then normally fail the load on purpose so the module unloads itself.
 */
static int __init bench_traits_simple_module_init(void)
{
	if (verbose)
		pr_info("Loaded\n");

	/* time_bench cannot handle more than U32_MAX loop iterations */
	if (loops > U32_MAX) {
		pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n",
		       loops, U32_MAX);
		return -ECHRNG;
	}

	run_benchmark_tests();

	if (stay_loaded)
		return 0;

	/* Returning an error here is a trick: the benchmarks already ran,
	 * and failing init means no explicit rmmod is needed afterwards.
	 */
	return -EAGAIN;
}
module_init(bench_traits_simple_module_init);
243+
244+
/* Module exit: only reached when stay_loaded kept the module resident */
static void __exit bench_traits_simple_module_exit(void)
{
	if (verbose)
		pr_info("Unloaded\n");
}
module_exit(bench_traits_simple_module_exit);
250+
251+
MODULE_DESCRIPTION("Benchmark of traits");
252+
MODULE_AUTHOR("Jesper Dangaard Brouer <hawk@kernel.org>");
253+
MODULE_LICENSE("GPL");

0 commit comments

Comments
 (0)