|
| 1 | +/* |
| 2 | + * Benchmark module for traits - related to XDP-hints |
| 3 | + * |
| 4 | + * NOTICE: Compiling this depends on kernel changes still under development: |
| 5 | + * https://github.com/arthurfabre/linux/tree/afabre/traits-002-bounds-inline |
| 6 | + */ |
| 7 | +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 8 | + |
| 9 | +#include <linux/module.h> |
| 10 | +#include <linux/time_bench.h> |
| 11 | +#include <net/xdp.h> |
| 12 | +#include <net/trait.h> |
| 13 | +#include <linux/mm.h> |
| 14 | + |
| 15 | +static int verbose=1; |
| 16 | + |
| 17 | +/* Makes tests selectable. Useful for perf-record to analyze a single test. |
| 18 | + * Hint: Bash shells support writing binary number like: $((2#101010) |
| 19 | + * |
| 20 | + * # perf record -g modprobe bench_traits_simple run_flags=$((2#10)) |
| 21 | + */ |
| 22 | +static unsigned long run_flags = 0xFFFFFFFF; |
| 23 | +module_param(run_flags, ulong, 0); |
| 24 | +MODULE_PARM_DESC(run_flags, "Limit which bench test that runs"); |
/*
 * Each enumerator's value is the bit number of its test in run_flags:
 * bit_run_bench_baseline is bit 0, bit_run_bench_func is bit 1, etc.
 */
enum benchmark_bit {
	bit_run_bench_baseline,
	bit_run_bench_func,
	bit_run_bench_func_ptr,
	bit_run_bench_trait_set,
	bit_run_bench_trait_get,
};

/*
 * bit(b):     mask with only bit number @b set. Uses an unsigned long
 *             constant so the mask has the same type as run_flags and a
 *             shift by 31 (or more, on 64-bit) neither overflows a signed
 *             int nor sign-extends into the upper bits.
 * enabled(b): non-zero when bit number @b is set in run_flags.
 */
#define bit(b)		(1UL << (b))
#define enabled(b)	((run_flags & (bit(b))))
| 35 | + |
| 36 | +/* notice time_bench is limited to U32_MAX nr loops */ |
| 37 | +static unsigned long loops = 10000000; |
| 38 | +module_param(loops, ulong, 0); |
| 39 | +MODULE_PARM_DESC(loops, "Specify loops bench will run"); |
| 40 | + |
| 41 | +static unsigned long stay_loaded = 0; |
| 42 | +module_param(stay_loaded, ulong, 0); |
| 43 | +MODULE_PARM_DESC(stay_loaded, "For perf report keep module loaded"); |
| 44 | + |
| 45 | +/* Timing at the nanosec level, we need to know the overhead |
| 46 | + * introduced by the for loop itself */ |
| 47 | +static int time_bench_for_loop( |
| 48 | + struct time_bench_record *rec, void *data) |
| 49 | +{ |
| 50 | + uint64_t loops_cnt = 0; |
| 51 | + int i; |
| 52 | + |
| 53 | + time_bench_start(rec); |
| 54 | + /** Loop to measure **/ |
| 55 | + for (i = 0; i < rec->loops; i++) { |
| 56 | + loops_cnt++; |
| 57 | + barrier(); /* avoid compiler to optimize this loop */ |
| 58 | + } |
| 59 | + time_bench_stop(rec, loops_cnt); |
| 60 | + return loops_cnt; |
| 61 | +} |
| 62 | + |
| 63 | +static void noinline measured_function(volatile int *var) |
| 64 | +{ |
| 65 | + (*var) = 1; |
| 66 | +} |
| 67 | +static int time_func( |
| 68 | + struct time_bench_record *rec, void *data) |
| 69 | +{ |
| 70 | + int i, tmp; |
| 71 | + uint64_t loops_cnt = 0; |
| 72 | + |
| 73 | + time_bench_start(rec); |
| 74 | + /** Loop to measure **/ |
| 75 | + for (i = 0; i < rec->loops; i++) { |
| 76 | + measured_function(&tmp); |
| 77 | + loops_cnt++; |
| 78 | + } |
| 79 | + time_bench_stop(rec, loops_cnt); |
| 80 | + return loops_cnt; |
| 81 | +} |
| 82 | + |
| 83 | +struct func_ptr_ops { |
| 84 | + void (*func)(volatile int *var); |
| 85 | + unsigned int (*func2)(unsigned int count); |
| 86 | +}; |
| 87 | +static struct func_ptr_ops my_func_ptr __read_mostly = { |
| 88 | + .func = measured_function, |
| 89 | +}; |
| 90 | +static int time_func_ptr( |
| 91 | + struct time_bench_record *rec, void *data) |
| 92 | +{ |
| 93 | + int i, tmp; |
| 94 | + uint64_t loops_cnt = 0; |
| 95 | + |
| 96 | + time_bench_start(rec); |
| 97 | + /** Loop to measure **/ |
| 98 | + for (i = 0; i < rec->loops; i++) { |
| 99 | + my_func_ptr.func(&tmp); |
| 100 | + loops_cnt++; |
| 101 | + } |
| 102 | + time_bench_stop(rec, loops_cnt); |
| 103 | + return loops_cnt; |
| 104 | +} |
| 105 | + |
| 106 | +/* WORK AROUND for improper EXPORT_SYMBOL_GPL */ |
| 107 | +int bpf_xdp_trait_set(const struct xdp_buff *xdp, u64 key, |
| 108 | + const void *val, u64 val__sz, u64 flags); |
| 109 | +int bpf_xdp_trait_get(const struct xdp_buff *xdp, u64 key, |
| 110 | + void *val, u64 val__sz); |
| 111 | + |
| 112 | +static int time_trait_set(struct time_bench_record *rec, void *data) |
| 113 | +{ |
| 114 | + uint64_t loops_cnt = 0; |
| 115 | + int i; |
| 116 | + |
| 117 | + u64 key = 1; |
| 118 | + u64 val = 42; |
| 119 | + |
| 120 | + /* XDP create fake packet */ |
| 121 | + gfp_t gfp_mask = (__GFP_ZERO); |
| 122 | + struct page *page; |
| 123 | + void *data_start; |
| 124 | + struct xdp_buff xdp_buff = {}; |
| 125 | + struct xdp_buff *xdp = &xdp_buff; |
| 126 | + |
| 127 | + page = alloc_page(gfp_mask); |
| 128 | + if (!page) |
| 129 | + return 0; |
| 130 | + |
| 131 | + /* XDP setup fake packet */ |
| 132 | + data_start = page_address(page); |
| 133 | + xdp_init_buff(xdp, PAGE_SIZE, NULL); |
| 134 | + xdp_prepare_buff(xdp, data_start, XDP_PACKET_HEADROOM, 1024, true); |
| 135 | + |
| 136 | + time_bench_start(rec); |
| 137 | + /** Loop to measure **/ |
| 138 | + for (i = 0; i < rec->loops; i++) { |
| 139 | + bpf_xdp_trait_set(xdp, key, &val, sizeof(val), 0); |
| 140 | + // bpf_xdp_trait_set(xdp, 2, &val, sizeof(val), 0); |
| 141 | + loops_cnt++; |
| 142 | + } |
| 143 | + time_bench_stop(rec, loops_cnt); |
| 144 | + |
| 145 | + __free_page(page); |
| 146 | + |
| 147 | + return loops_cnt; |
| 148 | +} |
| 149 | + |
| 150 | +static int time_trait_get(struct time_bench_record *rec, void *data) |
| 151 | +{ |
| 152 | + uint64_t loops_cnt = 0; |
| 153 | + int i; |
| 154 | + |
| 155 | + u64 key = 1; |
| 156 | + u64 val = 42; |
| 157 | + u64 val2 = 0; |
| 158 | + |
| 159 | + /* XDP create fake packet */ |
| 160 | + gfp_t gfp_mask = (__GFP_ZERO); |
| 161 | + struct page *page; |
| 162 | + void *data_start; |
| 163 | + struct xdp_buff xdp_buff = {}; |
| 164 | + struct xdp_buff *xdp = &xdp_buff; |
| 165 | + |
| 166 | + page = alloc_page(gfp_mask); |
| 167 | + if (!page) |
| 168 | + return 0; |
| 169 | + |
| 170 | + /* XDP setup fake packet */ |
| 171 | + data_start = page_address(page); |
| 172 | + xdp_init_buff(xdp, PAGE_SIZE, NULL); |
| 173 | + xdp_prepare_buff(xdp, data_start, XDP_PACKET_HEADROOM, 1024, true); |
| 174 | + |
| 175 | + bpf_xdp_trait_set(xdp, key, &val, sizeof(val), 0); |
| 176 | + |
| 177 | + time_bench_start(rec); |
| 178 | + /** Loop to measure **/ |
| 179 | + for (i = 0; i < rec->loops; i++) { |
| 180 | + bpf_xdp_trait_get(xdp, key, &val2, sizeof(val2)); |
| 181 | + loops_cnt++; |
| 182 | + } |
| 183 | + time_bench_stop(rec, loops_cnt); |
| 184 | + |
| 185 | + __free_page(page); |
| 186 | + |
| 187 | + return loops_cnt; |
| 188 | +} |
| 189 | + |
| 190 | +static int run_benchmark_tests(void) |
| 191 | +{ |
| 192 | + uint32_t nr_loops = loops; |
| 193 | + |
| 194 | + /* Baseline tests */ |
| 195 | + if (enabled(bit_run_bench_baseline)) |
| 196 | + time_bench_loop(nr_loops*10, 0, |
| 197 | + "for_loop", NULL, time_bench_for_loop); |
| 198 | + |
| 199 | + /* cost for a local function call */ |
| 200 | + if (enabled(bit_run_bench_func)) |
| 201 | + time_bench_loop(loops, 0, "function_call_cost", |
| 202 | + NULL, time_func); |
| 203 | + |
| 204 | + /* cost for a function pointer invocation (indirect call) |
| 205 | + * - likely side-channel mitigation overhead |
| 206 | + */ |
| 207 | + if (enabled(bit_run_bench_func_ptr)) |
| 208 | + time_bench_loop(loops, 0, "func_ptr_call_cost", |
| 209 | + NULL, time_func_ptr); |
| 210 | + |
| 211 | + if (enabled(bit_run_bench_trait_set)) { |
| 212 | + time_bench_loop(loops, 0, "trait_set", |
| 213 | + NULL, time_trait_set); |
| 214 | + } |
| 215 | + |
| 216 | + if (enabled(bit_run_bench_trait_get)) { |
| 217 | + time_bench_loop(loops, 0, "trait_get", |
| 218 | + NULL, time_trait_get); |
| 219 | + } |
| 220 | + |
| 221 | + return 0; |
| 222 | +} |
| 223 | + |
| 224 | +static int __init bench_traits_simple_module_init(void) |
| 225 | +{ |
| 226 | + if (verbose) |
| 227 | + pr_info("Loaded\n"); |
| 228 | + |
| 229 | + if (loops > U32_MAX) { |
| 230 | + pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", |
| 231 | + loops, U32_MAX); |
| 232 | + return -ECHRNG; |
| 233 | + } |
| 234 | + |
| 235 | + run_benchmark_tests(); |
| 236 | + |
| 237 | + if (stay_loaded) |
| 238 | + return 0; |
| 239 | + else |
| 240 | + return -EAGAIN; // Trick to not fully load module |
| 241 | +} |
| 242 | +module_init(bench_traits_simple_module_init); |
| 243 | + |
| 244 | +static void __exit bench_traits_simple_module_exit(void) |
| 245 | +{ |
| 246 | + if (verbose) |
| 247 | + pr_info("Unloaded\n"); |
| 248 | +} |
| 249 | +module_exit(bench_traits_simple_module_exit); |
| 250 | + |
| 251 | +MODULE_DESCRIPTION("Benchmark of traits"); |
| 252 | +MODULE_AUTHOR("Jesper Dangaard Brouer <hawk@kernel.org>"); |
| 253 | +MODULE_LICENSE("GPL"); |
0 commit comments