// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2024 Rivos Inc.
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/jump_label.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
#include <asm/vector.h>

#include "copy-unaligned.h"

#define MISALIGNED_ACCESS_JIFFIES_LG2 1
#define MISALIGNED_BUFFER_SIZE 0x4000
#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
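/*
 * With the sizes above, the destination sits at offset 1 into the first half
 * of the buffer and the source at offset 3 into the second half, so both are
 * misaligned (and differently so); the copy size is just under half the
 * buffer, so neither pointer can run past the end of the allocation.
 */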

DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
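
/*
 * Userspace reads these per-CPU results through the hwprobe syscall. A rough
 * sketch (userspace code, not part of this file; assumes the uapi keys
 * RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF / _VECTOR_PERF and that a NULL
 * cpuset queries all CPUs):
 *
 *	struct riscv_hwprobe pair = {
 *		.key = RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF,
 *	};
 *	syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0);
 *	if (pair.value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST)
 *		use_the_misaligned_fast_path();
 */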

static long unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
static long unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
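/* Overridden by the unaligned_scalar_speed= / unaligned_vector_speed= parameters below. */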

static cpumask_t fast_misaligned_access;

#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
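/*
 * Time a word-granularity misaligned copy against a byte-granularity copy of
 * the same data and record which one this CPU runs faster. Runs on the CPU
 * being probed; "param" is the page to use as the copy buffer.
 */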
static int check_unaligned_access(void *param)
{
	int cpu = smp_processor_id();
	u64 start_cycles, end_cycles;
	u64 word_cycles;
	u64 byte_cycles;
	int ratio;
	unsigned long start_jiffies, now;
	struct page *page = param;
	void *dst;
	void *src;
	long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW;

	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN)
		return 0;

	/* Make an unaligned destination buffer. */
	dst = (void *)((unsigned long)page_address(page) | 0x1);
	/* Unalign src as well, but differently (off by 1 + 2 = 3). */
	src = dst + (MISALIGNED_BUFFER_SIZE / 2);
	src += 2;
	word_cycles = -1ULL;
	/* Do a warmup. */
	__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
	preempt_disable();
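	/* Wait for a fresh jiffy so the measurement window starts on a tick edge. */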
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	/*
	 * For a fixed amount of time, repeatedly try the function, and take
	 * the best time in cycles as the measurement.
	 */
	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
		start_cycles = get_cycles64();
		/* Ensure the CSR read can't reorder WRT to the copy. */
		mb();
		__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
		/* Ensure the copy ends before the end time is snapped. */
		mb();
		end_cycles = get_cycles64();
		if ((end_cycles - start_cycles) < word_cycles)
			word_cycles = end_cycles - start_cycles;
	}

	byte_cycles = -1ULL;
	__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
		start_cycles = get_cycles64();
		mb();
		__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
		mb();
		end_cycles = get_cycles64();
		if ((end_cycles - start_cycles) < byte_cycles)
			byte_cycles = end_cycles - start_cycles;
	}

	preempt_enable();

	/* Don't divide by zero. */
	if (!word_cycles || !byte_cycles) {
		pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
			cpu);

		return 0;
	}

	if (word_cycles < byte_cycles)
		speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST;

	ratio = div_u64((byte_cycles * 100), word_cycles);
	pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
		cpu,
		ratio / 100,
		ratio % 100,
		(speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow");

	per_cpu(misaligned_access_speed, cpu) = speed;

	/*
	 * Set the value of fast_misaligned_access of a CPU. These operations
	 * are atomic to avoid race conditions.
	 */
	if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST)
		cpumask_set_cpu(cpu, &fast_misaligned_access);
	else
		cpumask_clear_cpu(cpu, &fast_misaligned_access);

	return 0;
}

static void __init check_unaligned_access_nonboot_cpu(void *param)
{
	unsigned int cpu = smp_processor_id();
	struct page **pages = param;

	if (cpu != 0)
		check_unaligned_access(pages[cpu]);
}

/* Measure unaligned access speed on all CPUs present at boot in parallel. */
static void __init check_unaligned_access_speed_all_cpus(void)
{
	unsigned int cpu;
	unsigned int cpu_count = num_possible_cpus();
	struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);

	if (!bufs) {
		pr_warn("Allocation failure, not measuring misaligned performance\n");
		return;
	}

	/*
	 * Allocate separate buffers for each CPU so there's no fighting over
	 * cache lines.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
		if (!bufs[cpu]) {
			pr_warn("Allocation failure, not measuring misaligned performance\n");
			goto out;
		}
	}

	/* Check everybody except 0, who stays behind to tend jiffies. */
	on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);

	/* Check core 0. */
	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);

out:
	for_each_cpu(cpu, cpu_online_mask) {
		if (bufs[cpu])
			__free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
	}

	kfree(bufs);
}
#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
static void __init check_unaligned_access_speed_all_cpus(void)
{
}
#endif

DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
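
/*
 * The helpers below enable fast_unaligned_access_speed_key only while every
 * online CPU reports fast misaligned access, so callers can test a single
 * patched branch instead of walking the per-CPU values.
 */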

static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
{
	if (cpumask_weight(mask) == weight)
		static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key);
	else
		static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key);
}

static void set_unaligned_access_static_branches_except_cpu(int cpu)
{
	/*
	 * Same as set_unaligned_access_static_branches, except excludes the
	 * given CPU from the result. When a CPU is hotplugged into an offline
	 * state, this function is called before the CPU is set to offline in
	 * the cpumask, and thus the CPU needs to be explicitly excluded.
	 */

	cpumask_t fast_except_me;

	cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
	cpumask_clear_cpu(cpu, &fast_except_me);

	modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
}

static void set_unaligned_access_static_branches(void)
{
	/*
	 * This will be called after check_unaligned_access_all_cpus so the
	 * result of unaligned access speed for all CPUs will be available.
	 *
	 * To avoid the number of online cpus changing between reading
	 * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
	 * held before calling this function.
	 */

	cpumask_t fast_and_online;

	cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);

	modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
}

static int __init lock_and_set_unaligned_access_static_branch(void)
{
	cpus_read_lock();
	set_unaligned_access_static_branches();
	cpus_read_unlock();

	return 0;
}

arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
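/*
 * arch_initcall_sync() runs after plain arch_initcall(), so the probe in
 * check_unaligned_access_all_cpus() below has already filled in the per-CPU
 * results by the time the static branch is evaluated here.
 */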

static int riscv_online_cpu(unsigned int cpu)
{
	int ret = cpu_online_unaligned_access_init(cpu);

	if (ret)
		return ret;

	/* We are already set since the last check */
	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
		goto exit;
	} else if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
		per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param;
		goto exit;
	}

#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
	{
		static struct page *buf;

		buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
		if (!buf) {
			pr_warn("Allocation failure, not measuring misaligned performance\n");
			return -ENOMEM;
		}

		check_unaligned_access(buf);
		__free_pages(buf, MISALIGNED_BUFFER_ORDER);
	}
#endif

exit:
	set_unaligned_access_static_branches();

	return 0;
}

static int riscv_offline_cpu(unsigned int cpu)
{
	set_unaligned_access_static_branches_except_cpu(cpu);

	return 0;
}

#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
static void check_vector_unaligned_access(struct work_struct *work __always_unused)
{
	int cpu = smp_processor_id();
	u64 start_cycles, end_cycles;
	u64 word_cycles;
	u64 byte_cycles;
	int ratio;
	unsigned long start_jiffies, now;
	struct page *page;
	void *dst;
	void *src;
	long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW;

	if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
		return;

	page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
	if (!page) {
		pr_warn("Allocation failure, not measuring vector misaligned performance\n");
		return;
	}

	/* Make an unaligned destination buffer. */
	dst = (void *)((unsigned long)page_address(page) | 0x1);
	/* Unalign src as well, but differently (off by 1 + 2 = 3). */
	src = dst + (MISALIGNED_BUFFER_SIZE / 2);
	src += 2;
	word_cycles = -1ULL;

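	/* The vector copies below must run inside kernel_vector_begin()/end(). */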
	/* Do a warmup. */
	kernel_vector_begin();
	__riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);

	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	/*
	 * For a fixed amount of time, repeatedly try the function, and take
	 * the best time in cycles as the measurement.
	 */
	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
		start_cycles = get_cycles64();
		/* Ensure the CSR read can't reorder WRT to the copy. */
		mb();
		__riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
		/* Ensure the copy ends before the end time is snapped. */
		mb();
		end_cycles = get_cycles64();
		if ((end_cycles - start_cycles) < word_cycles)
			word_cycles = end_cycles - start_cycles;
	}

	byte_cycles = -1ULL;
	__riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
		start_cycles = get_cycles64();
		/* Ensure the CSR read can't reorder WRT to the copy. */
		mb();
		__riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
		/* Ensure the copy ends before the end time is snapped. */
		mb();
		end_cycles = get_cycles64();
		if ((end_cycles - start_cycles) < byte_cycles)
			byte_cycles = end_cycles - start_cycles;
	}

	kernel_vector_end();

	/* Don't divide by zero. */
	if (!word_cycles || !byte_cycles) {
		pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n",
			cpu);

		goto free;
	}

	if (word_cycles < byte_cycles)
		speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST;

	ratio = div_u64((byte_cycles * 100), word_cycles);
	pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n",
		cpu,
		ratio / 100,
		ratio % 100,
		(speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow");

	per_cpu(vector_misaligned_access, cpu) = speed;

free:
	__free_pages(page, MISALIGNED_BUFFER_ORDER);
}

/* Measure unaligned access speed on all CPUs present at boot in parallel. */
static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
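	/*
	 * schedule_on_each_cpu() runs the probe from a workqueue on each
	 * online CPU and waits for every instance to finish.
	 */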
	schedule_on_each_cpu(check_vector_unaligned_access);
	riscv_hwprobe_complete_async_probe();

	return 0;
}
#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
	return 0;
}
#endif

static int riscv_online_cpu_vec(unsigned int cpu)
{
	if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
		per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
		return 0;
	}

#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
	if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
		return 0;

	check_vector_unaligned_access_emulated(NULL);
	check_vector_unaligned_access(NULL);
#endif

	return 0;
}

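/*
 * Indexed by the RISCV_HWPROBE_MISALIGNED_{SCALAR,VECTOR}_* values, which
 * share one encoding; "unknown" (0) and "emulated" (1) are not valid
 * command-line choices, hence the two NULL entries.
 */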
static const char * const speed_str[] __initconst = { NULL, NULL, "slow", "fast", "unsupported" };

static int __init set_unaligned_scalar_speed_param(char *str)
{
	if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW]))
		unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW;
	else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_FAST]))
		unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST;
	else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED]))
		unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED;
	else
		return -EINVAL;

	return 1;
}
__setup("unaligned_scalar_speed=", set_unaligned_scalar_speed_param);

static int __init set_unaligned_vector_speed_param(char *str)
{
	if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW]))
		unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW;
	else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_FAST]))
		unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST;
	else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED]))
		unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
	else
		return -EINVAL;

	return 1;
}
__setup("unaligned_vector_speed=", set_unaligned_vector_speed_param);
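/*
 * Example: booting with "unaligned_scalar_speed=fast unaligned_vector_speed=slow"
 * on the kernel command line skips both probes and reports those values
 * directly through hwprobe.
 */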

static int __init check_unaligned_access_all_cpus(void)
{
	int cpu;

	unaligned_access_init();

	if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
		pr_info("scalar unaligned access speed set to '%s' (%lu) by command line\n",
			speed_str[unaligned_scalar_speed_param], unaligned_scalar_speed_param);
		for_each_online_cpu(cpu)
			per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param;
	} else if (!check_unaligned_access_emulated_all_cpus()) {
		check_unaligned_access_speed_all_cpus();
	}

	if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
		if (!has_vector() &&
		    unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) {
			pr_warn("vector support is not available, ignoring unaligned_vector_speed=%s\n",
				speed_str[unaligned_vector_speed_param]);
		} else {
			pr_info("vector unaligned access speed set to '%s' (%lu) by command line\n",
				speed_str[unaligned_vector_speed_param], unaligned_vector_speed_param);
		}
	}

	if (!has_vector())
		unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;

	if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
		for_each_online_cpu(cpu)
			per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
	} else if (!check_vector_unaligned_access_emulated_all_cpus() &&
		   IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
		riscv_hwprobe_register_async_probe();
		if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus,
				       NULL, "vec_check_unaligned_access_speed_all_cpus"))) {
			pr_warn("Failed to create vec_unalign_check kthread\n");
			riscv_hwprobe_complete_async_probe();
		}
	}

	/*
	 * Setup hotplug callbacks for any new CPUs that come online or go
	 * offline.
	 */
	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
				  riscv_online_cpu, riscv_offline_cpu);
	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
				  riscv_online_cpu_vec, NULL);

	return 0;
}

arch_initcall(check_unaligned_access_all_cpus);