/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 * Internal non-public definitions that provide either classic
 * or preemptible semantics.
 *
 * Copyright Red Hat, 2009
 * Copyright IBM Corporation, 2009
 * Copyright SUSE, 2021
 *
 * Author: Ingo Molnar <mingo@elte.hu>
 *	   Paul E. McKenney <paulmck@linux.ibm.com>
 *	   Frederic Weisbecker <frederic@kernel.org>
 */

#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
static bool __read_mostly rcu_nocb_poll;    /* Offload kthreads are to poll. */

static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
{
	/* Race on early boot between thread creation and assignment */
	if (!rdp->nocb_cb_kthread || !rdp->nocb_gp_kthread)
		return true;

	if (current == rdp->nocb_cb_kthread || current == rdp->nocb_gp_kthread)
		if (in_task())
			return true;
	return false;
}

/*
 * Offload callback processing from the boot-time-specified set of CPUs
 * specified by rcu_nocb_mask.  For the CPUs in the set, there are kthreads
 * created that pull the callbacks from the corresponding CPU, wait for
 * a grace period to elapse, and invoke the callbacks.  These kthreads
 * are organized into GP kthreads, which manage incoming callbacks, wait for
 * grace periods, and awaken CB kthreads, and the CB kthreads, which only
 * invoke callbacks.  Each GP kthread invokes its own CBs.  The no-CBs CPUs
 * do a wake_up() on their GP kthread when they insert a callback into any
 * empty list, unless the rcu_nocb_poll boot parameter has been specified,
 * in which case each kthread actively polls its CPU.  (Which isn't so great
 * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
 *
 * This is intended to be used in conjunction with Frederic Weisbecker's
 * adaptive-idle work, which would seriously reduce OS jitter on CPUs
 * running CPU-bound user-mode computations.
 *
 * Offloading of callbacks can also be used as an energy-efficiency
 * measure because CPUs with no RCU callbacks queued are more aggressive
 * about entering dyntick-idle mode.
 */


/*
 * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
 * If the list is invalid, a warning is emitted and all CPUs are offloaded.
 */
static int __init rcu_nocb_setup(char *str)
{
	alloc_bootmem_cpumask_var(&rcu_nocb_mask);
	if (*str == '=') {
		if (cpulist_parse(++str, rcu_nocb_mask)) {
			pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
			cpumask_setall(rcu_nocb_mask);
		}
	}
	rcu_state.nocb_is_setup = true;
	return 1;
}
__setup("rcu_nocbs", rcu_nocb_setup);

static int __init parse_rcu_nocb_poll(char *arg)
{
	rcu_nocb_poll = true;
	return 1;
}
__setup("rcu_nocb_poll", parse_rcu_nocb_poll);
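
/*
 * Example (illustrative boot command line, not a definitive recipe):
 *
 *	rcu_nocbs=1-7 rcu_nocb_poll
 *
 * offloads callbacks from CPUs 1-7 and makes the rcuog kthreads poll
 * for new callbacks instead of waiting for wakeups from call_rcu()
 * enqueues.
 */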

/*
 * Don't bother bypassing ->cblist if the call_rcu() rate is low.
 * After all, the main point of bypassing is to avoid lock contention
 * on ->nocb_lock, which only can happen at high call_rcu() rates.
 */
static int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ;
module_param(nocb_nobypass_lim_per_jiffy, int, 0);
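
/*
 * Worked example (informational only): the default keeps roughly
 * 16000 non-bypass enqueues per second regardless of HZ.  With
 * HZ=1000 this is 16 per jiffy, with HZ=250 it is 64 per jiffy.
 */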

/*
 * Acquire the specified rcu_data structure's ->nocb_bypass_lock.  If the
 * lock isn't immediately available, perform a minimal sanity check.
 */
static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
	__acquires(&rdp->nocb_bypass_lock)
{
	lockdep_assert_irqs_disabled();
	if (raw_spin_trylock(&rdp->nocb_bypass_lock))
		return;
	/*
	 * Contention is expected only when a local enqueue collides with
	 * a remote flush from kthreads.
	 */
	WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
	raw_spin_lock(&rdp->nocb_bypass_lock);
}

/*
 * Conditionally acquire the specified rcu_data structure's
 * ->nocb_bypass_lock.
 */
static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
	return raw_spin_trylock(&rdp->nocb_bypass_lock);
}

/*
 * Release the specified rcu_data structure's ->nocb_bypass_lock.
 */
static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)
	__releases(&rdp->nocb_bypass_lock)
{
	lockdep_assert_irqs_disabled();
	raw_spin_unlock(&rdp->nocb_bypass_lock);
}

/*
 * Acquire the specified rcu_data structure's ->nocb_lock, but only
 * if it corresponds to a no-CBs CPU.
 */
static void rcu_nocb_lock(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
	if (!rcu_rdp_is_offloaded(rdp))
		return;
	raw_spin_lock(&rdp->nocb_lock);
}

/*
 * Release the specified rcu_data structure's ->nocb_lock, but only
 * if it corresponds to a no-CBs CPU.
 */
static void rcu_nocb_unlock(struct rcu_data *rdp)
{
	if (rcu_rdp_is_offloaded(rdp)) {
		lockdep_assert_irqs_disabled();
		raw_spin_unlock(&rdp->nocb_lock);
	}
}

/*
 * Release the specified rcu_data structure's ->nocb_lock and restore
 * interrupts, but only if it corresponds to a no-CBs CPU.
 */
static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
				       unsigned long flags)
{
	if (rcu_rdp_is_offloaded(rdp)) {
		lockdep_assert_irqs_disabled();
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
	} else {
		local_irq_restore(flags);
	}
}

/* Lockdep check that ->cblist may be safely accessed. */
static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
	if (rcu_rdp_is_offloaded(rdp))
		lockdep_assert_held(&rdp->nocb_lock);
}

/*
 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
 * grace period.
 */
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
	swake_up_all(sq);
}

static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
	return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
}

static void rcu_init_one_nocb(struct rcu_node *rnp)
{
	init_swait_queue_head(&rnp->nocb_gp_wq[0]);
	init_swait_queue_head(&rnp->nocb_gp_wq[1]);
}

static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
			   struct rcu_data *rdp,
			   bool force, unsigned long flags)
	__releases(rdp_gp->nocb_gp_lock)
{
	bool needwake = false;

	if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
		raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
				    TPS("AlreadyAwake"));
		return false;
	}

	if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
		WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
		timer_delete(&rdp_gp->nocb_timer);
	}

	if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
		WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
		needwake = true;
	}
	raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
	if (needwake) {
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
		swake_up_one(&rdp_gp->nocb_gp_wq);
	}

	return needwake;
}

/*
 * Kick the GP kthread for this NOCB group.
 */
static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
{
	unsigned long flags;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
	return __wake_nocb_gp(rdp_gp, rdp, force, flags);
}

#ifdef CONFIG_RCU_LAZY
/*
 * LAZY_FLUSH_JIFFIES decides the maximum amount of time that
 * can elapse before lazy callbacks are flushed.  Lazy callbacks
 * could be flushed much earlier for a number of other reasons,
 * but LAZY_FLUSH_JIFFIES ensures that no lazy callbacks are
 * left unsubmitted to RCU for longer than that many jiffies.
 */
#define LAZY_FLUSH_JIFFIES (10 * HZ)
static unsigned long jiffies_lazy_flush = LAZY_FLUSH_JIFFIES;

// To be called only from test code.
void rcu_set_jiffies_lazy_flush(unsigned long jif)
{
	jiffies_lazy_flush = jif;
}
EXPORT_SYMBOL(rcu_set_jiffies_lazy_flush);

unsigned long rcu_get_jiffies_lazy_flush(void)
{
	return jiffies_lazy_flush;
}
EXPORT_SYMBOL(rcu_get_jiffies_lazy_flush);
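
/*
 * Example (hypothetical test-code usage, not taken from this file):
 * shorten the lazy-flush timeout to one second, then restore the
 * default of ten seconds' worth of jiffies:
 *
 *	rcu_set_jiffies_lazy_flush(HZ);
 *	...
 *	rcu_set_jiffies_lazy_flush(LAZY_FLUSH_JIFFIES);
 */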
#endif

/*
 * Arrange to wake the GP kthread for this NOCB group at some future
 * time when it is safe to do so.
 */
static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
			       const char *reason)
{
	unsigned long flags;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);

	/*
	 * Bypass wakeup overrides previous deferments. In case of
	 * callback storms, no need to wake up too early.
	 */
	if (waketype == RCU_NOCB_WAKE_LAZY &&
	    rdp_gp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
		mod_timer(&rdp_gp->nocb_timer, jiffies + rcu_get_jiffies_lazy_flush());
		WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
	} else if (waketype == RCU_NOCB_WAKE_BYPASS) {
		mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
		WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
	} else {
		if (rdp_gp->nocb_defer_wakeup < RCU_NOCB_WAKE)
			mod_timer(&rdp_gp->nocb_timer, jiffies + 1);
		if (rdp_gp->nocb_defer_wakeup < waketype)
			WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
	}

	raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);

	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
}

/*
 * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
 * However, if there is a callback to be enqueued and if ->nocb_bypass
 * proves to be initially empty, just return false because the no-CB GP
 * kthread may need to be awakened in this case.
 *
 * Return true if there was something to be flushed and it succeeded, otherwise
 * false.
 *
 * Note that this function always returns true if rhp is NULL.
 */
static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in,
				     unsigned long j, bool lazy)
{
	struct rcu_cblist rcl;
	struct rcu_head *rhp = rhp_in;

	WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));
	rcu_lockdep_assert_cblist_protected(rdp);
	lockdep_assert_held(&rdp->nocb_bypass_lock);
	if (rhp && !rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
		raw_spin_unlock(&rdp->nocb_bypass_lock);
		return false;
	}
	/* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
	if (rhp)
		rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */

	/*
	 * If the new CB requested was a lazy one, queue it onto the main
	 * ->cblist so that we can take advantage of the grace-period that will
	 * happen regardless. But queue it onto the bypass list first so that
	 * the lazy CB is ordered with the existing CBs in the bypass list.
	 */
	if (lazy && rhp) {
		rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
		rhp = NULL;
	}
	rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
	WRITE_ONCE(rdp->lazy_len, 0);

	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
	WRITE_ONCE(rdp->nocb_bypass_first, j);
	rcu_nocb_bypass_unlock(rdp);
	return true;
}

/*
 * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
 * However, if there is a callback to be enqueued and if ->nocb_bypass
 * proves to be initially empty, just return false because the no-CB GP
 * kthread may need to be awakened in this case.
 *
 * Note that this function always returns true if rhp is NULL.
 */
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				  unsigned long j, bool lazy)
{
	if (!rcu_rdp_is_offloaded(rdp))
		return true;
	rcu_lockdep_assert_cblist_protected(rdp);
	rcu_nocb_bypass_lock(rdp);
	return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy);
}

/*
 * If the ->nocb_bypass_lock is immediately available, flush the
 * ->nocb_bypass queue into ->cblist.
 */
static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
{
	rcu_lockdep_assert_cblist_protected(rdp);
	if (!rcu_rdp_is_offloaded(rdp) ||
	    !rcu_nocb_bypass_trylock(rdp))
		return;
	WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false));
}

/*
 * See whether it is appropriate to use the ->nocb_bypass list in order
 * to control contention on ->nocb_lock.  A limited number of direct
 * enqueues are permitted into ->cblist per jiffy.  If ->nocb_bypass
 * is non-empty, further callbacks must be placed into ->nocb_bypass,
 * otherwise rcu_barrier() breaks.  Use rcu_nocb_flush_bypass() to switch
 * back to direct use of ->cblist.  However, ->nocb_bypass should not be
 * used if ->cblist is empty, because otherwise callbacks can be stranded
 * on ->nocb_bypass because we cannot count on the current CPU ever again
 * invoking call_rcu().  The general rule is that if ->nocb_bypass is
 * non-empty, the corresponding no-CBs grace-period kthread must not be
 * in an indefinite sleep state.
 *
 * Finally, it is not permitted to use the bypass during early boot,
 * as doing so would confuse the auto-initialization code.  Besides
 * which, there is no point in worrying about lock contention while
 * there is only one CPU in operation.
 */
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				bool *was_alldone, unsigned long flags,
				bool lazy)
{
	unsigned long c;
	unsigned long cur_gp_seq;
	unsigned long j = jiffies;
	long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len));

	lockdep_assert_irqs_disabled();

	// Pure softirq/rcuc based processing: no bypassing, no
	// locking.
	if (!rcu_rdp_is_offloaded(rdp)) {
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
		return false;
	}

	// Don't use ->nocb_bypass during early boot.
	if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
		rcu_nocb_lock(rdp);
		WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
		return false;
	}

	// If we have advanced to a new jiffy, reset counts to allow
	// moving back from ->nocb_bypass to ->cblist.
	if (j == rdp->nocb_nobypass_last) {
		c = rdp->nocb_nobypass_count + 1;
	} else {
		WRITE_ONCE(rdp->nocb_nobypass_last, j);
		c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy;
		if (ULONG_CMP_LT(rdp->nocb_nobypass_count,
				 nocb_nobypass_lim_per_jiffy))
			c = 0;
		else if (c > nocb_nobypass_lim_per_jiffy)
			c = nocb_nobypass_lim_per_jiffy;
	}
	WRITE_ONCE(rdp->nocb_nobypass_count, c);

	// If there hasn't yet been all that many ->cblist enqueues
	// this jiffy, tell the caller to enqueue onto ->cblist.  But flush
	// ->nocb_bypass first.
	// Lazy CBs throttle this back and do immediate bypass queuing.
	if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) {
		rcu_nocb_lock(rdp);
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
		if (*was_alldone)
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("FirstQ"));

		WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false));
		WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
		return false; // Caller must enqueue the callback.
	}

	// If ->nocb_bypass has been used too long or is too full,
	// flush ->nocb_bypass to ->cblist.
	if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
	    (ncbs && bypass_is_lazy &&
	     (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) ||
	    ncbs >= qhimark) {
		rcu_nocb_lock(rdp);
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);

		if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) {
			if (*was_alldone)
				trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
						    TPS("FirstQ"));
			WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
			return false; // Caller must enqueue the callback.
		}
		if (j != rdp->nocb_gp_adv_time &&
		    rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
		    rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
			rcu_advance_cbs_nowake(rdp->mynode, rdp);
			rdp->nocb_gp_adv_time = j;
		}

		// The flush succeeded and we moved CBs into the regular list.
		// Don't wait for the wake up timer as it may be too far ahead.
		// Wake up the GP thread now instead, if the cblist was empty.
		__call_rcu_nocb_wake(rdp, *was_alldone, flags);

		return true; // Callback already enqueued.
	}

	// We need to use the bypass.
	rcu_nocb_bypass_lock(rdp);
	ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
	rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);

	if (lazy)
		WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1);

	if (!ncbs) {
		WRITE_ONCE(rdp->nocb_bypass_first, j);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
	}
	rcu_nocb_bypass_unlock(rdp);

	// A wake up of the grace period kthread or timer adjustment
	// needs to be done only if:
	// 1. Bypass list was fully empty before (this is the first
	//    bypass list entry), or:
	// 2. Both of these conditions are met:
	//    a. The bypass list previously had only lazy CBs, and:
	//    b. The new CB is non-lazy.
	if (!ncbs || (bypass_is_lazy && !lazy)) {
		// No-CBs GP kthread might be indefinitely asleep, if so, wake.
		rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
		if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("FirstBQwake"));
			__call_rcu_nocb_wake(rdp, true, flags);
		} else {
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("FirstBQnoWake"));
			rcu_nocb_unlock(rdp);
		}
	}
	return true; // Callback already enqueued.
}
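
/*
 * In short, rcu_nocb_try_bypass() has three outcomes on an offloaded
 * CPU: return false with ->nocb_lock held, in which case the caller
 * enqueues the callback onto ->cblist itself; return true after a
 * successful flush, in which case the callback went to ->cblist and
 * any needed rcuog wakeup was already issued; or return true after
 * appending to ->nocb_bypass, leaving the callback to a later flush
 * or deferred wakeup.
 */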

/*
 * Awaken the no-CBs grace-period kthread if needed, either due to it
 * legitimately being asleep or due to overload conditions.
 *
 * If warranted, also wake up the kthread servicing this CPU's queues.
 */
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
				 unsigned long flags)
	__releases(rdp->nocb_lock)
{
	long bypass_len;
	unsigned long cur_gp_seq;
	unsigned long j;
	long lazy_len;
	long len;
	struct task_struct *t;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	// If we are being polled or there is no kthread, just leave.
	t = READ_ONCE(rdp->nocb_gp_kthread);
	if (rcu_nocb_poll || !t) {
		rcu_nocb_unlock(rdp);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
				    TPS("WakeNotPoll"));
		return;
	}
	// Need to actually do a wakeup.
	len = rcu_segcblist_n_cbs(&rdp->cblist);
	bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	lazy_len = READ_ONCE(rdp->lazy_len);
	if (was_alldone) {
		rdp->qlen_last_fqs_check = len;
		// Only lazy CBs in bypass list
		if (lazy_len && bypass_len == lazy_len) {
			rcu_nocb_unlock(rdp);
			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
					   TPS("WakeLazy"));
		} else if (!irqs_disabled_flags(flags)) {
			/* ... if queue was empty ... */
			rcu_nocb_unlock(rdp);
			wake_nocb_gp(rdp, false);
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("WakeEmpty"));
		} else {
			rcu_nocb_unlock(rdp);
			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
					   TPS("WakeEmptyIsDeferred"));
		}
	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
		/* ... or if many callbacks queued. */
		rdp->qlen_last_fqs_check = len;
		j = jiffies;
		if (j != rdp->nocb_gp_adv_time &&
		    rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
		    rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
			rcu_advance_cbs_nowake(rdp->mynode, rdp);
			rdp->nocb_gp_adv_time = j;
		}
		smp_mb(); /* Enqueue before timer_pending(). */
		if ((rdp->nocb_cb_sleep ||
		     !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
		    !timer_pending(&rdp_gp->nocb_timer)) {
			rcu_nocb_unlock(rdp);
			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
					   TPS("WakeOvfIsDeferred"));
		} else {
			rcu_nocb_unlock(rdp);
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
		}
	} else {
		rcu_nocb_unlock(rdp);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
	}
}

static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
			  rcu_callback_t func, unsigned long flags, bool lazy)
{
	bool was_alldone;

	if (!rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) {
		/* Not enqueued on bypass but locked, do regular enqueue */
		rcutree_enqueue(rdp, head, func);
		__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
	}
}
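
/*
 * Minimal sketch of how this path is reached (assumed call chain, for
 * orientation only): a call_rcu() on an offloaded CPU funnels here,
 * roughly:
 *
 *	call_rcu(&p->rh, p_callback_fn);	// hypothetical caller
 *	  -> tries the ->nocb_bypass via rcu_nocb_try_bypass()
 *	  -> otherwise enqueues onto ->cblist and wakes rcuog as needed.
 */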

static void nocb_gp_toggle_rdp(struct rcu_data *rdp_gp, struct rcu_data *rdp)
{
	struct rcu_segcblist *cblist = &rdp->cblist;
	unsigned long flags;

	/*
	 * Locking orders future de-offloaded callbacks enqueue against previous
	 * handling of this rdp. I.e.: Make sure rcuog is done with this rdp before
	 * de-offloaded callbacks can be enqueued.
	 */
	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
	if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
		/*
		 * Offloading. Set our flag and notify the offload worker.
		 * We will handle this rdp until it ever gets de-offloaded.
		 */
		list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp);
		rcu_segcblist_set_flags(cblist, SEGCBLIST_OFFLOADED);
	} else {
		/*
		 * De-offloading. Clear our flag and notify the de-offload worker.
		 * We will ignore this rdp until it ever gets re-offloaded.
		 */
		list_del(&rdp->nocb_entry_rdp);
		rcu_segcblist_clear_flags(cblist, SEGCBLIST_OFFLOADED);
	}
	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
}

static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu)
{
	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
	swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
					    !READ_ONCE(my_rdp->nocb_gp_sleep));
	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
}

/*
 * No-CBs GP kthreads come here to wait for additional callbacks to show up
 * or for grace periods to end.
 */
static void nocb_gp_wait(struct rcu_data *my_rdp)
{
	bool bypass = false;
	int __maybe_unused cpu = my_rdp->cpu;
	unsigned long cur_gp_seq;
	unsigned long flags;
	bool gotcbs = false;
	unsigned long j = jiffies;
	bool lazy = false;
	bool needwait_gp = false; // This prevents actual uninitialized use.
	bool needwake;
	bool needwake_gp;
	struct rcu_data *rdp, *rdp_toggling = NULL;
	struct rcu_node *rnp;
	unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning.
	bool wasempty = false;

	/*
	 * Each pass through the following loop checks for CBs and for the
	 * nearest grace period (if any) to wait for next.  The CB kthreads
	 * and the global grace-period kthread are awakened if needed.
	 */
	WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
	/*
	 * An rcu_data structure is removed from the list after its
	 * CPU is de-offloaded and added to the list before that CPU is
	 * (re-)offloaded.  If the following loop happens to be referencing
	 * that rcu_data structure during the time that the corresponding
	 * CPU is de-offloaded and then immediately re-offloaded, this
	 * loop's rdp pointer will be carried to the end of the list by
	 * the resulting pair of list operations.  This can cause the loop
	 * to skip over some of the rcu_data structures that were supposed
	 * to have been scanned.  Fortunately a new iteration through the
	 * entire loop is forced after a given CPU's rcu_data structure
	 * is added to the list, so the skipped-over rcu_data structures
	 * won't be ignored for long.
	 */
	list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) {
		long bypass_ncbs;
		bool flush_bypass = false;
		long lazy_ncbs;

		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
		rcu_nocb_lock_irqsave(rdp, flags);
		lockdep_assert_held(&rdp->nocb_lock);
		bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
		lazy_ncbs = READ_ONCE(rdp->lazy_len);

		if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
		    (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) ||
		     bypass_ncbs > 2 * qhimark)) {
			flush_bypass = true;
		} else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
		    (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
		     bypass_ncbs > 2 * qhimark)) {
			flush_bypass = true;
		} else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
			rcu_nocb_unlock_irqrestore(rdp, flags);
			continue; /* No callbacks here, try next. */
		}

		if (flush_bypass) {
			// Bypass full or old, so flush it.
			(void)rcu_nocb_try_flush_bypass(rdp, j);
			bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
			lazy_ncbs = READ_ONCE(rdp->lazy_len);
		}

		if (bypass_ncbs) {
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass"));
			if (bypass_ncbs == lazy_ncbs)
				lazy = true;
			else
				bypass = true;
		}
		rnp = rdp->mynode;

		// Advance callbacks if helpful and low contention.
		needwake_gp = false;
		if (!rcu_segcblist_restempty(&rdp->cblist,
					     RCU_NEXT_READY_TAIL) ||
		    (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
		     rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {
			raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
			needwake_gp = rcu_advance_cbs(rnp, rdp);
			wasempty = rcu_segcblist_restempty(&rdp->cblist,
							   RCU_NEXT_READY_TAIL);
			raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */
		}
		// Need to wait on some grace period?
		WARN_ON_ONCE(wasempty &&
			     !rcu_segcblist_restempty(&rdp->cblist,
						      RCU_NEXT_READY_TAIL));
		if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
			if (!needwait_gp ||
			    ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))
				wait_gp_seq = cur_gp_seq;
			needwait_gp = true;
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("NeedWaitGP"));
		}
		if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
			needwake = rdp->nocb_cb_sleep;
			WRITE_ONCE(rdp->nocb_cb_sleep, false);
		} else {
			needwake = false;
		}
		rcu_nocb_unlock_irqrestore(rdp, flags);
		if (needwake) {
			swake_up_one(&rdp->nocb_cb_wq);
			gotcbs = true;
		}
		if (needwake_gp)
			rcu_gp_kthread_wake();
	}

	my_rdp->nocb_gp_bypass = bypass;
	my_rdp->nocb_gp_gp = needwait_gp;
	my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;

	// At least one child with non-empty ->nocb_bypass, so set
	// timer in order to avoid stranding its callbacks.
	if (!rcu_nocb_poll) {
		// If the bypass list has only lazy CBs, add a deferred lazy wake up.
		if (lazy && !bypass) {
			wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY,
					   TPS("WakeLazyIsDeferred"));
		// Otherwise add a deferred bypass wake up.
		} else if (bypass) {
			wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
					   TPS("WakeBypassIsDeferred"));
		}
	}

	if (rcu_nocb_poll) {
		/* Polling, so trace if first poll in the series. */
		if (gotcbs)
			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
		if (list_empty(&my_rdp->nocb_head_rdp)) {
			raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
			if (!my_rdp->nocb_toggling_rdp)
				WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
			raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
			/* Wait for any offloading rdp */
			nocb_gp_sleep(my_rdp, cpu);
		} else {
			schedule_timeout_idle(1);
		}
	} else if (!needwait_gp) {
		/* Wait for callbacks to appear. */
		nocb_gp_sleep(my_rdp, cpu);
	} else {
		rnp = my_rdp->mynode;
		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
		swait_event_interruptible_exclusive(
			rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],
			rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||
			!READ_ONCE(my_rdp->nocb_gp_sleep));
		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
	}

	if (!rcu_nocb_poll) {
		raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
		// (De-)queue an rdp to/from the group if its nocb state is changing
		rdp_toggling = my_rdp->nocb_toggling_rdp;
		if (rdp_toggling)
			my_rdp->nocb_toggling_rdp = NULL;

		if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
			WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
			timer_delete(&my_rdp->nocb_timer);
		}
		WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
		raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
	} else {
		rdp_toggling = READ_ONCE(my_rdp->nocb_toggling_rdp);
		if (rdp_toggling) {
			/*
			 * Paranoid locking to make sure nocb_toggling_rdp is well
			 * reset *before* we (re)set SEGCBLIST_KTHREAD_GP or we could
			 * race with another round of nocb toggling for this rdp.
			 * Nocb locking should prevent that already, but we stick
			 * to paranoia, especially in this rare path.
			 */
			raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
			my_rdp->nocb_toggling_rdp = NULL;
			raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
		}
	}

	if (rdp_toggling) {
		nocb_gp_toggle_rdp(my_rdp, rdp_toggling);
		swake_up_one(&rdp_toggling->nocb_state_wq);
	}

	my_rdp->nocb_gp_seq = -1;
	WARN_ON(signal_pending(current));
}

/*
 * No-CBs grace-period-wait kthread.  There is one of these per group
 * of CPUs, but only once at least one CPU in that group has come online
 * at least once since boot.  This kthread checks for newly posted
 * callbacks from any of the CPUs it is responsible for, waits for a
 * grace period, then awakens all of the rcu_nocb_cb_kthread() instances
 * that then have callback-invocation work to do.
 */
static int rcu_nocb_gp_kthread(void *arg)
{
	struct rcu_data *rdp = arg;

	for (;;) {
		WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
		nocb_gp_wait(rdp);
		cond_resched_tasks_rcu_qs();
	}
	return 0;
}

static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
{
	return !READ_ONCE(rdp->nocb_cb_sleep) || kthread_should_park();
}

/*
 * Invoke any ready callbacks from the corresponding no-CBs CPU,
 * then, if there are no more, wait for more to appear.
 */
static void nocb_cb_wait(struct rcu_data *rdp)
{
	struct rcu_segcblist *cblist = &rdp->cblist;
	unsigned long cur_gp_seq;
	unsigned long flags;
	bool needwake_gp = false;
	struct rcu_node *rnp = rdp->mynode;

	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
					    nocb_cb_wait_cond(rdp));
	if (kthread_should_park()) {
		/*
		 * kthread_park() must be preceded by an rcu_barrier().
		 * But yet another rcu_barrier() might have sneaked in between
		 * the barrier callback execution and the callbacks counter
		 * decrement.
		 */
		if (rdp->nocb_cb_sleep) {
			rcu_nocb_lock_irqsave(rdp, flags);
			WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
			rcu_nocb_unlock_irqrestore(rdp, flags);
			kthread_parkme();
		}
	} else if (READ_ONCE(rdp->nocb_cb_sleep)) {
		WARN_ON(signal_pending(current));
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
	}

	WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));

	local_irq_save(flags);
	rcu_momentary_eqs();
	local_irq_restore(flags);
	/*
	 * Disable BH to provide the expected environment.  Also, when
	 * transitioning to/from NOCB mode, a self-requeuing callback might
	 * be invoked from softirq.  A short grace period could cause both
	 * instances of this callback to execute concurrently.
	 */
	local_bh_disable();
	rcu_do_batch(rdp);
	local_bh_enable();
	lockdep_assert_irqs_enabled();
	rcu_nocb_lock_irqsave(rdp, flags);
	if (rcu_segcblist_nextgp(cblist, &cur_gp_seq) &&
	    rcu_seq_done(&rnp->gp_seq, cur_gp_seq) &&
	    raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */
		needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
	}

	if (!rcu_segcblist_ready_cbs(cblist)) {
		WRITE_ONCE(rdp->nocb_cb_sleep, true);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
	} else {
		WRITE_ONCE(rdp->nocb_cb_sleep, false);
	}

	rcu_nocb_unlock_irqrestore(rdp, flags);
	if (needwake_gp)
		rcu_gp_kthread_wake();
}

/*
 * Per-rcu_data kthread, but only for no-CBs CPUs.  Repeatedly invoke
 * nocb_cb_wait() to do the dirty work.
 */
static int rcu_nocb_cb_kthread(void *arg)
{
	struct rcu_data *rdp = arg;

	// Each pass through this loop does one callback batch, and,
	// if there are no more ready callbacks, waits for them.
	for (;;) {
		nocb_cb_wait(rdp);
		cond_resched_tasks_rcu_qs();
	}
	return 0;
}

/* Is a deferred wakeup of rcu_nocb_kthread() required? */
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
{
	return READ_ONCE(rdp->nocb_defer_wakeup) >= level;
}

/* Do a deferred wakeup of rcu_nocb_kthread(). */
static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
					   struct rcu_data *rdp, int level,
					   unsigned long flags)
	__releases(rdp_gp->nocb_gp_lock)
{
	int ndw;
	int ret;

	if (!rcu_nocb_need_deferred_wakeup(rdp_gp, level)) {
		raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
		return false;
	}

	ndw = rdp_gp->nocb_defer_wakeup;
	ret = __wake_nocb_gp(rdp_gp, rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));

	return ret;
}

/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
{
	unsigned long flags;
	struct rcu_data *rdp = timer_container_of(rdp, t, nocb_timer);

	WARN_ON_ONCE(rdp->nocb_gp_rdp != rdp);
	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));

	raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
	smp_mb__after_spinlock(); /* Timer expire before wakeup. */
	do_nocb_deferred_wakeup_common(rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags);
}

/*
 * Do a deferred wakeup of rcu_nocb_kthread() from fastpath.
 * This means we do an inexact common-case check.  Note that if
 * we miss, ->nocb_timer will eventually clean things up.
 */
static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
	unsigned long flags;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	if (!rdp_gp || !rcu_nocb_need_deferred_wakeup(rdp_gp, RCU_NOCB_WAKE))
		return false;

	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
	return do_nocb_deferred_wakeup_common(rdp_gp, rdp, RCU_NOCB_WAKE, flags);
}

void rcu_nocb_flush_deferred_wakeup(void)
{
	do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
}
EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);
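
/*
 * Usage note: callers such as the idle loop are expected to invoke
 * rcu_nocb_flush_deferred_wakeup() before entering a low-power state,
 * so that any wakeup deferred from an IRQ-disabled context is issued
 * while the CPU can still do so promptly.
 */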

static int rcu_nocb_queue_toggle_rdp(struct rcu_data *rdp)
{
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
	bool wake_gp = false;
	unsigned long flags;

	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
	// Queue this rdp for add/del to/from the list to iterate on rcuog
	WRITE_ONCE(rdp_gp->nocb_toggling_rdp, rdp);
	if (rdp_gp->nocb_gp_sleep) {
		rdp_gp->nocb_gp_sleep = false;
		wake_gp = true;
	}
	raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);

	return wake_gp;
}

static bool rcu_nocb_rdp_deoffload_wait_cond(struct rcu_data *rdp)
{
	unsigned long flags;
	bool ret;

	/*
	 * Locking makes sure rcuog is done handling this rdp before deoffloaded
	 * enqueue can happen. Also it keeps the SEGCBLIST_OFFLOADED flag stable
	 * while the ->nocb_lock is held.
	 */
	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
	ret = !rcu_segcblist_test_flags(&rdp->cblist, SEGCBLIST_OFFLOADED);
	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);

	return ret;
}

static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp)
{
	unsigned long flags;
	int wake_gp;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	/* CPU must be offline, unless it's early boot */
	WARN_ON_ONCE(cpu_online(rdp->cpu) && rdp->cpu != raw_smp_processor_id());

	pr_info("De-offloading %d\n", rdp->cpu);

	/* Flush all callbacks from segcblist and bypass */
	rcu_barrier();

	/*
	 * Make sure the rcuoc kthread isn't in the middle of a nocb locked
	 * sequence while offloading is deactivated, along with nocb locking.
	 */
	if (rdp->nocb_cb_kthread)
		kthread_park(rdp->nocb_cb_kthread);

	rcu_nocb_lock_irqsave(rdp, flags);
	WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
	WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
	rcu_nocb_unlock_irqrestore(rdp, flags);

	wake_gp = rcu_nocb_queue_toggle_rdp(rdp);

	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);

	if (rdp_gp->nocb_gp_kthread) {
		if (wake_gp)
			wake_up_process(rdp_gp->nocb_gp_kthread);

		swait_event_exclusive(rdp->nocb_state_wq,
				      rcu_nocb_rdp_deoffload_wait_cond(rdp));
	} else {
		/*
		 * No kthread to clear the flags for us or to remove the rdp
		 * from the nocb list to iterate.  Do it here instead.  Locking
		 * doesn't look strictly necessary, but we stick to paranoia in
		 * this rare path.
		 */
		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
		rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_OFFLOADED);
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);

		list_del(&rdp->nocb_entry_rdp);
	}

	mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);

	return 0;
}

int rcu_nocb_cpu_deoffload(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	int ret = 0;

	cpus_read_lock();
	mutex_lock(&rcu_state.nocb_mutex);
	if (rcu_rdp_is_offloaded(rdp)) {
		if (!cpu_online(cpu)) {
			ret = rcu_nocb_rdp_deoffload(rdp);
			if (!ret)
				cpumask_clear_cpu(cpu, rcu_nocb_mask);
		} else {
			pr_info("NOCB: Cannot CB-deoffload online CPU %d\n", rdp->cpu);
			ret = -EINVAL;
		}
	}
	mutex_unlock(&rcu_state.nocb_mutex);
	cpus_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);

static bool rcu_nocb_rdp_offload_wait_cond(struct rcu_data *rdp)
{
	unsigned long flags;
	bool ret;

	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
	ret = rcu_segcblist_test_flags(&rdp->cblist, SEGCBLIST_OFFLOADED);
	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);

	return ret;
}

static int rcu_nocb_rdp_offload(struct rcu_data *rdp)
{
	int wake_gp;

	WARN_ON_ONCE(cpu_online(rdp->cpu));
	/*
	 * For now we only support re-offload, i.e.: the rdp must have been
	 * offloaded on boot first.
	 */
	if (!rdp->nocb_gp_rdp)
		return -EINVAL;

	if (WARN_ON_ONCE(!rdp->nocb_gp_kthread))
		return -EINVAL;

	pr_info("Offloading %d\n", rdp->cpu);

	WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
	WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));

	wake_gp = rcu_nocb_queue_toggle_rdp(rdp);
	if (wake_gp)
		wake_up_process(rdp->nocb_gp_kthread);

	swait_event_exclusive(rdp->nocb_state_wq,
			      rcu_nocb_rdp_offload_wait_cond(rdp));

	kthread_unpark(rdp->nocb_cb_kthread);

	return 0;
}

int rcu_nocb_cpu_offload(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	int ret = 0;

	cpus_read_lock();
	mutex_lock(&rcu_state.nocb_mutex);
	if (!rcu_rdp_is_offloaded(rdp)) {
		if (!cpu_online(cpu)) {
			ret = rcu_nocb_rdp_offload(rdp);
			if (!ret)
				cpumask_set_cpu(cpu, rcu_nocb_mask);
		} else {
			pr_info("NOCB: Cannot CB-offload online CPU %d\n", rdp->cpu);
			ret = -EINVAL;
		}
	}
	mutex_unlock(&rcu_state.nocb_mutex);
	cpus_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
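
/*
 * Example (hypothetical caller, not taken from this file): runtime
 * toggling of callback offloading for an offline CPU, e.g. from
 * housekeeping or test code:
 *
 *	if (rcu_nocb_cpu_offload(cpu))
 *		pr_err("CPU %d: offload failed\n", cpu);
 *	...
 *	if (rcu_nocb_cpu_deoffload(cpu))
 *		pr_err("CPU %d: de-offload failed\n", cpu);
 *
 * Both return 0 on success (including when the CPU is already in the
 * requested state) and -EINVAL if, for example, the CPU is still online.
 */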
| 1201 | |
| 1202 | #ifdef CONFIG_RCU_LAZY |
| 1203 | static unsigned long |
| 1204 | lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
| 1205 | { |
| 1206 | int cpu; |
| 1207 | unsigned long count = 0; |
| 1208 | |
| 1209 | if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask))) |
| 1210 | return 0; |
| 1211 | |
| 1212 | /* Protect rcu_nocb_mask against concurrent (de-)offloading. */ |
| 1213 | if (!mutex_trylock(&rcu_state.nocb_mutex)) |
| 1214 | return 0; |
| 1215 | |
| 1216 | /* Snapshot count of all CPUs */ |
| 1217 | for_each_cpu(cpu, rcu_nocb_mask) { |
| 1218 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
| 1219 | |
| 1220 | count += READ_ONCE(rdp->lazy_len); |
| 1221 | } |
| 1222 | |
| 1223 | mutex_unlock(lock: &rcu_state.nocb_mutex); |
| 1224 | |
| 1225 | return count ? count : SHRINK_EMPTY; |
| 1226 | } |
| 1227 | |
| 1228 | static unsigned long |
| 1229 | lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
| 1230 | { |
| 1231 | int cpu; |
| 1232 | unsigned long flags; |
| 1233 | unsigned long count = 0; |
| 1234 | |
| 1235 | if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask))) |
| 1236 | return 0; |
| 1237 | /* |
| 1238 | * Protect against concurrent (de-)offloading. Otherwise nocb locking |
| 1239 | * may be ignored or imbalanced. |
| 1240 | */ |
| 1241 | if (!mutex_trylock(&rcu_state.nocb_mutex)) { |
| 1242 | /* |
| 1243 | * But really don't insist if nocb_mutex is contended since we |
| 1244 | * can't guarantee that it will never engage in a dependency |
| 1245 | * chain involving memory allocation. The lock is seldom contended |
| 1246 | * anyway. |
| 1247 | */ |
| 1248 | return 0; |
| 1249 | } |
| 1250 | |
| 1251 | /* Snapshot count of all CPUs */ |
| 1252 | for_each_cpu(cpu, rcu_nocb_mask) { |
| 1253 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
| 1254 | int _count; |
| 1255 | |
| 1256 | if (WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp))) |
| 1257 | continue; |
| 1258 | |
| 1259 | if (!READ_ONCE(rdp->lazy_len)) |
| 1260 | continue; |
| 1261 | |
| 1262 | rcu_nocb_lock_irqsave(rdp, flags); |
| 1263 | /* |
| 1264 | * Recheck under the nocb lock. Since we are not holding the bypass |
| 1265 | * lock we may still race with increments from the enqueuer but still |
| 1266 | * we know for sure if there is at least one lazy callback. |
| 1267 | */ |
| 1268 | _count = READ_ONCE(rdp->lazy_len); |
| 1269 | if (!_count) { |
| 1270 | rcu_nocb_unlock_irqrestore(rdp, flags); |
| 1271 | continue; |
| 1272 | } |
| 1273 | rcu_nocb_try_flush_bypass(rdp, j: jiffies); |
| 1274 | rcu_nocb_unlock_irqrestore(rdp, flags); |
| 1275 | wake_nocb_gp(rdp, force: false); |
| 1276 | sc->nr_to_scan -= _count; |
| 1277 | count += _count; |
| 1278 | if (sc->nr_to_scan <= 0) |
| 1279 | break; |
| 1280 | } |
| 1281 | |
| 1282 | mutex_unlock(lock: &rcu_state.nocb_mutex); |
| 1283 | |
| 1284 | return count ? count : SHRINK_STOP; |
| 1285 | } |
| 1286 | #endif // #ifdef CONFIG_RCU_LAZY |
| 1287 | |
| 1288 | void __init rcu_init_nohz(void) |
| 1289 | { |
| 1290 | int cpu; |
| 1291 | struct rcu_data *rdp; |
| 1292 | const struct cpumask *cpumask = NULL; |
| 1293 | struct shrinker * __maybe_unused lazy_rcu_shrinker; |
| 1294 | |
| 1295 | #if defined(CONFIG_NO_HZ_FULL) |
| 1296 | if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) |
| 1297 | cpumask = tick_nohz_full_mask; |
| 1298 | #endif |
| 1299 | |
| 1300 | if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) && |
| 1301 | !rcu_state.nocb_is_setup && !cpumask) |
| 1302 | cpumask = cpu_possible_mask; |
| 1303 | |
| 1304 | if (cpumask) { |
| 1305 | if (!cpumask_available(mask: rcu_nocb_mask)) { |
| 1306 | if (!zalloc_cpumask_var(mask: &rcu_nocb_mask, GFP_KERNEL)) { |
| 1307 | pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n" ); |
| 1308 | return; |
| 1309 | } |
| 1310 | } |
| 1311 | |
| 1312 | cpumask_or(dstp: rcu_nocb_mask, src1p: rcu_nocb_mask, src2p: cpumask); |
| 1313 | rcu_state.nocb_is_setup = true; |
| 1314 | } |
| 1315 | |
| 1316 | if (!rcu_state.nocb_is_setup) |
| 1317 | return; |
| 1318 | |
| 1319 | #ifdef CONFIG_RCU_LAZY |
| 1320 | lazy_rcu_shrinker = shrinker_alloc(flags: 0, fmt: "rcu-lazy" ); |
| 1321 | if (!lazy_rcu_shrinker) { |
| 1322 | pr_err("Failed to allocate lazy_rcu shrinker!\n" ); |
| 1323 | } else { |
| 1324 | lazy_rcu_shrinker->count_objects = lazy_rcu_shrink_count; |
| 1325 | lazy_rcu_shrinker->scan_objects = lazy_rcu_shrink_scan; |
| 1326 | |
| 1327 | shrinker_register(shrinker: lazy_rcu_shrinker); |
| 1328 | } |
| 1329 | #endif // #ifdef CONFIG_RCU_LAZY |
| 1330 | |
| 1331 | if (!cpumask_subset(src1p: rcu_nocb_mask, cpu_possible_mask)) { |
| 1332 | pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n" ); |
| 1333 | cpumask_and(dstp: rcu_nocb_mask, cpu_possible_mask, |
| 1334 | src2p: rcu_nocb_mask); |
| 1335 | } |
| 1336 | if (cpumask_empty(srcp: rcu_nocb_mask)) |
| 1337 | pr_info("\tOffload RCU callbacks from CPUs: (none).\n" ); |
| 1338 | else |
| 1339 | pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n" , |
| 1340 | cpumask_pr_args(rcu_nocb_mask)); |
| 1341 | if (rcu_nocb_poll) |
| 1342 | pr_info("\tPoll for callbacks from no-CBs CPUs.\n" ); |
| 1343 | |
| 1344 | for_each_cpu(cpu, rcu_nocb_mask) { |
| 1345 | rdp = per_cpu_ptr(&rcu_data, cpu); |
| 1346 | if (rcu_segcblist_empty(rsclp: &rdp->cblist)) |
| 1347 | rcu_segcblist_init(rsclp: &rdp->cblist); |
| 1348 | rcu_segcblist_set_flags(rsclp: &rdp->cblist, SEGCBLIST_OFFLOADED); |
| 1349 | } |
| 1350 | rcu_organize_nocb_kthreads(); |
| 1351 | } |
| 1352 | |
| 1353 | /* Initialize per-rcu_data variables for no-CBs CPUs. */ |
| 1354 | static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) |
| 1355 | { |
| 1356 | init_swait_queue_head(&rdp->nocb_cb_wq); |
| 1357 | init_swait_queue_head(&rdp->nocb_gp_wq); |
| 1358 | init_swait_queue_head(&rdp->nocb_state_wq); |
| 1359 | raw_spin_lock_init(&rdp->nocb_lock); |
| 1360 | raw_spin_lock_init(&rdp->nocb_bypass_lock); |
| 1361 | raw_spin_lock_init(&rdp->nocb_gp_lock); |
| 1362 | timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); |
| 1363 | rcu_cblist_init(rclp: &rdp->nocb_bypass); |
| 1364 | WRITE_ONCE(rdp->lazy_len, 0); |
| 1365 | mutex_init(&rdp->nocb_gp_kthread_mutex); |
| 1366 | } |
| 1367 | |
| 1368 | /* |
| 1369 | * If the specified CPU is a no-CBs CPU that does not already have its |
| 1370 | * rcuo CB kthread, spawn it. Additionally, if the rcuo GP kthread |
| 1371 | * for this CPU's group has not yet been created, spawn it as well. |
| 1372 | */ |
| 1373 | static void rcu_spawn_cpu_nocb_kthread(int cpu) |
| 1374 | { |
| 1375 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
| 1376 | struct rcu_data *rdp_gp; |
| 1377 | struct task_struct *t; |
| 1378 | struct sched_param sp; |
| 1379 | |
| 1380 | if (!rcu_scheduler_fully_active || !rcu_state.nocb_is_setup) |
| 1381 | return; |
| 1382 | |
| 1383 | /* If there already is an rcuo kthread, then nothing to do. */ |
| 1384 | if (rdp->nocb_cb_kthread) |
| 1385 | return; |
| 1386 | |
| 1387 | /* If we didn't spawn the GP kthread first, reorganize! */ |
| 1388 | sp.sched_priority = kthread_prio; |
| 1389 | rdp_gp = rdp->nocb_gp_rdp; |
| 1390 | mutex_lock(&rdp_gp->nocb_gp_kthread_mutex); |
| 1391 | if (!rdp_gp->nocb_gp_kthread) { |
| 1392 | t = kthread_run(rcu_nocb_gp_kthread, rdp_gp, |
| 1393 | "rcuog/%d" , rdp_gp->cpu); |
| 1394 | if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n" , __func__)) { |
| 1395 | mutex_unlock(lock: &rdp_gp->nocb_gp_kthread_mutex); |
| 1396 | goto err; |
| 1397 | } |
| 1398 | WRITE_ONCE(rdp_gp->nocb_gp_kthread, t); |
| 1399 | if (kthread_prio) |
| 1400 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
| 1401 | } |
| 1402 | mutex_unlock(lock: &rdp_gp->nocb_gp_kthread_mutex); |
| 1403 | |
| 1404 | /* Spawn the kthread for this CPU. */ |
| 1405 | t = kthread_create(rcu_nocb_cb_kthread, rdp, |
| 1406 | "rcuo%c/%d" , rcu_state.abbr, cpu); |
| 1407 | if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n" , __func__)) |
| 1408 | goto err; |
| 1409 | |
| 1410 | if (rcu_rdp_is_offloaded(rdp)) |
| 1411 | wake_up_process(t);
| 1412 | else
| 1413 | kthread_park(t);
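/*
 * Hedged note: a CB kthread created for a CPU that is not currently
 * offloaded is left parked, which lets a later offload transition
 * unpark it rather than create a fresh kthread.
 */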
| 1414 | |
| 1415 | if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_CB_BOOST) && kthread_prio) |
| 1416 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
| 1417 | |
| 1418 | WRITE_ONCE(rdp->nocb_cb_kthread, t); |
| 1419 | WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread); |
| 1420 | return; |
| 1421 | |
| 1422 | err: |
| 1423 | /* |
| 1424 | * No need to protect against concurrent rcu_barrier() |
| 1425 | * because the number of callbacks should be 0 for a non-boot CPU, |
| 1426 | * therefore rcu_barrier() shouldn't even try to grab the nocb_lock. |
| 1427 | * But hold nocb_mutex to avoid nocb_lock imbalance from shrinker. |
| 1428 | */ |
| 1429 | WARN_ON_ONCE(system_state > SYSTEM_BOOTING && rcu_segcblist_n_cbs(&rdp->cblist)); |
| 1430 | mutex_lock(&rcu_state.nocb_mutex); |
| 1431 | if (rcu_rdp_is_offloaded(rdp)) { |
| 1432 | rcu_nocb_rdp_deoffload(rdp); |
| 1433 | cpumask_clear_cpu(cpu, rcu_nocb_mask);
| 1434 | } |
| 1435 | mutex_unlock(&rcu_state.nocb_mutex);
| 1436 | } |
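/*
 * Naming sketch (hedged): the CB kthread spawned above for, say, CPU 3
 * is named "rcuo%c/3", where rcu_state.abbr is expected to be 'p' for
 * preemptible RCU and 's' otherwise, giving "rcuop/3" or "rcuos/3".
 * The group's GP kthread is instead named after the group's first CPU,
 * for example "rcuog/0" when CPU 3 belongs to the group led by CPU 0.
 */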
| 1437 | |
| 1438 | /* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */ |
| 1439 | static int rcu_nocb_gp_stride = -1; |
| 1440 | module_param(rcu_nocb_gp_stride, int, 0444); |
| 1441 | |
| 1442 | /* |
| 1443 | * Initialize GP-CB relationships for all no-CBs CPUs.
| 1444 | */ |
| 1445 | static void __init rcu_organize_nocb_kthreads(void) |
| 1446 | { |
| 1447 | int cpu; |
| 1448 | bool firsttime = true; |
| 1449 | bool gotnocbs = false; |
| 1450 | bool gotnocbscbs = true; |
| 1451 | int ls = rcu_nocb_gp_stride; |
| 1452 | int nl = 0; /* Next GP kthread. */ |
| 1453 | struct rcu_data *rdp; |
| 1454 | struct rcu_data *rdp_gp = NULL; /* Suppress misguided gcc warn. */ |
| 1455 | |
| 1456 | if (!cpumask_available(rcu_nocb_mask))
| 1457 | return; |
| 1458 | if (ls == -1) { |
| 1459 | ls = nr_cpu_ids / int_sqrt(nr_cpu_ids); |
| 1460 | rcu_nocb_gp_stride = ls; |
| 1461 | } |
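/*
 * Worked example (assuming a 64-CPU system): int_sqrt(64) == 8, so
 * ls = 64 / 8 = 8 and each rcuog kthread serves a group of eight
 * CPUs (0-7, 8-15, ...), balancing per-kthread wakeup work against
 * the total number of GP kthreads.
 */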
| 1462 | |
| 1463 | /* |
| 1464 | * Each pass through this loop sets up one rcu_data structure. |
| 1465 | * Should the corresponding CPU come online in the future, then |
| 1466 | * we will spawn the needed set of rcu_nocb_kthread() kthreads. |
| 1467 | */ |
| 1468 | for_each_possible_cpu(cpu) { |
| 1469 | rdp = per_cpu_ptr(&rcu_data, cpu); |
| 1470 | if (rdp->cpu >= nl) { |
| 1471 | /* New GP kthread, set up for CBs & next GP. */ |
| 1472 | gotnocbs = true; |
| 1473 | nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls; |
| 1474 | rdp_gp = rdp; |
| 1475 | INIT_LIST_HEAD(&rdp->nocb_head_rdp);
| 1476 | if (dump_tree) {
| 1477 | if (!firsttime)
| 1478 | pr_cont("%s\n", gotnocbscbs
| 1479 | ? "" : " (self only)");
| 1480 | gotnocbscbs = false;
| 1481 | firsttime = false;
| 1482 | pr_alert("%s: No-CB GP kthread CPU %d:",
| 1483 | __func__, cpu);
| 1484 | } |
| 1485 | } else { |
| 1486 | /* Another CB kthread, link to previous GP kthread. */ |
| 1487 | gotnocbscbs = true; |
| 1488 | if (dump_tree) |
| 1489 | pr_cont(" %d" , cpu); |
| 1490 | } |
| 1491 | rdp->nocb_gp_rdp = rdp_gp; |
| 1492 | if (cpumask_test_cpu(cpu, rcu_nocb_mask))
| 1493 | list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp);
| 1494 | } |
| 1495 | if (gotnocbs && dump_tree) |
| 1496 | pr_cont("%s\n" , gotnocbscbs ? "" : " (self only)" ); |
| 1497 | } |
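/*
 * Layout sketch (hedged, assuming a stride of 8 and rcu_nocbs=0-11):
 * CPUs 0-7 hang off the GP rcu_data of CPU 0 and CPUs 8-11 off that
 * of CPU 8. With rcutree.dump_tree set, the loop above would print
 * roughly:
 *
 *	rcu_organize_nocb_kthreads: No-CB GP kthread CPU 0: 1 2 3 4 5 6 7
 *	rcu_organize_nocb_kthreads: No-CB GP kthread CPU 8: 9 10 11
 */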
| 1498 | |
| 1499 | /* |
| 1500 | * Bind the current task to the offloaded CPUs. If there are no offloaded |
| 1501 | * CPUs, leave the task unbound. Splat if the bind attempt fails. |
| 1502 | */ |
| 1503 | void rcu_bind_current_to_nocb(void) |
| 1504 | { |
| 1505 | if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask))
| 1506 | WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask)); |
| 1507 | } |
| 1508 | EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb); |
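/*
 * Hedged usage sketch: a callback-flooding kthread (for example, the
 * rcutorture forward-progress tester) can invoke
 * rcu_bind_current_to_nocb() so that it runs only on offloaded CPUs,
 * keeping its call_rcu() load on CPUs whose callbacks are already
 * handled by rcuo kthreads.
 */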
| 1509 | |
| 1510 | // The ->on_cpu field is available only in CONFIG_SMP=y, so... |
| 1511 | #ifdef CONFIG_SMP |
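/*
 * Return "!" if the task is runnable but not actually running on a
 * CPU, which flags a kthread that should be executing but is not.
 */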
| 1512 | static char *show_rcu_should_be_on_cpu(struct task_struct *tsp) |
| 1513 | { |
| 1514 | return tsp && task_is_running(tsp) && !tsp->on_cpu ? "!" : "";
| 1515 | } |
| 1516 | #else // #ifdef CONFIG_SMP |
| 1517 | static char *show_rcu_should_be_on_cpu(struct task_struct *tsp) |
| 1518 | { |
| 1519 | return "" ; |
| 1520 | } |
| 1521 | #endif // #else #ifdef CONFIG_SMP |
| 1522 | |
| 1523 | /* |
| 1524 | * Dump out nocb grace-period kthread state for the specified rcu_data |
| 1525 | * structure. |
| 1526 | */ |
| 1527 | static void show_rcu_nocb_gp_state(struct rcu_data *rdp) |
| 1528 | { |
| 1529 | struct rcu_node *rnp = rdp->mynode; |
| 1530 | |
| 1531 | pr_info("nocb GP %d %c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n" , |
| 1532 | rdp->cpu, |
| 1533 | "kK" [!!rdp->nocb_gp_kthread], |
| 1534 | "lL" [raw_spin_is_locked(&rdp->nocb_gp_lock)], |
| 1535 | "dD" [!!rdp->nocb_defer_wakeup], |
| 1536 | "tT" [timer_pending(&rdp->nocb_timer)], |
| 1537 | "sS" [!!rdp->nocb_gp_sleep], |
| 1538 | ".W" [swait_active(&rdp->nocb_gp_wq)], |
| 1539 | ".W" [swait_active(&rnp->nocb_gp_wq[0])], |
| 1540 | ".W" [swait_active(&rnp->nocb_gp_wq[1])], |
| 1541 | ".B" [!!rdp->nocb_gp_bypass], |
| 1542 | ".G" [!!rdp->nocb_gp_gp], |
| 1543 | (long)rdp->nocb_gp_seq, |
| 1544 | rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops), |
| 1545 | rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.', |
| 1546 | rdp->nocb_gp_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1, |
| 1547 | show_rcu_should_be_on_cpu(rdp->nocb_gp_kthread)); |
| 1548 | } |
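/*
 * Decoding sketch (hedged): each "xX"[cond] expression above prints
 * the lower-case letter when cond is zero and the upper-case letter
 * otherwise, while ".Y"[cond] prints '.' for zero. For instance, a
 * leading "KldTS" would mean the GP kthread exists ('K'), its
 * ->nocb_gp_lock is unlocked ('l'), no wakeup is deferred ('d'), the
 * ->nocb_timer is pending ('T'), and the kthread is sleeping ('S').
 */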
| 1549 | |
| 1550 | /* Dump out nocb kthread state for the specified rcu_data structure. */ |
| 1551 | static void show_rcu_nocb_state(struct rcu_data *rdp) |
| 1552 | { |
| 1553 | char bufd[22]; |
| 1554 | char bufw[45]; |
| 1555 | char bufr[45]; |
| 1556 | char bufn[22]; |
| 1557 | char bufb[22]; |
| 1558 | struct rcu_data *nocb_next_rdp; |
| 1559 | struct rcu_segcblist *rsclp = &rdp->cblist; |
| 1560 | bool waslocked; |
| 1561 | bool wassleep; |
| 1562 | |
| 1563 | if (rdp->nocb_gp_rdp == rdp) |
| 1564 | show_rcu_nocb_gp_state(rdp); |
| 1565 | |
| 1566 | if (!rcu_segcblist_is_offloaded(&rdp->cblist))
| 1567 | return; |
| 1568 | |
| 1569 | nocb_next_rdp = list_next_or_null_rcu(&rdp->nocb_gp_rdp->nocb_head_rdp, |
| 1570 | &rdp->nocb_entry_rdp, |
| 1571 | typeof(*rdp), |
| 1572 | nocb_entry_rdp); |
| 1573 | |
| 1574 | sprintf(bufd, "%ld", rsclp->seglen[RCU_DONE_TAIL]);
| 1575 | sprintf(bufw, "%ld(%ld)", rsclp->seglen[RCU_WAIT_TAIL], rsclp->gp_seq[RCU_WAIT_TAIL]);
| 1576 | sprintf(bufr, "%ld(%ld)", rsclp->seglen[RCU_NEXT_READY_TAIL],
| 1577 | rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
| 1578 | sprintf(bufn, "%ld", rsclp->seglen[RCU_NEXT_TAIL]);
| 1579 | sprintf(bufb, "%ld", rcu_cblist_n_cbs(&rdp->nocb_bypass));
| 1580 | pr_info(" CB %d^%d->%d %c%c%c%c%c F%ld L%ld C%d %c%s%c%s%c%s%c%s%c%s q%ld %c CPU %d%s\n",
| 1581 | rdp->cpu, rdp->nocb_gp_rdp->cpu, |
| 1582 | nocb_next_rdp ? nocb_next_rdp->cpu : -1, |
| 1583 | "kK" [!!rdp->nocb_cb_kthread], |
| 1584 | "bB" [raw_spin_is_locked(&rdp->nocb_bypass_lock)], |
| 1585 | "lL" [raw_spin_is_locked(&rdp->nocb_lock)], |
| 1586 | "sS" [!!rdp->nocb_cb_sleep], |
| 1587 | ".W" [swait_active(&rdp->nocb_cb_wq)], |
| 1588 | jiffies - rdp->nocb_bypass_first, |
| 1589 | jiffies - rdp->nocb_nobypass_last, |
| 1590 | rdp->nocb_nobypass_count, |
| 1591 | ".D" [rcu_segcblist_ready_cbs(rsclp)], |
| 1592 | rcu_segcblist_segempty(rsclp, RCU_DONE_TAIL) ? "" : bufd, |
| 1593 | ".W" [!rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL)], |
| 1594 | rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL) ? "" : bufw, |
| 1595 | ".R" [!rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL)], |
| 1596 | rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL) ? "" : bufr, |
| 1597 | ".N" [!rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL)], |
| 1598 | rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL) ? "" : bufn, |
| 1599 | ".B" [!!rcu_cblist_n_cbs(&rdp->nocb_bypass)], |
| 1600 | !rcu_cblist_n_cbs(&rdp->nocb_bypass) ? "" : bufb, |
| 1601 | rcu_segcblist_n_cbs(&rdp->cblist), |
| 1602 | rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.', |
| 1603 | rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1, |
| 1604 | show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread)); |
| 1605 | |
| 1606 | /* It is OK for GP kthreads to have GP state. */ |
| 1607 | if (rdp->nocb_gp_rdp == rdp) |
| 1608 | return; |
| 1609 | |
| 1610 | waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock); |
| 1611 | wassleep = swait_active(&rdp->nocb_gp_wq);
| 1612 | if (!rdp->nocb_gp_sleep && !waslocked && !wassleep) |
| 1613 | return; /* Nothing untoward. */ |
| 1614 | |
| 1615 | pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c %c\n" , |
| 1616 | "lL" [waslocked], |
| 1617 | "dD" [!!rdp->nocb_defer_wakeup], |
| 1618 | "sS" [!!rdp->nocb_gp_sleep], |
| 1619 | ".W" [wassleep]); |
| 1620 | } |
| 1621 | |
| 1622 | #else /* #ifdef CONFIG_RCU_NOCB_CPU */ |
| 1623 | |
| 1624 | /* No ->nocb_lock to acquire. */ |
| 1625 | static void rcu_nocb_lock(struct rcu_data *rdp) |
| 1626 | { |
| 1627 | } |
| 1628 | |
| 1629 | /* No ->nocb_lock to release. */ |
| 1630 | static void rcu_nocb_unlock(struct rcu_data *rdp) |
| 1631 | { |
| 1632 | } |
| 1633 | |
| 1634 | /* No ->nocb_lock to release. */ |
| 1635 | static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, |
| 1636 | unsigned long flags) |
| 1637 | { |
| 1638 | local_irq_restore(flags); |
| 1639 | } |
| 1640 | |
| 1641 | /* Lockdep check that ->cblist may be safely accessed. */ |
| 1642 | static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) |
| 1643 | { |
| 1644 | lockdep_assert_irqs_disabled(); |
| 1645 | } |
| 1646 | |
| 1647 | static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) |
| 1648 | { |
| 1649 | } |
| 1650 | |
| 1651 | static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) |
| 1652 | { |
| 1653 | return NULL; |
| 1654 | } |
| 1655 | |
| 1656 | static void rcu_init_one_nocb(struct rcu_node *rnp) |
| 1657 | { |
| 1658 | } |
| 1659 | |
| 1660 | static bool wake_nocb_gp(struct rcu_data *rdp, bool force) |
| 1661 | { |
| 1662 | return false; |
| 1663 | } |
| 1664 | |
| 1665 | static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, |
| 1666 | unsigned long j, bool lazy) |
| 1667 | { |
| 1668 | return true; |
| 1669 | } |
| 1670 | |
| 1671 | static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, |
| 1672 | rcu_callback_t func, unsigned long flags, bool lazy) |
| 1673 | { |
| 1674 | WARN_ON_ONCE(1); /* Should be dead code! */ |
| 1675 | } |
| 1676 | |
| 1677 | static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, |
| 1678 | unsigned long flags) |
| 1679 | { |
| 1680 | WARN_ON_ONCE(1); /* Should be dead code! */ |
| 1681 | } |
| 1682 | |
| 1683 | static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) |
| 1684 | { |
| 1685 | } |
| 1686 | |
| 1687 | static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level) |
| 1688 | { |
| 1689 | return false; |
| 1690 | } |
| 1691 | |
| 1692 | static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) |
| 1693 | { |
| 1694 | return false; |
| 1695 | } |
| 1696 | |
| 1697 | static void rcu_spawn_cpu_nocb_kthread(int cpu) |
| 1698 | { |
| 1699 | } |
| 1700 | |
| 1701 | static void show_rcu_nocb_state(struct rcu_data *rdp) |
| 1702 | { |
| 1703 | } |
| 1704 | |
| 1705 | #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ |
| 1706 | |