// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual cpu timer based timer functions.
 *
 * Copyright IBM Corp. 2004, 2012
 * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
 */

#include <linux/kernel_stat.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/timex.h>
#include <linux/types.h>
#include <linux/time.h>
#include <asm/alternative.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
#include <asm/vtime.h>
#include <asm/cpu_mf.h>
#include <asm/smp.h>

#include "entry.h"

static void virt_timer_expire(void);

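/*
 * List of pending virtual CPU timers, sorted by ascending expiry.
 * virt_timer_current caches the earliest pending expiry, while
 * virt_timer_elapsed accumulates the CPU time consumed against the
 * current timer base.
 */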
static LIST_HEAD(virt_timer_list);
static DEFINE_SPINLOCK(virt_timer_lock);
static atomic64_t virt_timer_current;
static atomic64_t virt_timer_elapsed;

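/* Per-CPU SMT utilization state used to scale raw cputime values */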
DEFINE_PER_CPU(u64, mt_cycles[8]);
static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_jiffies);

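/*
 * Program the CPU timer with a new expiry value and account the time
 * that elapsed since the last update as system time.
 */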
static inline void set_vtimer(u64 expires)
{
	struct lowcore *lc = get_lowcore();
	u64 timer;

	asm volatile(
		" stpt %0\n"	/* Store current cpu timer value */
		" spt %1"	/* Set new value imm. afterwards */
		: "=Q" (timer) : "Q" (expires));
	lc->system_timer += lc->last_update_timer - timer;
	lc->last_update_timer = expires;
}

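/*
 * Advance the virtual timer base by @elapsed and report whether the
 * earliest pending timer is now due.
 */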
static inline int virt_timer_forward(u64 elapsed)
{
	BUG_ON(!irqs_disabled());

	if (list_empty(&virt_timer_list))
		return 0;
	elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
	return elapsed >= atomic64_read(&virt_timer_current);
}

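/*
 * Recompute the SMT scaling factor from the MT diagnostic cycle
 * counters. The loop accumulates mult and div without intermediate
 * divisions, so that mult / div equals
 * sum(delta[i] / (i + 1)) / sum(delta[i]), i.e. a weighted average of
 * the reciprocal number of active threads.
 */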
static void update_mt_scaling(void)
{
	u64 cycles_new[8], *cycles_old;
	u64 delta, fac, mult, div;
	int i;

	stcctm(MT_DIAG, smp_cpu_mtid + 1, cycles_new);
	cycles_old = this_cpu_ptr(mt_cycles);
	fac = 1;
	mult = div = 0;
	for (i = 0; i <= smp_cpu_mtid; i++) {
		delta = cycles_new[i] - cycles_old[i];
		div += delta;
		mult *= i + 1;
		mult += delta * fac;
		fac *= i + 1;
	}
	div *= fac;
	if (div > 0) {
		/* Update scaling factor */
		__this_cpu_write(mt_scaling_mult, mult);
		__this_cpu_write(mt_scaling_div, div);
		memcpy(cycles_old, cycles_new,
		       sizeof(u64) * (smp_cpu_mtid + 1));
	}
	__this_cpu_write(mt_scaling_jiffies, jiffies_64);
}

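/*
 * Return the time that passed between @new and the stored per-task
 * snapshot, and update the snapshot to @new.
 */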
static inline u64 update_tsk_timer(unsigned long *tsk_vtime, u64 new)
{
	u64 delta;

	delta = new - *tsk_vtime;
	*tsk_vtime = new;
	return delta;
}

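/*
 * Scale raw cputime by the current SMT utilization factor; on a CPU
 * without SMT (smp_cpu_mtid == 0) the value is passed through
 * unchanged.
 */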
static inline u64 scale_vtime(u64 vtime)
{
	u64 mult = __this_cpu_read(mt_scaling_mult);
	u64 div = __this_cpu_read(mt_scaling_div);

	if (smp_cpu_mtid)
		return vtime * mult / div;
	return vtime;
}

static void account_system_index_scaled(struct task_struct *p, u64 cputime,
					enum cpu_usage_stat index)
{
	p->stimescaled += cputime_to_nsecs(scale_vtime(cputime));
	account_system_index_time(p, cputime_to_nsecs(cputime), index);
}

/*
 * Update process times based on virtual cpu times stored by entry.S
 * to the lowcore fields user_timer, system_timer & steal_timer.
 */
static int do_account_vtime(struct task_struct *tsk)
{
	u64 timer, clock, user, guest, system, hardirq, softirq;
	struct lowcore *lc = get_lowcore();

	timer = lc->last_update_timer;
	clock = lc->last_update_clock;
	asm volatile(
		" stpt %0\n"	/* Store current cpu timer value */
		" stckf %1"	/* Store current tod clock value */
		: "=Q" (lc->last_update_timer),
		  "=Q" (lc->last_update_clock)
		: : "cc");
	clock = lc->last_update_clock - clock;
	timer -= lc->last_update_timer;

	if (hardirq_count())
		lc->hardirq_timer += timer;
	else
		lc->system_timer += timer;

	/* Update MT utilization calculation */
	if (smp_cpu_mtid &&
	    time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
		update_mt_scaling();

	/* Calculate cputime delta */
	user = update_tsk_timer(&tsk->thread.user_timer,
				READ_ONCE(lc->user_timer));
	guest = update_tsk_timer(&tsk->thread.guest_timer,
				 READ_ONCE(lc->guest_timer));
	system = update_tsk_timer(&tsk->thread.system_timer,
				  READ_ONCE(lc->system_timer));
	hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
				   READ_ONCE(lc->hardirq_timer));
	softirq = update_tsk_timer(&tsk->thread.softirq_timer,
				   READ_ONCE(lc->softirq_timer));
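	/* Wall-clock time not covered by any category counts as stolen */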
	lc->steal_timer +=
		clock - user - guest - system - hardirq - softirq;

	/* Push account value */
	if (user) {
		account_user_time(tsk, cputime_to_nsecs(user));
		tsk->utimescaled += cputime_to_nsecs(scale_vtime(user));
	}

	if (guest) {
		account_guest_time(tsk, cputime_to_nsecs(guest));
		tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest));
	}

	if (system)
		account_system_index_scaled(tsk, system, CPUTIME_SYSTEM);
	if (hardirq)
		account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ);
	if (softirq)
		account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ);

	return virt_timer_forward(user + guest + system + hardirq + softirq);
}

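/*
 * Fold the accumulated lowcore times into the outgoing task and load
 * the incoming task's snapshots, so each task is only charged for the
 * time that accrued while it was running.
 */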
void vtime_task_switch(struct task_struct *prev)
{
	struct lowcore *lc = get_lowcore();

	do_account_vtime(prev);
	prev->thread.user_timer = lc->user_timer;
	prev->thread.guest_timer = lc->guest_timer;
	prev->thread.system_timer = lc->system_timer;
	prev->thread.hardirq_timer = lc->hardirq_timer;
	prev->thread.softirq_timer = lc->softirq_timer;
	lc->user_timer = current->thread.user_timer;
	lc->guest_timer = current->thread.guest_timer;
	lc->system_timer = current->thread.system_timer;
	lc->hardirq_timer = current->thread.hardirq_timer;
	lc->softirq_timer = current->thread.softirq_timer;
}

/*
 * On s390, accounting pending user time also implies accounting
 * system time in order to correctly compute steal time.
 */
void vtime_flush(struct task_struct *tsk)
{
	struct lowcore *lc = get_lowcore();
	u64 steal, avg_steal;

	if (do_account_vtime(tsk))
		virt_timer_expire();

	steal = lc->steal_timer;
	avg_steal = lc->avg_steal_timer;
	if ((s64) steal > 0) {
		lc->steal_timer = 0;
		account_steal_time(cputime_to_nsecs(steal));
		avg_steal += steal;
	}
	lc->avg_steal_timer = avg_steal / 2;
}

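/*
 * Return the CPU time consumed since the last update. The cpu timer
 * counts down, hence the delta is the old value minus the new value.
 */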
static u64 vtime_delta(void)
{
	struct lowcore *lc = get_lowcore();
	u64 timer = lc->last_update_timer;

	lc->last_update_timer = get_cpu_timer();
	return timer - lc->last_update_timer;
}

/*
 * Account the elapsed cpu timer delta as guest time if the task runs
 * a KVM guest (PF_VCPU), otherwise as system time.
 */
void vtime_account_kernel(struct task_struct *tsk)
{
	struct lowcore *lc = get_lowcore();
	u64 delta = vtime_delta();

	if (tsk->flags & PF_VCPU)
		lc->guest_timer += delta;
	else
		lc->system_timer += delta;

	virt_timer_forward(delta);
}
EXPORT_SYMBOL_GPL(vtime_account_kernel);

void vtime_account_softirq(struct task_struct *tsk)
{
	u64 delta = vtime_delta();

	get_lowcore()->softirq_timer += delta;

	virt_timer_forward(delta);
}

void vtime_account_hardirq(struct task_struct *tsk)
{
	u64 delta = vtime_delta();

	get_lowcore()->hardirq_timer += delta;

	virt_timer_forward(delta);
}

/*
 * Sorted add to a list. The list is searched linearly until the first
 * element with a later expiry is found.
 */
static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
{
	struct vtimer_list *tmp;

	list_for_each_entry(tmp, head, entry) {
		if (tmp->expires > timer->expires) {
			list_add_tail(&timer->entry, &tmp->entry);
			return;
		}
	}
	list_add_tail(&timer->entry, head);
}

/*
 * Handler for expired virtual CPU timer.
 */
static void virt_timer_expire(void)
{
	struct vtimer_list *timer, *tmp;
	unsigned long elapsed;
	LIST_HEAD(cb_list);

	/* walk timer list, fire all expired timers */
	spin_lock(&virt_timer_lock);
	elapsed = atomic64_read(&virt_timer_elapsed);
	list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) {
		if (timer->expires < elapsed)
			/* move expired timer to the callback queue */
			list_move_tail(&timer->entry, &cb_list);
		else
			timer->expires -= elapsed;
	}
	if (!list_empty(&virt_timer_list)) {
		timer = list_first_entry(&virt_timer_list,
					 struct vtimer_list, entry);
		atomic64_set(&virt_timer_current, timer->expires);
	}
	atomic64_sub(elapsed, &virt_timer_elapsed);
	spin_unlock(&virt_timer_lock);

	/* Do callbacks and recharge periodic timers */
	list_for_each_entry_safe(timer, tmp, &cb_list, entry) {
		list_del_init(&timer->entry);
		timer->function(timer->data);
		if (timer->interval) {
			/* Recharge interval timer */
			timer->expires = timer->interval +
				atomic64_read(&virt_timer_elapsed);
			spin_lock(&virt_timer_lock);
			list_add_sorted(timer, &virt_timer_list);
			spin_unlock(&virt_timer_lock);
		}
	}
}

void init_virt_timer(struct vtimer_list *timer)
{
	timer->function = NULL;
	INIT_LIST_HEAD(&timer->entry);
}
EXPORT_SYMBOL(init_virt_timer);

static inline int vtimer_pending(struct vtimer_list *timer)
{
	return !list_empty(&timer->entry);
}

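/* Caller must hold virt_timer_lock with interrupts disabled. */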
static void internal_add_vtimer(struct vtimer_list *timer)
{
	if (list_empty(&virt_timer_list)) {
		/* First timer, just program it. */
		atomic64_set(&virt_timer_current, timer->expires);
		atomic64_set(&virt_timer_elapsed, 0);
		list_add(&timer->entry, &virt_timer_list);
	} else {
		/* Update timer against current base. */
		timer->expires += atomic64_read(&virt_timer_elapsed);
		if (likely((s64) timer->expires <
			   (s64) atomic64_read(&virt_timer_current)))
			/* The new timer expires before the current timer. */
			atomic64_set(&virt_timer_current, timer->expires);
		/* Insert new timer into the list. */
		list_add_sorted(timer, &virt_timer_list);
	}
}

static void __add_vtimer(struct vtimer_list *timer, int periodic)
{
	unsigned long flags;

	timer->interval = periodic ? timer->expires : 0;
	spin_lock_irqsave(&virt_timer_lock, flags);
	internal_add_vtimer(timer);
	spin_unlock_irqrestore(&virt_timer_lock, flags);
}

/*
 * add_virt_timer - add a one-shot virtual CPU timer
 */
void add_virt_timer(struct vtimer_list *timer)
{
	__add_vtimer(timer, 0);
}
EXPORT_SYMBOL(add_virt_timer);
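
/*
 * Example usage (a minimal sketch; my_timer_fn and the expiry value
 * are hypothetical):
 *
 *	static void my_timer_fn(unsigned long data);
 *	static struct vtimer_list my_timer;
 *
 *	init_virt_timer(&my_timer);
 *	my_timer.function = my_timer_fn;
 *	my_timer.data = 0;
 *	my_timer.expires = some_expiry_in_cpu_timer_units;
 *	add_virt_timer(&my_timer);
 */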

/*
 * add_virt_timer_periodic - add an interval virtual CPU timer
 */
void add_virt_timer_periodic(struct vtimer_list *timer)
{
	__add_vtimer(timer, 1);
}
EXPORT_SYMBOL(add_virt_timer_periodic);

static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic)
{
	unsigned long flags;
	int rc;

	BUG_ON(!timer->function);

	if (timer->expires == expires && vtimer_pending(timer))
		return 1;
	spin_lock_irqsave(&virt_timer_lock, flags);
	rc = vtimer_pending(timer);
	if (rc)
		list_del_init(&timer->entry);
	timer->interval = periodic ? expires : 0;
	timer->expires = expires;
	internal_add_vtimer(timer);
	spin_unlock_irqrestore(&virt_timer_lock, flags);
	return rc;
}

/*
 * Returns whether it has modified a pending timer (1) or not (0).
 */
int mod_virt_timer(struct vtimer_list *timer, u64 expires)
{
	return __mod_vtimer(timer, expires, 0);
}
EXPORT_SYMBOL(mod_virt_timer);

/*
 * Returns whether it has modified a pending timer (1) or not (0).
 */
int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires)
{
	return __mod_vtimer(timer, expires, 1);
}
EXPORT_SYMBOL(mod_virt_timer_periodic);

/*
 * Delete a virtual timer.
 *
 * Returns whether the deleted timer was pending (1) or not (0).
 */
int del_virt_timer(struct vtimer_list *timer)
{
	unsigned long flags;

	if (!vtimer_pending(timer))
		return 0;
	spin_lock_irqsave(&virt_timer_lock, flags);
	list_del_init(&timer->entry);
	spin_unlock_irqrestore(&virt_timer_lock, flags);
	return 1;
}
EXPORT_SYMBOL(del_virt_timer);

/*
 * Start the virtual CPU timer on the current CPU.
 */
void vtime_init(void)
{
	/* set initial cpu timer */
	set_vtimer(VTIMER_MAX_SLICE);
	/* Setup initial MT scaling values */
	if (smp_cpu_mtid) {
		__this_cpu_write(mt_scaling_jiffies, jiffies);
		__this_cpu_write(mt_scaling_mult, 1);
		__this_cpu_write(mt_scaling_div, 1);
		stcctm(MT_DIAG, smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
	}
}