// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Monitoring code
 *
 * Copyright (C) 2017 Intel Corporation
 *
 * Author:
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * This replaces the cqm.c based on perf but we reuse a lot of
 * code and data structures originally from Peter Zijlstra and Matt Fleming.
 *
 * More information about RDT can be found in the Intel(R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */

#define pr_fmt(fmt) "resctrl: " fmt

#include <linux/cpu.h>
#include <linux/resctrl.h>
#include <linux/sizes.h>
#include <linux/slab.h>

#include "internal.h"

#define CREATE_TRACE_POINTS

#include "monitor_trace.h"

/**
 * struct rmid_entry - dirty tracking for all RMID.
 * @closid:	The CLOSID for this entry.
 * @rmid:	The RMID for this entry.
 * @busy:	The number of domains with cached data using this RMID.
 * @list:	Member of the rmid_free_lru list when busy == 0.
 *
 * Depending on the architecture the correct monitor is accessed using
 * both @closid and @rmid, or @rmid only.
 *
 * Take the rdtgroup_mutex when accessing.
 */
struct rmid_entry {
	u32			closid;
	u32			rmid;
	int			busy;
	struct list_head	list;
};

/*
 * @rmid_free_lru - A least recently used list of free RMIDs.
 * These RMIDs are guaranteed to have an occupancy less than the
 * threshold occupancy.
 */
static LIST_HEAD(rmid_free_lru);

/*
 * @closid_num_dirty_rmid - The number of dirty RMID each CLOSID has.
 * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
 * Indexed by CLOSID. Protected by rdtgroup_mutex.
 */
static u32 *closid_num_dirty_rmid;

/*
 * @rmid_limbo_count - count of currently unused but (potentially)
 * dirty RMIDs.
 * This counts RMIDs that no one is currently using but that
 * may have an occupancy value > resctrl_rmid_realloc_threshold. User can
 * change the threshold occupancy value.
 */
static unsigned int rmid_limbo_count;

/*
 * @rmid_ptrs - The array of rmid_entry backing the limbo and free lists.
 */
static struct rmid_entry *rmid_ptrs;

/*
 * This is the threshold cache occupancy in bytes at which we will consider an
 * RMID available for re-allocation.
 */
unsigned int resctrl_rmid_realloc_threshold;

/*
 * This is the maximum value for the reallocation threshold, in bytes.
 */
unsigned int resctrl_rmid_realloc_limit;

/*
 * x86 and arm64 differ in their handling of monitoring.
 * x86's RMID are independent numbers, there is only one source of traffic
 * with an RMID value of '1'.
 * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
 * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
 * value is no longer unique.
 * To account for this, resctrl uses an index. On x86 this is just the RMID,
 * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
 *
 * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
 * must accept an attempt to read every index.
 */
static inline struct rmid_entry *__rmid_entry(u32 idx)
{
	struct rmid_entry *entry;
	u32 closid, rmid;

	entry = &rmid_ptrs[idx];
	resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);

	WARN_ON_ONCE(entry->closid != closid);
	WARN_ON_ONCE(entry->rmid != rmid);

	return entry;
}

static void limbo_release_entry(struct rmid_entry *entry)
{
	lockdep_assert_held(&rdtgroup_mutex);

	rmid_limbo_count--;
	list_add_tail(&entry->list, &rmid_free_lru);

	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
		closid_num_dirty_rmid[entry->closid]--;
}

/*
 * Check the RMIDs that are marked as busy for this domain. If the
 * reported LLC occupancy is below the threshold clear the busy bit and
 * decrement the count. If the busy count gets to zero on an RMID, we
 * free the RMID.
 */
void __check_limbo(struct rdt_mon_domain *d, bool force_free)
{
	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
	struct rmid_entry *entry;
	u32 idx, cur_idx = 1;
	void *arch_mon_ctx;
	bool rmid_dirty;
	u64 val = 0;

	arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
	if (IS_ERR(arch_mon_ctx)) {
		pr_warn_ratelimited("Failed to allocate monitor context: %ld\n",
				    PTR_ERR(arch_mon_ctx));
		return;
	}

	/*
	 * Skip RMID 0 and start from RMID 1: check all the RMIDs that
	 * are marked as busy for an occupancy < threshold. If the
	 * occupancy is less than the threshold decrement the busy counter
	 * of the RMID and move it to the free list when the counter
	 * reaches 0.
	 */
	for (;;) {
		idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
		if (idx >= idx_limit)
			break;

		entry = __rmid_entry(idx);
		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
					   QOS_L3_OCCUP_EVENT_ID, &val,
					   arch_mon_ctx)) {
			rmid_dirty = true;
		} else {
			rmid_dirty = (val >= resctrl_rmid_realloc_threshold);

			/*
			 * x86's CLOSID and RMID are independent numbers, so the entry's
			 * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
			 * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
			 * used to select the configuration. It is thus necessary to track both
			 * CLOSID and RMID because there may be dependencies between them
			 * on some architectures.
			 */
			trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
		}

		if (force_free || !rmid_dirty) {
			clear_bit(idx, d->rmid_busy_llc);
			if (!--entry->busy)
				limbo_release_entry(entry);
		}
		cur_idx = idx + 1;
	}

	resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
}

bool has_busy_rmid(struct rdt_mon_domain *d)
{
	u32 idx_limit = resctrl_arch_system_num_rmid_idx();

	return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
}

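/*
 * resctrl_find_free_rmid() - Find a free RMID that can be used with @closid.
 *
 * Return:
 * A free entry from rmid_free_lru on success. ERR_PTR(-EBUSY) if the free
 * list is empty while RMIDs wait on the limbo list, ERR_PTR(-ENOSPC)
 * otherwise.
 */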
static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
{
	struct rmid_entry *itr;
	u32 itr_idx, cmp_idx;

	if (list_empty(&rmid_free_lru))
		return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC);

	list_for_each_entry(itr, &rmid_free_lru, list) {
		/*
		 * Get the index of this free RMID, and the index it would need
		 * to be if it were used with this CLOSID.
		 * If the CLOSID is irrelevant on this architecture, the two
		 * index values are always the same on every entry and thus the
		 * very first entry will be returned.
		 */
		itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid);
		cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid);

		if (itr_idx == cmp_idx)
			return itr;
	}

	return ERR_PTR(-ENOSPC);
}

/**
 * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
 *                                  RMID are clean, or the CLOSID that has
 *                                  the most clean RMID.
 *
 * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
 * may not be able to allocate clean RMID. To avoid this the allocator will
 * choose the CLOSID with the most clean RMID.
 *
 * When the CLOSID and RMID are independent numbers, the first free CLOSID will
 * be returned.
 *
 * Return:
 * A free CLOSID on success, -ENOSPC if no CLOSID is free, or -EIO when
 * CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is not enabled.
 */
int resctrl_find_cleanest_closid(void)
{
	u32 cleanest_closid = ~0;
	int i = 0;

	lockdep_assert_held(&rdtgroup_mutex);

	if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
		return -EIO;

	for (i = 0; i < closids_supported(); i++) {
		int num_dirty;

		if (closid_allocated(i))
			continue;

		num_dirty = closid_num_dirty_rmid[i];
		if (num_dirty == 0)
			return i;

		if (cleanest_closid == ~0)
			cleanest_closid = i;

		if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
			cleanest_closid = i;
	}

	if (cleanest_closid == ~0)
		return -ENOSPC;

	return cleanest_closid;
}

/*
 * For MPAM the RMID value is not unique, and has to be considered with
 * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
 * allows all domains to be managed by a single free list.
 * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
 */
int alloc_rmid(u32 closid)
{
	struct rmid_entry *entry;

	lockdep_assert_held(&rdtgroup_mutex);

	entry = resctrl_find_free_rmid(closid);
	if (IS_ERR(entry))
		return PTR_ERR(entry);

	list_del(&entry->list);
	return entry->rmid;
}

static void add_rmid_to_limbo(struct rmid_entry *entry)
{
	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
	struct rdt_mon_domain *d;
	u32 idx;

	lockdep_assert_held(&rdtgroup_mutex);

	/* Walking r->domains, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);

	entry->busy = 0;
	list_for_each_entry(d, &r->mon_domains, hdr.list) {
		/*
		 * For the first limbo RMID in the domain,
		 * set up the limbo worker.
		 */
		if (!has_busy_rmid(d))
			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
						RESCTRL_PICK_ANY_CPU);
		set_bit(idx, d->rmid_busy_llc);
		entry->busy++;
	}

	rmid_limbo_count++;
	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
		closid_num_dirty_rmid[entry->closid]++;
}

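/*
 * free_rmid() - Return the RMID behind (@closid, @rmid) to the free list,
 * parking it on the limbo list first if the LLC occupancy event is enabled
 * and the RMID may therefore still be dirty.
 */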
void free_rmid(u32 closid, u32 rmid)
{
	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
	struct rmid_entry *entry;

	lockdep_assert_held(&rdtgroup_mutex);

	/*
	 * Do not allow the default rmid to be free'd. Comparing by index
	 * allows architectures that ignore the closid parameter to avoid an
	 * unnecessary check.
	 */
	if (!resctrl_arch_mon_capable() ||
	    idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
						RESCTRL_RESERVED_RMID))
		return;

	entry = __rmid_entry(idx);

	if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID))
		add_rmid_to_limbo(entry);
	else
		list_add_tail(&entry->list, &rmid_free_lru);
}

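/*
 * get_mbm_state() - Return the mbm_state for the (@closid, @rmid) pair and
 * MBM event @evtid in domain @d, or NULL if @evtid is not an MBM event or
 * its state array is not allocated.
 */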
static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
				       u32 rmid, enum resctrl_event_id evtid)
{
	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
	struct mbm_state *state;

	if (!resctrl_is_mbm_event(evtid))
		return NULL;

	state = d->mbm_states[MBM_STATE_IDX(evtid)];

	return state ? &state[idx] : NULL;
}

/*
 * mbm_cntr_get() - Return the counter ID for the matching @evtid and @rdtgrp.
 *
 * Return:
 * Valid counter ID on success, or -ENOENT on failure.
 */
static int mbm_cntr_get(struct rdt_resource *r, struct rdt_mon_domain *d,
			struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
{
	int cntr_id;

	if (!r->mon.mbm_cntr_assignable)
		return -ENOENT;

	if (!resctrl_is_mbm_event(evtid))
		return -ENOENT;

	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
		if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
		    d->cntr_cfg[cntr_id].evtid == evtid)
			return cntr_id;
	}

	return -ENOENT;
}

/*
 * mbm_cntr_alloc() - Initialize and return a new counter ID in the domain @d.
 * Caller must ensure that the specified event is not assigned already.
 *
 * Return:
 * Valid counter ID on success, or -ENOSPC on failure.
 */
static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d,
			  struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
{
	int cntr_id;

	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
		if (!d->cntr_cfg[cntr_id].rdtgrp) {
			d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
			d->cntr_cfg[cntr_id].evtid = evtid;
			return cntr_id;
		}
	}

	return -ENOSPC;
}

/*
 * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d.
 */
static void mbm_cntr_free(struct rdt_mon_domain *d, int cntr_id)
{
	memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg));
}

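/*
 * __mon_event_count() - Read the event described by @rr for @rdtgrp, via an
 * assigned counter or the RMID, accumulating the result into @rr->val. With
 * @rr->d set, read that single domain; otherwise sum the event over all
 * domains sharing the L3 cache instance described by @rr->ci.
 */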
static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
{
	int cpu = smp_processor_id();
	u32 closid = rdtgrp->closid;
	u32 rmid = rdtgrp->mon.rmid;
	struct rdt_mon_domain *d;
	int cntr_id = -ENOENT;
	struct mbm_state *m;
	int err, ret;
	u64 tval = 0;

	if (rr->is_mbm_cntr) {
		cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid);
		if (cntr_id < 0) {
			rr->err = -ENOENT;
			return -EINVAL;
		}
	}

	if (rr->first) {
		if (rr->is_mbm_cntr)
			resctrl_arch_reset_cntr(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid);
		else
			resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
		m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
		if (m)
			memset(m, 0, sizeof(struct mbm_state));
		return 0;
	}

	if (rr->d) {
		/* Reading a single domain, must be on a CPU in that domain. */
		if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
			return -EINVAL;
		if (rr->is_mbm_cntr)
			rr->err = resctrl_arch_cntr_read(rr->r, rr->d, closid, rmid, cntr_id,
							 rr->evtid, &tval);
		else
			rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
							 rr->evtid, &tval, rr->arch_mon_ctx);
		if (rr->err)
			return rr->err;

		rr->val += tval;

		return 0;
	}

	/* Summing domains that share a cache, must be on a CPU for that cache. */
	if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
		return -EINVAL;

	/*
	 * Legacy files must report the sum of an event across all
	 * domains that share the same L3 cache instance.
	 * Report success if a read from any domain succeeds, -EINVAL
	 * (translated to "Unavailable" for user space) if reading from
	 * all domains fails for any reason.
	 */
	ret = -EINVAL;
	list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
		if (d->ci_id != rr->ci->id)
			continue;
		if (rr->is_mbm_cntr)
			err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id,
						     rr->evtid, &tval);
		else
			err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
						     rr->evtid, &tval, rr->arch_mon_ctx);
		if (!err) {
			rr->val += tval;
			ret = 0;
		}
	}

	if (ret)
		rr->err = ret;

	return ret;
}

/*
 * mbm_bw_count() - Update bw count from values previously read by
 *		    __mon_event_count().
 * @rdtgrp:	resctrl group associated with the CLOSID and RMID to identify
 *		the cached mbm_state.
 * @rr:		The struct rmid_read populated by __mon_event_count().
 *
 * Supporting function to calculate the memory bandwidth
 * and delta bandwidth in MBps. The chunks value previously read by
 * __mon_event_count() is compared with the chunks value from the previous
 * invocation. This must be called once per second to maintain values in MBps.
 */
static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
{
	u64 cur_bw, bytes, cur_bytes;
	u32 closid = rdtgrp->closid;
	u32 rmid = rdtgrp->mon.rmid;
	struct mbm_state *m;

	m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
	if (WARN_ON_ONCE(!m))
		return;

	cur_bytes = rr->val;
	bytes = cur_bytes - m->prev_bw_bytes;
	m->prev_bw_bytes = cur_bytes;

	cur_bw = bytes / SZ_1M;

	m->prev_bw = cur_bw;
}

/*
 * This is scheduled by mon_event_read() to read the CQM/MBM counters
 * on a domain.
 */
void mon_event_count(void *info)
{
	struct rdtgroup *rdtgrp, *entry;
	struct rmid_read *rr = info;
	struct list_head *head;
	int ret;

	rdtgrp = rr->rgrp;

	ret = __mon_event_count(rdtgrp, rr);

	/*
	 * For Ctrl groups read data from child monitor groups and
	 * add them together. Count events which are read successfully.
	 * Discard the rmid_read's reporting errors.
	 */
	head = &rdtgrp->mon.crdtgrp_list;

	if (rdtgrp->type == RDTCTRL_GROUP) {
		list_for_each_entry(entry, head, mon.crdtgrp_list) {
			if (__mon_event_count(entry, rr) == 0)
				ret = 0;
		}
	}

	/*
	 * __mon_event_count() calls for newly created monitor groups may
	 * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
	 * Discard error if any of the monitor event reads succeeded.
	 */
	if (ret == 0)
		rr->err = 0;
}

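/* Return the control domain of resource @r that contains @cpu, if any. */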
static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
							struct rdt_resource *r)
{
	struct rdt_ctrl_domain *d;

	lockdep_assert_cpus_held();

	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
		/* Find the domain that contains this CPU */
		if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
			return d;
	}

	return NULL;
}

/*
 * Feedback loop for MBA software controller (mba_sc)
 *
 * mba_sc is a feedback loop where we periodically read MBM counters and
 * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
 * that:
 *
 *   current bandwidth (cur_bw) < user specified bandwidth (user_bw)
 *
 * This uses the MBM counters to measure the bandwidth and MBA throttle
 * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
 * fact that resctrl rdtgroups have both monitoring and control.
 *
 * The frequency of the checks is 1s and we just tag along the MBM overflow
 * timer. Having 1s interval makes the calculation of bandwidth simpler.
 *
 * Although MBA's goal is to restrict the bandwidth to a maximum, there may
 * be a need to increase the bandwidth to avoid unnecessarily restricting
 * the L2 <-> L3 traffic.
 *
 * Since MBA controls the L2 external bandwidth whereas MBM measures the
 * L3 external bandwidth the following sequence could lead to such a
 * situation.
 *
 * Consider an rdtgroup which had high L3 <-> memory traffic in initial
 * phases -> mba_sc kicks in and reduces bandwidth percentage values -> but
 * after some time rdtgroup has mostly L2 <-> L3 traffic.
 *
 * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
 * throttle MSRs already have low percentage values. To avoid
 * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
 */
static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
{
	u32 closid, rmid, cur_msr_val, new_msr_val;
	struct mbm_state *pmbm_data, *cmbm_data;
	struct rdt_ctrl_domain *dom_mba;
	enum resctrl_event_id evt_id;
	struct rdt_resource *r_mba;
	struct list_head *head;
	struct rdtgroup *entry;
	u32 cur_bw, user_bw;

	r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
	evt_id = rgrp->mba_mbps_event;

	closid = rgrp->closid;
	rmid = rgrp->mon.rmid;
	pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
	if (WARN_ON_ONCE(!pmbm_data))
		return;

	dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
	if (!dom_mba) {
		pr_warn_once("Failure to get domain for MBA update\n");
		return;
	}

	cur_bw = pmbm_data->prev_bw;
	user_bw = dom_mba->mbps_val[closid];

	/* MBA resource doesn't support CDP */
	cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);

	/*
	 * For Ctrl groups read data from child monitor groups.
	 */
	head = &rgrp->mon.crdtgrp_list;
	list_for_each_entry(entry, head, mon.crdtgrp_list) {
		cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
		if (WARN_ON_ONCE(!cmbm_data))
			return;
		cur_bw += cmbm_data->prev_bw;
	}

	/*
	 * Scale up/down the bandwidth linearly for the ctrl group. The
	 * bandwidth step is the bandwidth granularity specified by the
	 * hardware.
	 * Always increase throttling if current bandwidth is above the
	 * target set by user.
	 * But avoid thrashing up and down on every poll by checking
	 * whether a decrease in throttling is likely to push the group
	 * back over target. E.g. if currently throttling to 30% of bandwidth
	 * on a system with 10% granularity steps, check whether moving to
	 * 40% would go past the limit by multiplying current bandwidth by
	 * "(30 + 10) / 30".
	 */
	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
	} else if (cur_msr_val < MAX_MBA_BW &&
		   (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
	} else {
		return;
	}

	resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
}

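/*
 * mbm_update_one_event() - Read one MBM event for @rdtgrp in domain @d from
 * the overflow handler, so the event is sampled before the hardware counter
 * can wrap and, when the mba_sc software controller is enabled, the cached
 * bandwidth value stays current.
 */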
static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d,
				 struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
{
	struct rmid_read rr = {0};

	rr.r = r;
	rr.d = d;
	rr.evtid = evtid;
	if (resctrl_arch_mbm_cntr_assign_enabled(r)) {
		rr.is_mbm_cntr = true;
	} else {
		rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
		if (IS_ERR(rr.arch_mon_ctx)) {
			pr_warn_ratelimited("Failed to allocate monitor context: %ld\n",
					    PTR_ERR(rr.arch_mon_ctx));
			return;
		}
	}

	__mon_event_count(rdtgrp, &rr);

	/*
	 * If the software controller is enabled, compute the
	 * bandwidth for this event id.
	 */
	if (is_mba_sc(NULL))
		mbm_bw_count(rdtgrp, &rr);

	if (rr.arch_mon_ctx)
		resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}

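/*
 * mbm_update() - Read all enabled MBM events for @rdtgrp in domain @d.
 */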
static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
		       struct rdtgroup *rdtgrp)
{
	/*
	 * This is protected from concurrent reads from user as both
	 * the user and overflow handler hold the global mutex.
	 */
	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
		mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);

	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
		mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
}

/*
 * Handler to scan the limbo list and move RMIDs whose occupancy is below
 * the threshold occupancy to the free list.
 */
void cqm_handle_limbo(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
	struct rdt_mon_domain *d;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	d = container_of(work, struct rdt_mon_domain, cqm_limbo.work);

	__check_limbo(d, false);

	if (has_busy_rmid(d)) {
		d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
							   RESCTRL_PICK_ANY_CPU);
		schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
					 delay);
	}

	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
}

/**
 * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
 *                             domain.
 * @dom:           The domain the limbo handler should run for.
 * @delay_ms:      How far in the future the handler should run.
 * @exclude_cpu:   Which CPU the handler should not run on,
 *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
 */
void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
			     int exclude_cpu)
{
	unsigned long delay = msecs_to_jiffies(delay_ms);
	int cpu;

	cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
	dom->cqm_work_cpu = cpu;

	if (cpu < nr_cpu_ids)
		schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
}

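/*
 * mbm_handle_overflow() - Periodic worker that reads all MBM events in one
 * domain before the hardware counters can overflow, and runs the mba_sc
 * feedback loop when enabled. Re-arms itself while the filesystem remains
 * mounted.
 */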
void mbm_handle_overflow(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
	struct rdtgroup *prgrp, *crgrp;
	struct rdt_mon_domain *d;
	struct list_head *head;
	struct rdt_resource *r;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	/*
	 * If the filesystem has been unmounted this work no longer needs to
	 * run.
	 */
	if (!resctrl_mounted || !resctrl_arch_mon_capable())
		goto out_unlock;

	r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
	d = container_of(work, struct rdt_mon_domain, mbm_over.work);

	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
		mbm_update(r, d, prgrp);

		head = &prgrp->mon.crdtgrp_list;
		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
			mbm_update(r, d, crgrp);

		if (is_mba_sc(NULL))
			update_mba_bw(prgrp, d);
	}

	/*
	 * Re-check for housekeeping CPUs. This allows the overflow handler to
	 * move off a nohz_full CPU quickly.
	 */
	d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
						   RESCTRL_PICK_ANY_CPU);
	schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
}

/**
 * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
 *                                domain.
 * @dom:           The domain the overflow handler should run for.
 * @delay_ms:      How far in the future the handler should run.
 * @exclude_cpu:   Which CPU the handler should not run on,
 *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
 */
void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
				int exclude_cpu)
{
	unsigned long delay = msecs_to_jiffies(delay_ms);
	int cpu;

	/*
	 * When a domain comes online there is no guarantee the filesystem is
	 * mounted. If not, there is no need to catch counter overflow.
	 */
	if (!resctrl_mounted || !resctrl_arch_mon_capable())
		return;
	cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
	dom->mbm_work_cpu = cpu;

	if (cpu < nr_cpu_ids)
		schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
}

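/*
 * dom_data_init() - Allocate rmid_ptrs[] (and closid_num_dirty_rmid[] when
 * RMIDs depend on the CLOSID) and seed the free list with every entry except
 * the reserved CLOSID/RMID pair used by the default group.
 */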
static int dom_data_init(struct rdt_resource *r)
{
	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
	u32 num_closid = resctrl_arch_get_num_closid(r);
	struct rmid_entry *entry = NULL;
	int err = 0, i;
	u32 idx;

	mutex_lock(&rdtgroup_mutex);
	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
		u32 *tmp;

		/*
		 * If the architecture hasn't provided a sanitised value here,
		 * this may result in larger arrays than necessary. Resctrl will
		 * use a smaller system wide value based on the resources in
		 * use.
		 */
		tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
		if (!tmp) {
			err = -ENOMEM;
			goto out_unlock;
		}

		closid_num_dirty_rmid = tmp;
	}

	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
	if (!rmid_ptrs) {
		if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
			kfree(closid_num_dirty_rmid);
			closid_num_dirty_rmid = NULL;
		}
		err = -ENOMEM;
		goto out_unlock;
	}

	for (i = 0; i < idx_limit; i++) {
		entry = &rmid_ptrs[i];
		INIT_LIST_HEAD(&entry->list);

		resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
		list_add_tail(&entry->list, &rmid_free_lru);
	}

	/*
	 * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
	 * are always allocated. These are used for the rdtgroup_default
	 * control group, which will be set up later in resctrl_init().
	 */
	idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
					   RESCTRL_RESERVED_RMID);
	entry = __rmid_entry(idx);
	list_del(&entry->list);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);

	return err;
}

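/* dom_data_exit() - Free the allocations made by dom_data_init(). */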
static void dom_data_exit(struct rdt_resource *r)
{
	mutex_lock(&rdtgroup_mutex);

	if (!r->mon_capable)
		goto out_unlock;

	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
		kfree(closid_num_dirty_rmid);
		closid_num_dirty_rmid = NULL;
	}

	kfree(rmid_ptrs);
	rmid_ptrs = NULL;

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
}

/*
 * All available events. Architecture code marks the ones that
 * are supported by a system using resctrl_enable_mon_event()
 * to set .enabled.
 */
struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
	[QOS_L3_OCCUP_EVENT_ID] = {
		.name	= "llc_occupancy",
		.evtid	= QOS_L3_OCCUP_EVENT_ID,
		.rid	= RDT_RESOURCE_L3,
	},
	[QOS_L3_MBM_TOTAL_EVENT_ID] = {
		.name	= "mbm_total_bytes",
		.evtid	= QOS_L3_MBM_TOTAL_EVENT_ID,
		.rid	= RDT_RESOURCE_L3,
	},
	[QOS_L3_MBM_LOCAL_EVENT_ID] = {
		.name	= "mbm_local_bytes",
		.evtid	= QOS_L3_MBM_LOCAL_EVENT_ID,
		.rid	= RDT_RESOURCE_L3,
	},
};

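/*
 * resctrl_enable_mon_event() - Mark @eventid in mon_event_all[] as supported
 * by this system. Called by the architecture code for each supported event.
 */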
void resctrl_enable_mon_event(enum resctrl_event_id eventid)
{
	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS))
		return;
	if (mon_event_all[eventid].enabled) {
		pr_warn("Duplicate enable for event %d\n", eventid);
		return;
	}

	mon_event_all[eventid].enabled = true;
}

bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
{
	return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS &&
	       mon_event_all[eventid].enabled;
}

u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid)
{
	return mon_event_all[evtid].evt_cfg;
}

/**
 * struct mbm_transaction - Memory transaction an MBM event can be configured with.
 * @name:	Name of memory transaction (read, write ...).
 * @val:	The bit (eg. READS_TO_LOCAL_MEM or READS_TO_REMOTE_MEM) used to
 *		represent the memory transaction within an event's configuration.
 */
struct mbm_transaction {
	char	name[32];
	u32	val;
};

/* Decoded values for each type of memory transaction. */
static struct mbm_transaction mbm_transactions[NUM_MBM_TRANSACTIONS] = {
	{"local_reads", READS_TO_LOCAL_MEM},
	{"remote_reads", READS_TO_REMOTE_MEM},
	{"local_non_temporal_writes", NON_TEMP_WRITE_TO_LOCAL_MEM},
	{"remote_non_temporal_writes", NON_TEMP_WRITE_TO_REMOTE_MEM},
	{"local_reads_slow_memory", READS_TO_LOCAL_S_MEM},
	{"remote_reads_slow_memory", READS_TO_REMOTE_S_MEM},
	{"dirty_victim_writes_all", DIRTY_VICTIMS_TO_ALL_MEM},
};

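/*
 * event_filter_show() - Show the memory transactions an MBM event is
 * configured to count, as a comma separated list. Backs the per-event
 * "event_filter" resctrl file.
 */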
int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
{
	struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
	struct rdt_resource *r;
	bool sep = false;
	int ret = 0, i;

	mutex_lock(&rdtgroup_mutex);
	rdt_last_cmd_clear();

	r = resctrl_arch_get_resource(mevt->rid);
	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
		if (mevt->evt_cfg & mbm_transactions[i].val) {
			if (sep)
				seq_putc(seq, ',');
			seq_printf(seq, "%s", mbm_transactions[i].name);
			sep = true;
		}
	}
	seq_putc(seq, '\n');

out_unlock:
	mutex_unlock(&rdtgroup_mutex);

	return ret;
}

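/*
 * The "mbm_assign_on_mkdir" interface: controls whether counters are
 * automatically assigned to MBM events when a new monitor group is created.
 * Only meaningful while mbm_event counter assignment mode is enabled.
 */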
int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, struct seq_file *s,
				     void *v)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
	int ret = 0;

	mutex_lock(&rdtgroup_mutex);
	rdt_last_cmd_clear();

	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	seq_printf(s, "%u\n", r->mon.mbm_assign_on_mkdir);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);

	return ret;
}

ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf,
					  size_t nbytes, loff_t off)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
	bool value;
	int ret;

	ret = kstrtobool(buf, &value);
	if (ret)
		return ret;

	mutex_lock(&rdtgroup_mutex);
	rdt_last_cmd_clear();

	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	r->mon.mbm_assign_on_mkdir = value;

out_unlock:
	mutex_unlock(&rdtgroup_mutex);

	return ret ?: nbytes;
}

/*
 * mbm_cntr_free_all() - Clear all the counter ID configuration details in the
 * domain @d. Called when mbm_assign_mode is changed.
 */
static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_mon_domain *d)
{
	memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs);
}

/*
 * resctrl_reset_rmid_all() - Reset all non-architecture states for all the
 * supported RMIDs.
 */
static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
{
	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
	enum resctrl_event_id evt;
	int idx;

	for_each_mbm_event_id(evt) {
		if (!resctrl_is_mon_event_enabled(evt))
			continue;
		idx = MBM_STATE_IDX(evt);
		memset(d->mbm_states[idx], 0, sizeof(*d->mbm_states[0]) * idx_limit);
	}
}

/*
 * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID
 * pair in the domain.
 *
 * Assign the counter if @assign is true else unassign the counter. Reset the
 * associated non-architectural state.
 */
static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
				 enum resctrl_event_id evtid, u32 rmid, u32 closid,
				 u32 cntr_id, bool assign)
{
	struct mbm_state *m;

	resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);

	m = get_mbm_state(d, closid, rmid, evtid);
	if (m)
		memset(m, 0, sizeof(*m));
}

/*
 * rdtgroup_alloc_assign_cntr() - Allocate a counter ID and assign it to the event
 * pointed to by @mevt and the resctrl group @rdtgrp within the domain @d.
 *
 * Return:
 * 0 on success, < 0 on failure.
 */
static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
				      struct rdtgroup *rdtgrp, struct mon_evt *mevt)
{
	int cntr_id;

	/* No action required if the counter is assigned already. */
	cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
	if (cntr_id >= 0)
		return 0;

	cntr_id = mbm_cntr_alloc(r, d, rdtgrp, mevt->evtid);
	if (cntr_id < 0) {
		rdt_last_cmd_printf("Failed to allocate counter for %s in domain %d\n",
				    mevt->name, d->hdr.id);
		return cntr_id;
	}

	rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, true);

	return 0;
}

/*
 * rdtgroup_assign_cntr_event() - Assign a hardware counter for the event in
 * @mevt to the resctrl group @rdtgrp. Assign counters to all domains if @d is
 * NULL; otherwise, assign the counter to the specified domain @d.
 *
 * If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr()
 * will fail. The assignment process will abort at the first failure encountered
 * during domain traversal, which may result in the event being only partially
 * assigned.
 *
 * Return:
 * 0 on success, < 0 on failure.
 */
static int rdtgroup_assign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
				      struct mon_evt *mevt)
{
	struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
	int ret = 0;

	if (!d) {
		list_for_each_entry(d, &r->mon_domains, hdr.list) {
			ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
			if (ret)
				return ret;
		}
	} else {
		ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
	}

	return ret;
}

/*
 * rdtgroup_assign_cntrs() - Assign counters to MBM events. Called when
 * a new group is created.
 *
 * Each group can accommodate two counters per domain: one for the total
 * event and one for the local event. Assignments may fail due to the limited
 * number of counters. However, it is not necessary to fail the group creation
 * and thus no failure is returned. Users have the option to modify the
 * counter assignments after the group has been created.
 */
void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp)
{
	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);

	if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) ||
	    !r->mon.mbm_assign_on_mkdir)
		return;

	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
		rdtgroup_assign_cntr_event(NULL, rdtgrp,
					   &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);

	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
		rdtgroup_assign_cntr_event(NULL, rdtgrp,
					   &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
}

/*
 * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration
 * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp.
 */
static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
					struct rdtgroup *rdtgrp, struct mon_evt *mevt)
{
	int cntr_id;

	cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);

	/* If there is no cntr_id assigned, nothing to do */
	if (cntr_id < 0)
		return;

	rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, false);

	mbm_cntr_free(d, cntr_id);
}

/*
 * rdtgroup_unassign_cntr_event() - Unassign a hardware counter associated with
 * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign
 * the counters from all the domains if @d is NULL else unassign from @d.
 */
static void rdtgroup_unassign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
					 struct mon_evt *mevt)
{
	struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);

	if (!d) {
		list_for_each_entry(d, &r->mon_domains, hdr.list)
			rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
	} else {
		rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
	}
}

/*
 * rdtgroup_unassign_cntrs() - Unassign the counters associated with MBM events.
 * Called when a group is deleted.
 */
void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp)
{
	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);

	if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r))
		return;

	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
		rdtgroup_unassign_cntr_event(NULL, rdtgrp,
					     &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);

	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
		rdtgroup_unassign_cntr_event(NULL, rdtgrp,
					     &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
}

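/*
 * resctrl_parse_mem_transactions() - Parse a comma separated list of memory
 * transaction names from @tok, OR-ing the corresponding configuration bits
 * into @val. Returns -EINVAL for an unrecognized transaction name.
 */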
static int resctrl_parse_mem_transactions(char *tok, u32 *val)
{
	u32 temp_val = 0;
	char *evt_str;
	bool found;
	int i;

next_config:
	if (!tok || tok[0] == '\0') {
		*val = temp_val;
		return 0;
	}

	/* Start processing the strings for each memory transaction type */
	evt_str = strim(strsep(&tok, ","));
	found = false;
	for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
		if (!strcmp(mbm_transactions[i].name, evt_str)) {
			temp_val |= mbm_transactions[i].val;
			found = true;
			break;
		}
	}

	if (!found) {
		rdt_last_cmd_printf("Invalid memory transaction type %s\n", evt_str);
		return -EINVAL;
	}

	goto next_config;
}

/*
 * rdtgroup_update_cntr_event() - Update the counter assignments for the event
 * in a group.
 * @r:		Resource to which the update needs to be done.
 * @rdtgrp:	Resctrl group.
 * @evtid:	MBM monitor event.
 */
static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
				       enum resctrl_event_id evtid)
{
	struct rdt_mon_domain *d;
	int cntr_id;

	list_for_each_entry(d, &r->mon_domains, hdr.list) {
		cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid);
		if (cntr_id >= 0)
			rdtgroup_assign_cntr(r, d, evtid, rdtgrp->mon.rmid,
					     rdtgrp->closid, cntr_id, true);
	}
}

/*
 * resctrl_update_cntr_allrdtgrp() - Update the counter assignments for the
 * event for all the groups.
 * @mevt:	MBM monitor event.
 */
static void resctrl_update_cntr_allrdtgrp(struct mon_evt *mevt)
{
	struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
	struct rdtgroup *prgrp, *crgrp;

	/*
	 * Find all the groups where the event is assigned and update the
	 * configuration of existing assignments.
	 */
	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
		rdtgroup_update_cntr_event(r, prgrp, mevt->evtid);

		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
			rdtgroup_update_cntr_event(r, crgrp, mevt->evtid);
	}
}

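/*
 * event_filter_write() - Update the memory transactions an MBM event counts
 * and re-program all counters currently assigned for that event.
 */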
ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
			   loff_t off)
{
	struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
	struct rdt_resource *r;
	u32 evt_cfg = 0;
	int ret = 0;

	/* Valid input requires a trailing newline */
	if (nbytes == 0 || buf[nbytes - 1] != '\n')
		return -EINVAL;

	buf[nbytes - 1] = '\0';

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	rdt_last_cmd_clear();

	r = resctrl_arch_get_resource(mevt->rid);
	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = resctrl_parse_mem_transactions(buf, &evt_cfg);
	if (!ret && mevt->evt_cfg != evt_cfg) {
		mevt->evt_cfg = evt_cfg;
		resctrl_update_cntr_allrdtgrp(mevt);
	}

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();

	return ret ?: nbytes;
}

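/*
 * resctrl_mbm_assign_mode_show() - Show the available counter assignment
 * modes, with the active mode in brackets.
 */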
int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of,
				 struct seq_file *s, void *v)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
	bool enabled;

	mutex_lock(&rdtgroup_mutex);
	enabled = resctrl_arch_mbm_cntr_assign_enabled(r);

	if (r->mon.mbm_cntr_assignable) {
		if (enabled)
			seq_puts(s, "[mbm_event]\n");
		else
			seq_puts(s, "[default]\n");

		if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) {
			if (enabled)
				seq_puts(s, "default\n");
			else
				seq_puts(s, "mbm_event\n");
		}
	} else {
		seq_puts(s, "[default]\n");
	}

	mutex_unlock(&rdtgroup_mutex);

	return 0;
}

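/*
 * resctrl_mbm_assign_mode_write() - Switch between the "default" and
 * "mbm_event" counter assignment modes. A mode change releases all
 * assignable counters and resets the non-architectural MBM state.
 */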
ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf,
				      size_t nbytes, loff_t off)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
	struct rdt_mon_domain *d;
	int ret = 0;
	bool enable;

	/* Valid input requires a trailing newline */
	if (nbytes == 0 || buf[nbytes - 1] != '\n')
		return -EINVAL;

	buf[nbytes - 1] = '\0';

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	rdt_last_cmd_clear();

	if (!strcmp(buf, "default")) {
		enable = false;
	} else if (!strcmp(buf, "mbm_event")) {
		if (r->mon.mbm_cntr_assignable) {
			enable = true;
		} else {
			ret = -EINVAL;
			rdt_last_cmd_puts("mbm_event mode is not supported\n");
			goto out_unlock;
		}
	} else {
		ret = -EINVAL;
		rdt_last_cmd_puts("Unsupported assign mode\n");
		goto out_unlock;
	}

	if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) {
		ret = resctrl_arch_mbm_cntr_assign_set(r, enable);
		if (ret)
			goto out_unlock;

		/* Update the visibility of BMEC related files */
		resctrl_bmec_files_show(r, NULL, !enable);

		/*
		 * Initialize the default memory transaction values for
		 * total and local events.
		 */
		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
			mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
			mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
									   (READS_TO_LOCAL_MEM |
									    READS_TO_LOCAL_S_MEM |
									    NON_TEMP_WRITE_TO_LOCAL_MEM);
		/* Enable auto assignment when switching to "mbm_event" mode */
		if (enable)
			r->mon.mbm_assign_on_mkdir = true;
		/*
		 * Reset all the non-architectural RMID state and assignable counters.
		 */
		list_for_each_entry(d, &r->mon_domains, hdr.list) {
			mbm_cntr_free_all(r, d);
			resctrl_reset_rmid_all(r, d);
		}
	}

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();

	return ret ?: nbytes;
}

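/*
 * resctrl_num_mbm_cntrs_show() - Show the total number of assignable
 * counters, per monitor domain, as "<domain>=<count>" pairs.
 */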
int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of,
			       struct seq_file *s, void *v)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
	struct rdt_mon_domain *dom;
	bool sep = false;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
		if (sep)
			seq_putc(s, ';');

		seq_printf(s, "%d=%d", dom->hdr.id, r->mon.num_mbm_cntrs);
		sep = true;
	}
	seq_putc(s, '\n');

	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
	return 0;
}

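/*
 * resctrl_available_mbm_cntrs_show() - Show the number of currently
 * unassigned counters, per monitor domain, as "<domain>=<count>" pairs.
 */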
int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of,
				     struct seq_file *s, void *v)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
	struct rdt_mon_domain *dom;
	bool sep = false;
	u32 cntrs, i;
	int ret = 0;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	rdt_last_cmd_clear();

	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
		if (sep)
			seq_putc(s, ';');

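		/* A counter with no owning rdtgroup is available for assignment. */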
		cntrs = 0;
		for (i = 0; i < r->mon.num_mbm_cntrs; i++) {
			if (!dom->cntr_cfg[i].rdtgrp)
				cntrs++;
		}

		seq_printf(s, "%d=%u", dom->hdr.id, cntrs);
		sep = true;
	}
	seq_putc(s, '\n');

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();

	return ret;
}

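/*
 * mbm_L3_assignments_show() - Show the counter assignment state of each
 * enabled MBM event for the resource group, one event per line. A domain is
 * reported as "<domain id>=e" when a counter is assigned there, and as
 * "<domain id>=_" when it is not.
 */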
int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v)
{
	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
	struct rdt_mon_domain *d;
	struct rdtgroup *rdtgrp;
	struct mon_evt *mevt;
	int ret = 0;
	bool sep;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		ret = -ENOENT;
		goto out_unlock;
	}

	rdt_last_cmd_clear();
	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	for_each_mon_event(mevt) {
		if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid))
			continue;

		sep = false;
		seq_printf(s, "%s:", mevt->name);
		list_for_each_entry(d, &r->mon_domains, hdr.list) {
			if (sep)
				seq_putc(s, ';');

			if (mbm_cntr_get(r, d, rdtgrp, mevt->evtid) < 0)
				seq_printf(s, "%d=_", d->hdr.id);
			else
				seq_printf(s, "%d=e", d->hdr.id);

			sep = true;
		}
		seq_putc(s, '\n');
	}

out_unlock:
	rdtgroup_kn_unlock(of->kn);

	return ret;
}

/*
 * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching
 * event name.
 */
static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *name)
{
	struct mon_evt *mevt;

	for_each_mon_event(mevt) {
		if (mevt->rid == r->rid && mevt->enabled &&
		    resctrl_is_mbm_event(mevt->evtid) &&
		    !strcmp(mevt->name, name))
			return mevt;
	}

	return NULL;
}

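/*
 * rdtgroup_modify_assign_state() - Apply a single-character assignment state:
 * 'e' assigns a counter for @mevt to @rdtgrp, '_' unassigns it. A NULL @d
 * applies the change to all monitoring domains.
 */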
static int rdtgroup_modify_assign_state(char *assign, struct rdt_mon_domain *d,
					struct rdtgroup *rdtgrp, struct mon_evt *mevt)
{
	int ret = 0;

	if (!assign || strlen(assign) != 1)
		return -EINVAL;

	switch (*assign) {
	case 'e':
		ret = rdtgroup_assign_cntr_event(d, rdtgrp, mevt);
		break;
	case '_':
		rdtgroup_unassign_cntr_event(d, rdtgrp, mevt);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

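/*
 * resctrl_parse_mbm_assignment() - Parse and apply the assignment requests
 * for one event. @tok holds "<domain id>=<state>" pairs separated by ';';
 * a domain id of '*' applies the state to all domains.
 */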
static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp,
					char *event, char *tok)
{
	struct rdt_mon_domain *d;
	unsigned long dom_id = 0;
	char *dom_str, *id_str;
	struct mon_evt *mevt;
	int ret;

	mevt = mbm_get_mon_event_by_name(r, event);
	if (!mevt) {
		rdt_last_cmd_printf("Invalid event %s\n", event);
		return -ENOENT;
	}

next:
	if (!tok || tok[0] == '\0')
		return 0;

	/* Start processing the strings for each domain */
	dom_str = strim(strsep(&tok, ";"));

	id_str = strsep(&dom_str, "=");

	/* Check for domain id '*' which means all domains */
	if (id_str && *id_str == '*') {
		ret = rdtgroup_modify_assign_state(dom_str, NULL, rdtgrp, mevt);
		if (ret)
			rdt_last_cmd_printf("Assign operation '%s:*=%s' failed\n",
					    event, dom_str);
		return ret;
	} else if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
		rdt_last_cmd_puts("Missing domain id\n");
		return -EINVAL;
	}

	/* Verify if the dom_id is valid */
	list_for_each_entry(d, &r->mon_domains, hdr.list) {
		if (d->hdr.id == dom_id) {
			ret = rdtgroup_modify_assign_state(dom_str, d, rdtgrp, mevt);
			if (ret) {
				rdt_last_cmd_printf("Assign operation '%s:%ld=%s' failed\n",
						    event, dom_id, dom_str);
				return ret;
			}
			goto next;
		}
	}

	rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id);
	return -EINVAL;
}

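/*
 * mbm_L3_assignments_write() - Update the counter assignments for the
 * resource group. Each line of @buf holds one request of the form
 * "<event>:<domain id>=<state>[;<domain id>=<state>...]".
 */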
ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf,
				 size_t nbytes, loff_t off)
{
	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
	struct rdtgroup *rdtgrp;
	char *token, *event;
	int ret = 0;

	/* Valid input requires a trailing newline */
	if (nbytes == 0 || buf[nbytes - 1] != '\n')
		return -EINVAL;

	buf[nbytes - 1] = '\0';

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		rdtgroup_kn_unlock(of->kn);
		return -ENOENT;
	}
	rdt_last_cmd_clear();

	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
		rdt_last_cmd_puts("mbm_event mode is not enabled\n");
		rdtgroup_kn_unlock(of->kn);
		return -EINVAL;
	}

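	/*
	 * For example (domain ids are illustrative), writing
	 * "mbm_total_bytes:0=e;1=_\n" requests a counter for mbm_total_bytes
	 * in domain 0 and releases any counter assigned in domain 1.
	 */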
	while ((token = strsep(&buf, "\n")) != NULL) {
		/*
		 * Each write command has the format:
		 * "<Event>:<Domain ID>=<Assignment state>"
		 * Extract the event name first.
		 */
		event = strsep(&token, ":");

		ret = resctrl_parse_mbm_assignment(r, rdtgrp, event, token);
		if (ret)
			break;
	}

	rdtgroup_kn_unlock(of->kn);

	return ret ?: nbytes;
}

/**
 * resctrl_mon_resource_init() - Initialise global monitoring structures.
 *
 * Allocate and initialise global monitor resources that do not belong to a
 * specific domain, i.e. the rmid_ptrs[] used for the limbo and free lists.
 * Called once during boot after the struct rdt_resource's have been configured
 * but before the filesystem is mounted.
 * Resctrl's cpuhp callbacks may be called before this point to bring a domain
 * online.
 *
 * Returns 0 for success, or -ENOMEM.
 */
int resctrl_mon_resource_init(void)
{
	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
	int ret;

	if (!r->mon_capable)
		return 0;

	ret = dom_data_init(r);
	if (ret)
		return ret;

	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
		mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true;
		resctrl_file_fflags_init("mbm_total_bytes_config",
					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
	}
	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
		mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true;
		resctrl_file_fflags_init("mbm_local_bytes_config",
					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
	}

	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
		mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
	else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
		mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;

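	/*
	 * Expose the counter assignment interface files only when the
	 * hardware provides assignable MBM counters.
	 */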
	if (r->mon.mbm_cntr_assignable) {
		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
			mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
			mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
									   (READS_TO_LOCAL_MEM |
									    READS_TO_LOCAL_S_MEM |
									    NON_TEMP_WRITE_TO_LOCAL_MEM);
		r->mon.mbm_assign_on_mkdir = true;
		resctrl_file_fflags_init("num_mbm_cntrs",
					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
		resctrl_file_fflags_init("available_mbm_cntrs",
					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
		resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG);
		resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO |
					 RFTYPE_RES_CACHE);
		resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE);
	}

	return 0;
}

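/*
 * resctrl_mon_resource_exit() - Free the global monitoring structures
 * allocated by resctrl_mon_resource_init().
 */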
void resctrl_mon_resource_exit(void)
{
	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);

	dom_data_exit(r);
}