/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Anhua Xu
 *    Kevin Tian <kevin.tian@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"

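/* Scan every engine's workload queue of a vGPU and return true as soon
 * as one non-empty queue is found, i.e. the vGPU still has pending work.
 */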
static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
{
        enum intel_engine_id i;
        struct intel_engine_cs *engine;

        for_each_engine(engine, vgpu->gvt->gt, i) {
                if (!list_empty(workload_q_head(vgpu, engine)))
                        return true;
        }

        return false;
}

/* A vGPU is given higher priority for the first 2 seconds after it starts */
#define GVT_SCHED_VGPU_PRI_TIME 2

struct vgpu_sched_data {
        struct list_head lru_list;
        struct intel_vgpu *vgpu;
        bool active;
        bool pri_sched;
        ktime_t pri_time;
        ktime_t sched_in_time;
        ktime_t sched_time;
        ktime_t left_ts;
        ktime_t allocated_ts;

        struct vgpu_sched_ctl sched_ctl;
};

struct gvt_sched_data {
        struct intel_gvt *gvt;
        struct hrtimer timer;
        unsigned long period;
        struct list_head lru_runq_head;
        ktime_t expire_time;
};

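/* Charge the wall-clock time consumed since the vGPU was scheduled in:
 * add it to the accumulated sched_time, subtract it from the remaining
 * timeslice (left_ts), and restart accounting from cur_time. The idle
 * vGPU is never charged.
 */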
static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time)
{
        ktime_t delta_ts;
        struct vgpu_sched_data *vgpu_data;

        if (!vgpu || vgpu == vgpu->gvt->idle_vgpu)
                return;

        vgpu_data = vgpu->sched_data;
        delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time);
        vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts);
        vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts);
        vgpu_data->sched_in_time = cur_time;
}

#define GVT_TS_BALANCE_PERIOD_MS 100
#define GVT_TS_BALANCE_STAGE_NUM 10

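/* Redistribute timeslices across all runnable vGPUs in proportion to
 * their sched_ctl.weight. Every GVT_TS_BALANCE_STAGE_NUM invocations
 * (stage 0) the budget is recomputed from scratch; in the remaining
 * stages each vGPU simply receives another allocated_ts on top of
 * whatever it has left. For example, two vGPUs with weights 4 and 2
 * share the 100 ms period as (100 / 6) * 4 ~= 67 ms and
 * (100 / 6) * 2 ~= 33 ms respectively.
 */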
static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
{
        struct vgpu_sched_data *vgpu_data;
        struct list_head *pos;
        static u64 stage_check;
        int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM;

        /* At stage 0 the timeslice accumulation is reset and the budget
         * is allocated again, without carrying over debt from previous
         * stages.
         */
        if (stage == 0) {
                int total_weight = 0;
                ktime_t fair_timeslice;

                list_for_each(pos, &sched_data->lru_runq_head) {
                        vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
                        total_weight += vgpu_data->sched_ctl.weight;
                }

                list_for_each(pos, &sched_data->lru_runq_head) {
                        vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
                        fair_timeslice = ktime_divns(ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS),
                                                     total_weight) * vgpu_data->sched_ctl.weight;

                        vgpu_data->allocated_ts = fair_timeslice;
                        vgpu_data->left_ts = vgpu_data->allocated_ts;
                }
        } else {
                list_for_each(pos, &sched_data->lru_runq_head) {
                        vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);

                        /* The timeslice for the next 100 ms should include
                         * any leftover or debt slice carried over from
                         * previous stages.
                         */
                        vgpu_data->left_ts += vgpu_data->allocated_ts;
                }
        }
}

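/* Perform the actual vGPU switch: stop dispatching for the current
 * vGPU and, if any engine still runs an in-flight workload, return and
 * retry later. Otherwise account the consumed timeslice, promote
 * next_vgpu to current_vgpu and wake up the dispatch threads.
 */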
static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
{
        struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
        enum intel_engine_id i;
        struct intel_engine_cs *engine;
        struct vgpu_sched_data *vgpu_data;
        ktime_t cur_time;

        /* no need to schedule if next_vgpu is the same as current_vgpu;
         * let the scheduler choose next_vgpu again by setting it to NULL.
         */
        if (scheduler->next_vgpu == scheduler->current_vgpu) {
                scheduler->next_vgpu = NULL;
                return;
        }

        /*
         * after the flag is set, workload dispatch thread will
         * stop dispatching workload for current vgpu
         */
        scheduler->need_reschedule = true;

        /* still have uncompleted workload? */
        for_each_engine(engine, gvt->gt, i) {
                if (scheduler->current_workload[engine->id])
                        return;
        }

        cur_time = ktime_get();
        vgpu_update_timeslice(scheduler->current_vgpu, cur_time);
        vgpu_data = scheduler->next_vgpu->sched_data;
        vgpu_data->sched_in_time = cur_time;

        /* switch current vgpu */
        scheduler->current_vgpu = scheduler->next_vgpu;
        scheduler->next_vgpu = NULL;

        scheduler->need_reschedule = false;

        /* wake up workload dispatch thread */
        for_each_engine(engine, gvt->gt, i)
                wake_up(&scheduler->waitq[engine->id]);
}

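/* Walk the LRU run queue and pick the first vGPU that has pending
 * workloads and either is still inside its start-up priority window or
 * has timeslice budget left. Returns NULL if no such vGPU exists.
 */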
static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
{
        struct vgpu_sched_data *vgpu_data;
        struct intel_vgpu *vgpu = NULL;
        struct list_head *head = &sched_data->lru_runq_head;
        struct list_head *pos;

        /* search a vgpu with pending workload */
        list_for_each(pos, head) {

                vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
                if (!vgpu_has_pending_workload(vgpu_data->vgpu))
                        continue;

                if (vgpu_data->pri_sched) {
                        if (ktime_before(ktime_get(), vgpu_data->pri_time)) {
                                vgpu = vgpu_data->vgpu;
                                break;
                        } else
                                vgpu_data->pri_sched = false;
                }

                /* Return the vGPU only if it has time slice left */
                if (vgpu_data->left_ts > 0) {
                        vgpu = vgpu_data->vgpu;
                        break;
                }
        }

        return vgpu;
}

/* in nanoseconds */
#define GVT_DEFAULT_TIME_SLICE 1000000

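/* Core of the time-based scheduling (TBS) policy: pick the next busy
 * vGPU from the LRU run queue (falling back to the idle vGPU when
 * nobody has pending work), rotate it to the tail of the queue unless
 * it is inside its priority window, and trigger the switch.
 */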
static void tbs_sched_func(struct gvt_sched_data *sched_data)
{
        struct intel_gvt *gvt = sched_data->gvt;
        struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
        struct vgpu_sched_data *vgpu_data;
        struct intel_vgpu *vgpu = NULL;

        /* no active vgpu, or a target has already been chosen */
        if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
                goto out;

        vgpu = find_busy_vgpu(sched_data);
        if (vgpu) {
                scheduler->next_vgpu = vgpu;
                vgpu_data = vgpu->sched_data;
                if (!vgpu_data->pri_sched) {
                        /* Move the last used vGPU to the tail of lru_list */
                        list_del_init(&vgpu_data->lru_list);
                        list_add_tail(&vgpu_data->lru_list,
                                      &sched_data->lru_runq_head);
                }
        } else {
                scheduler->next_vgpu = gvt->idle_vgpu;
        }
out:
        if (scheduler->next_vgpu)
                try_to_schedule_next_vgpu(gvt);
}

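/* Entry point of the scheduler, called from the GVT service thread.
 * Rebalances the timeslices once per GVT_TS_BALANCE_PERIOD_MS, charges
 * the current vGPU for the time it has consumed, and runs the TBS
 * policy, all under sched_lock.
 */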
void intel_gvt_schedule(struct intel_gvt *gvt)
{
        struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
        ktime_t cur_time;

        mutex_lock(&gvt->sched_lock);
        cur_time = ktime_get();

        if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
                                (void *)&gvt->service_request)) {
                if (cur_time >= sched_data->expire_time) {
                        gvt_balance_timeslice(sched_data);
                        sched_data->expire_time = ktime_add_ms(
                                cur_time, GVT_TS_BALANCE_PERIOD_MS);
                }
        }
        clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request);

        vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time);
        tbs_sched_func(sched_data);

        mutex_unlock(&gvt->sched_lock);
}

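/* hrtimer callback fired every data->period nanoseconds: request a
 * scheduling pass from the service thread and re-arm the timer.
 */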
static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data)
{
        struct gvt_sched_data *data;

        data = container_of(timer_data, struct gvt_sched_data, timer);

        intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED);

        hrtimer_add_expires_ns(&data->timer, data->period);

        return HRTIMER_RESTART;
}

static int tbs_sched_init(struct intel_gvt *gvt)
{
        struct intel_gvt_workload_scheduler *scheduler =
                &gvt->scheduler;

        struct gvt_sched_data *data;

        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;

        INIT_LIST_HEAD(&data->lru_runq_head);
        hrtimer_setup(&data->timer, tbs_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        data->period = GVT_DEFAULT_TIME_SLICE;
        data->gvt = gvt;

        scheduler->sched_data = data;

        return 0;
}

static void tbs_sched_clean(struct intel_gvt *gvt)
{
        struct intel_gvt_workload_scheduler *scheduler =
                &gvt->scheduler;
        struct gvt_sched_data *data = scheduler->sched_data;

        hrtimer_cancel(&data->timer);

        kfree(data);
        scheduler->sched_data = NULL;
}

static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
{
        struct vgpu_sched_data *data;

        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;

        data->sched_ctl.weight = vgpu->sched_ctl.weight;
        data->vgpu = vgpu;
        INIT_LIST_HEAD(&data->lru_list);

        vgpu->sched_data = data;

        return 0;
}

static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
{
        struct intel_gvt *gvt = vgpu->gvt;
        struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;

        kfree(vgpu->sched_data);
        vgpu->sched_data = NULL;

        /* this vgpu id has been removed */
        if (idr_is_empty(&gvt->vgpu_idr))
                hrtimer_cancel(&sched_data->timer);
}

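/* Put a vGPU on the run queue. A freshly started vGPU is granted a
 * GVT_SCHED_VGPU_PRI_TIME-second priority window so that guest boot is
 * not starved, and the scheduling timer is armed on first use.
 */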
static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
{
        struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
        struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
        ktime_t now;

        if (!list_empty(&vgpu_data->lru_list))
                return;

        now = ktime_get();
        vgpu_data->pri_time = ktime_add(now,
                                        ktime_set(GVT_SCHED_VGPU_PRI_TIME, 0));
        vgpu_data->pri_sched = true;

        list_add(&vgpu_data->lru_list, &sched_data->lru_runq_head);

        if (!hrtimer_active(&sched_data->timer))
                hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
                              sched_data->period), HRTIMER_MODE_ABS);
        vgpu_data->active = true;
}

static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
{
        struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

        list_del_init(&vgpu_data->lru_list);
        vgpu_data->active = false;
}

static const struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
        .init = tbs_sched_init,
        .clean = tbs_sched_clean,
        .init_vgpu = tbs_sched_init_vgpu,
        .clean_vgpu = tbs_sched_clean_vgpu,
        .start_schedule = tbs_sched_start_schedule,
        .stop_schedule = tbs_sched_stop_schedule,
};

int intel_gvt_init_sched_policy(struct intel_gvt *gvt)
{
        int ret;

        mutex_lock(&gvt->sched_lock);
        gvt->scheduler.sched_ops = &tbs_schedule_ops;
        ret = gvt->scheduler.sched_ops->init(gvt);
        mutex_unlock(&gvt->sched_lock);

        return ret;
}

void intel_gvt_clean_sched_policy(struct intel_gvt *gvt)
{
        mutex_lock(&gvt->sched_lock);
        gvt->scheduler.sched_ops->clean(gvt);
        mutex_unlock(&gvt->sched_lock);
}

/* For the per-vgpu scheduler policy there are two pieces of per-vgpu
 * data: sched_data and sched_ctl. Both are treated as part of the
 * global scheduler and are protected by gvt->sched_lock. Callers must
 * decide for themselves whether vgpu_lock should be held outside.
 */

int intel_vgpu_init_sched_policy(struct intel_vgpu *vgpu)
{
        int ret;

        mutex_lock(&vgpu->gvt->sched_lock);
        ret = vgpu->gvt->scheduler.sched_ops->init_vgpu(vgpu);
        mutex_unlock(&vgpu->gvt->sched_lock);

        return ret;
}

void intel_vgpu_clean_sched_policy(struct intel_vgpu *vgpu)
{
        mutex_lock(&vgpu->gvt->sched_lock);
        vgpu->gvt->scheduler.sched_ops->clean_vgpu(vgpu);
        mutex_unlock(&vgpu->gvt->sched_lock);
}

void intel_vgpu_start_schedule(struct intel_vgpu *vgpu)
{
        struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

        mutex_lock(&vgpu->gvt->sched_lock);
        if (!vgpu_data->active) {
                gvt_dbg_core("vgpu%d: start schedule\n", vgpu->id);
                vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu);
        }
        mutex_unlock(&vgpu->gvt->sched_lock);
}

void intel_gvt_kick_schedule(struct intel_gvt *gvt)
{
        mutex_lock(&gvt->sched_lock);
        intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
        mutex_unlock(&gvt->sched_lock);
}

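/* Take a vGPU off the run queue and drop any reference the scheduler
 * still holds to it (next_vgpu, current_vgpu, engine_owner). MMIO
 * context is switched back to the host on every engine the vGPU owns,
 * so the vGPU can be safely destroyed afterwards.
 */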
void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
{
        struct intel_gvt_workload_scheduler *scheduler =
                &vgpu->gvt->scheduler;
        struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
        struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        intel_wakeref_t wakeref;

        if (!vgpu_data->active)
                return;

        gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id);

        mutex_lock(&vgpu->gvt->sched_lock);
        scheduler->sched_ops->stop_schedule(vgpu);

        if (scheduler->next_vgpu == vgpu)
                scheduler->next_vgpu = NULL;

        if (scheduler->current_vgpu == vgpu) {
                /* stop workload dispatching */
                scheduler->need_reschedule = true;
                scheduler->current_vgpu = NULL;
        }

        wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
        spin_lock_bh(&scheduler->mmio_context_lock);
        for_each_engine(engine, vgpu->gvt->gt, id) {
                if (scheduler->engine_owner[engine->id] == vgpu) {
                        intel_gvt_switch_mmio(vgpu, NULL, engine);
                        scheduler->engine_owner[engine->id] = NULL;
                }
        }
        spin_unlock_bh(&scheduler->mmio_context_lock);
        intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
        mutex_unlock(&vgpu->gvt->sched_lock);
}