// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_sync.h"

#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>

#include <drm/drm_print.h>
#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>

#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_macros.h"
#include "xe_sched_job_types.h"

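/*
 * Bookkeeping for a DRM_XE_SYNC_TYPE_USER_FENCE sync: once the attached
 * dma-fence signals, a worker writes @value to the user pointer @addr within
 * @mm, and @signalled records completion for status queries.
 */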
struct xe_user_fence {
	struct xe_device *xe;
	struct kref refcount;
	struct dma_fence_cb cb;
	struct work_struct worker;
	struct mm_struct *mm;
	u64 __user *addr;
	u64 value;
	int signalled;
};

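/* Final kref release: drop the mm reference taken at creation and free */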
static void user_fence_destroy(struct kref *kref)
{
	struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence,
						    refcount);

	mmdrop(ufence->mm);
	kfree(ufence);
}

static void user_fence_get(struct xe_user_fence *ufence)
{
	kref_get(&ufence->refcount);
}

static void user_fence_put(struct xe_user_fence *ufence)
{
	kref_put(&ufence->refcount, user_fence_destroy);
}

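/*
 * Validate the user address up front with a throwaway read (which also
 * faults the page in) and take an mmgrab() reference so the mm_struct stays
 * valid until the signalling worker has run, even if the task exits first.
 */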
static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
					       u64 value)
{
	struct xe_user_fence *ufence;
	u64 __user *ptr = u64_to_user_ptr(addr);
	u64 __maybe_unused prefetch_val;

	if (get_user(prefetch_val, ptr))
		return ERR_PTR(-EFAULT);

	ufence = kzalloc(sizeof(*ufence), GFP_KERNEL);
	if (!ufence)
		return ERR_PTR(-ENOMEM);

	ufence->xe = xe;
	kref_init(&ufence->refcount);
	ufence->addr = ptr;
	ufence->value = value;
	ufence->mm = current->mm;
	mmgrab(ufence->mm);

	return ufence;
}

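/*
 * Worker that signals the user fence: attach to the originating mm, if it is
 * still alive, and write the fence value to the user address.
 */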
static void user_fence_worker(struct work_struct *w)
{
	struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker);

	WRITE_ONCE(ufence->signalled, 1);
	if (mmget_not_zero(ufence->mm)) {
		kthread_use_mm(ufence->mm);
		if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value)))
			XE_WARN_ON("Copy to user failed");
		kthread_unuse_mm(ufence->mm);
		mmput(ufence->mm);
	} else {
		drm_dbg(&ufence->xe->drm, "mmget_not_zero() failed, ufence wasn't signaled\n");
	}

	/*
	 * Wake up waiters only after updating the ufence state, allowing the UMD
	 * to safely reuse the same ufence without encountering -EBUSY errors.
	 */
	wake_up_all(&ufence->xe->ufence_wq);
	user_fence_put(ufence);
}

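/* Queue the signalling worker; consumes the caller's fence reference */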
static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence)
{
	INIT_WORK(&ufence->worker, user_fence_worker);
	queue_work(ufence->xe->ordered_wq, &ufence->worker);
	dma_fence_put(fence);
}

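/*
 * dma-fence callback: runs in fence signalling context, where copy_to_user()
 * is not allowed, so defer the write to a worker.
 */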
static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb);

	kick_ufence(ufence, fence);
}

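/**
 * xe_sync_entry_parse() - Parse a user-supplied struct drm_xe_sync
 * @xe: xe device
 * @xef: xe file the sync originates from
 * @sync: sync entry to fill in
 * @sync_user: user pointer to the struct drm_xe_sync to parse
 * @ufence_syncobj: syncobj backing user fences on the non-exec path
 * @ufence_timeline_value: timeline point to use on @ufence_syncobj
 * @flags: SYNC_PARSE_FLAG_* controlling which sync types are accepted
 *
 * Copy in and validate a sync from userspace, looking up syncobjs and
 * allocating chain fences / user fences as required by the sync type.
 *
 * Return: 0 on success, negative error code on failure.
 */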
int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
			struct xe_sync_entry *sync,
			struct drm_xe_sync __user *sync_user,
			struct drm_syncobj *ufence_syncobj,
			u64 ufence_timeline_value,
			unsigned int flags)
{
	struct drm_xe_sync sync_in;
	int err;
	bool exec = flags & SYNC_PARSE_FLAG_EXEC;
	bool in_lr_mode = flags & SYNC_PARSE_FLAG_LR_MODE;
	bool disallow_user_fence = flags & SYNC_PARSE_FLAG_DISALLOW_USER_FENCE;
	bool signal;

	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, sync_in.flags & ~DRM_XE_SYNC_FLAG_SIGNAL) ||
	    XE_IOCTL_DBG(xe, sync_in.reserved[0] || sync_in.reserved[1]))
		return -EINVAL;

	signal = sync_in.flags & DRM_XE_SYNC_FLAG_SIGNAL;
	switch (sync_in.type) {
	case DRM_XE_SYNC_TYPE_SYNCOBJ:
		if (XE_IOCTL_DBG(xe, in_lr_mode && signal))
			return -EOPNOTSUPP;

		if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr)))
			return -EINVAL;

		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
		if (XE_IOCTL_DBG(xe, !sync->syncobj))
			return -ENOENT;

		if (!signal) {
			sync->fence = drm_syncobj_fence_get(sync->syncobj);
			if (XE_IOCTL_DBG(xe, !sync->fence))
				return -EINVAL;
		}
		break;

	case DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ:
		if (XE_IOCTL_DBG(xe, in_lr_mode && signal))
			return -EOPNOTSUPP;

		if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr)))
			return -EINVAL;

		if (XE_IOCTL_DBG(xe, sync_in.timeline_value == 0))
			return -EINVAL;

		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
		if (XE_IOCTL_DBG(xe, !sync->syncobj))
			return -ENOENT;

		if (signal) {
			sync->chain_fence = dma_fence_chain_alloc();
			if (!sync->chain_fence)
				return -ENOMEM;
		} else {
			sync->fence = drm_syncobj_fence_get(sync->syncobj);
			if (XE_IOCTL_DBG(xe, !sync->fence))
				return -EINVAL;

			err = dma_fence_chain_find_seqno(&sync->fence,
							 sync_in.timeline_value);
			if (err)
				return err;
		}
		break;

	case DRM_XE_SYNC_TYPE_USER_FENCE:
		if (XE_IOCTL_DBG(xe, disallow_user_fence))
			return -EOPNOTSUPP;

		if (XE_IOCTL_DBG(xe, !signal))
			return -EOPNOTSUPP;

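		/* The u64 fence value write requires an 8-byte-aligned address */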
		if (XE_IOCTL_DBG(xe, sync_in.addr & 0x7))
			return -EINVAL;

		if (exec) {
			sync->addr = sync_in.addr;
		} else {
			sync->ufence_timeline_value = ufence_timeline_value;
			sync->ufence = user_fence_create(xe, sync_in.addr,
							 sync_in.timeline_value);
			if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
				return PTR_ERR(sync->ufence);
			sync->ufence_chain_fence = dma_fence_chain_alloc();
			if (!sync->ufence_chain_fence)
				return -ENOMEM;
			sync->ufence_syncobj = ufence_syncobj;
		}

		break;

	default:
		return -EINVAL;
	}

	sync->type = sync_in.type;
	sync->flags = sync_in.flags;
	sync->timeline_value = sync_in.timeline_value;

	return 0;
}
ALLOW_ERROR_INJECTION(xe_sync_entry_parse, ERRNO);

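/**
 * xe_sync_entry_add_deps() - Add a sync's in-fence as a job dependency
 * @sync: sync entry
 * @job: scheduler job to add the dependency to
 *
 * Return: 0 on success or if the sync has no in-fence, negative error code on
 * failure.
 */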
int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
{
	if (sync->fence)
		return drm_sched_job_add_dependency(&job->drm,
						    dma_fence_get(sync->fence));

	return 0;
}

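/**
 * xe_sync_entry_signal() - Signal a sync entry with the given fence
 * @sync: sync entry to signal
 * @fence: fence to attach
 *
 * No-op unless the sync was created with DRM_XE_SYNC_FLAG_SIGNAL. For
 * timeline syncobjs the pre-allocated chain is handed over to the timeline;
 * for user fences a callback is installed that writes the user value once the
 * fence signals.
 */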
void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence)
{
	if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL))
		return;

	if (sync->chain_fence) {
		drm_syncobj_add_point(sync->syncobj, sync->chain_fence,
				      fence, sync->timeline_value);
		/*
		 * The chain's ownership is transferred to the
		 * timeline.
		 */
		sync->chain_fence = NULL;
	} else if (sync->syncobj) {
		drm_syncobj_replace_fence(sync->syncobj, fence);
	} else if (sync->ufence) {
		int err;

		drm_syncobj_add_point(sync->ufence_syncobj,
				      sync->ufence_chain_fence,
				      fence, sync->ufence_timeline_value);
		sync->ufence_chain_fence = NULL;

		fence = drm_syncobj_fence_get(sync->ufence_syncobj);
		user_fence_get(sync->ufence);
		err = dma_fence_add_callback(fence, &sync->ufence->cb,
					     user_fence_cb);
		if (err == -ENOENT) {
			kick_ufence(sync->ufence, fence);
		} else if (err) {
			XE_WARN_ON("failed to add user fence");
			user_fence_put(sync->ufence);
			dma_fence_put(fence);
		}
	}
}

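/**
 * xe_sync_entry_cleanup() - Release all references held by a sync entry
 * @sync: sync entry to clean up
 */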
void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
{
	if (sync->syncobj)
		drm_syncobj_put(sync->syncobj);
	dma_fence_put(sync->fence);
	dma_fence_chain_free(sync->chain_fence);
	dma_fence_chain_free(sync->ufence_chain_fence);
	if (!IS_ERR_OR_NULL(sync->ufence))
		user_fence_put(sync->ufence);
}

/**
 * xe_sync_in_fence_get() - Get a fence from syncs, exec queue, and VM
 * @sync: input syncs
 * @num_sync: number of syncs
 * @q: exec queue
 * @vm: VM
 *
 * Get a fence from syncs, exec queue, and VM. Syncs containing in-fences are
 * rejected. For a VM exec queue, create and return a composite fence of the
 * last fences (and TLB-invalidation last fences) of the queue and its
 * multi-GT siblings; otherwise return the last fence on the input exec queue.
 * The caller must drop the reference to the returned fence.
 *
 * Return: fence on success, ERR_PTR(-EOPNOTSUPP) if an in-fence is present,
 * ERR_PTR(-ENOMEM) on allocation failure.
 */
struct dma_fence *
xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
		     struct xe_exec_queue *q, struct xe_vm *vm)
{
	struct dma_fence **fences = NULL;
	struct dma_fence_array *cf = NULL;
	struct dma_fence *fence;
	int i, num_fence = 0, current_fence = 0;

	lockdep_assert_held(&vm->lock);

	/* Reject in fences */
	for (i = 0; i < num_sync; ++i)
		if (sync[i].fence)
			return ERR_PTR(-EOPNOTSUPP);

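	/*
	 * For a VM exec queue, compose the last fences of the queue and its
	 * multi-GT siblings, plus the TLB-invalidation last fences.
	 */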
	if (q->flags & EXEC_QUEUE_FLAG_VM) {
		struct xe_exec_queue *__q;
		struct xe_tile *tile;
		u8 id;

		for_each_tile(tile, vm->xe, id)
			num_fence += (1 + XE_MAX_GT_PER_TILE);

		fences = kmalloc_array(num_fence, sizeof(*fences),
				       GFP_KERNEL);
		if (!fences)
			return ERR_PTR(-ENOMEM);

		fences[current_fence++] =
			xe_exec_queue_last_fence_get(q, vm);
		for_each_tlb_inval(i)
			fences[current_fence++] =
				xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
		list_for_each_entry(__q, &q->multi_gt_list,
				    multi_gt_link) {
			fences[current_fence++] =
				xe_exec_queue_last_fence_get(__q, vm);
			for_each_tlb_inval(i)
				fences[current_fence++] =
					xe_exec_queue_tlb_inval_last_fence_get(__q, vm, i);
		}

		xe_assert(vm->xe, current_fence == num_fence);
		cf = dma_fence_array_create(num_fence, fences,
					    dma_fence_context_alloc(1),
					    1, false);
		if (!cf)
			goto err_out;

		return &cf->base;
	}

	fence = xe_exec_queue_last_fence_get(q, vm);
	return fence;

err_out:
	while (current_fence)
		dma_fence_put(fences[--current_fence]);
	kfree(fences);

	return ERR_PTR(-ENOMEM);
}

/**
 * __xe_sync_ufence_get() - Get an additional user fence reference
 * @ufence: input user fence
 *
 * Take an additional reference on the given user fence.
 *
 * Return: xe_user_fence pointer with reference
 */
struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence)
{
	user_fence_get(ufence);

	return ufence;
}

/**
 * xe_sync_ufence_get() - Get user fence from sync
 * @sync: input sync
 *
 * Get a user fence reference from sync.
 *
 * Return: xe_user_fence pointer with reference
 */
struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync)
{
	user_fence_get(sync->ufence);

	return sync->ufence;
}

/**
 * xe_sync_ufence_put() - Put user fence reference
 * @ufence: user fence reference
 */
void xe_sync_ufence_put(struct xe_user_fence *ufence)
{
	user_fence_put(ufence);
}

/**
 * xe_sync_ufence_get_status() - Get user fence status
 * @ufence: user fence
 *
 * Return: 1 if signalled, 0 if not signalled, <0 on error
 */
int xe_sync_ufence_get_status(struct xe_user_fence *ufence)
{
	return READ_ONCE(ufence->signalled);
}