1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * VDUSE: vDPA Device in Userspace
4 *
5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6 *
7 * Author: Xie Yongji <xieyongji@bytedance.com>
8 *
9 */
10
11#include "linux/virtio_net.h"
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/cdev.h>
15#include <linux/device.h>
16#include <linux/eventfd.h>
17#include <linux/slab.h>
18#include <linux/wait.h>
19#include <linux/dma-map-ops.h>
20#include <linux/poll.h>
21#include <linux/file.h>
22#include <linux/uio.h>
23#include <linux/vdpa.h>
24#include <linux/nospec.h>
25#include <linux/vmalloc.h>
26#include <linux/sched/mm.h>
27#include <uapi/linux/vduse.h>
28#include <uapi/linux/vdpa.h>
29#include <uapi/linux/virtio_config.h>
30#include <uapi/linux/virtio_ids.h>
31#include <uapi/linux/virtio_blk.h>
32#include <uapi/linux/virtio_ring.h>
33#include <linux/mod_devicetable.h>
34
35#include "iova_domain.h"
36
37#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
38#define DRV_DESC "vDPA Device in Userspace"
39#define DRV_LICENSE "GPL v2"
40
41#define VDUSE_DEV_MAX (1U << MINORBITS)
42#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
43#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
44#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
45/* 128 MB reserved for virtqueue creation */
46#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
47#define VDUSE_MSG_DEFAULT_TIMEOUT 30
48
49#define IRQ_UNBOUND -1
50
51struct vduse_virtqueue {
52 u16 index;
53 u16 num_max;
54 u32 num;
55 u64 desc_addr;
56 u64 driver_addr;
57 u64 device_addr;
58 struct vdpa_vq_state state;
59 bool ready;
60 bool kicked;
61 spinlock_t kick_lock;
62 spinlock_t irq_lock;
63 struct eventfd_ctx *kickfd;
64 struct vdpa_callback cb;
65 struct work_struct inject;
66 struct work_struct kick;
67 int irq_effective_cpu;
68 struct cpumask irq_affinity;
69 struct kobject kobj;
70};
71
72struct vduse_dev;
73
74struct vduse_vdpa {
75 struct vdpa_device vdpa;
76 struct vduse_dev *dev;
77};
78
/*
 * Userspace memory registered through vduse_dev_reg_umem() and released by
 * vduse_dev_dereg_umem().
 */
struct vduse_umem {
	unsigned long iova;	/* IOVA passed at registration time */
	unsigned long npages;	/* number of pinned pages in @pages */
	struct page **pages;	/* vmalloc'ed array of the pinned pages */
	struct mm_struct *mm;	/* mm charged via pinned_vm; held with mmgrab() */
};
85
86struct vduse_dev {
87 struct vduse_vdpa *vdev;
88 struct device *dev;
89 struct vduse_virtqueue **vqs;
90 struct vduse_iova_domain *domain;
91 char *name;
92 struct mutex lock;
93 spinlock_t msg_lock;
94 u64 msg_unique;
95 u32 msg_timeout;
96 wait_queue_head_t waitq;
97 struct list_head send_list;
98 struct list_head recv_list;
99 struct vdpa_callback config_cb;
100 struct work_struct inject;
101 spinlock_t irq_lock;
102 struct rw_semaphore rwsem;
103 int minor;
104 bool broken;
105 bool connected;
106 u64 api_version;
107 u64 device_features;
108 u64 driver_features;
109 u32 device_id;
110 u32 vendor_id;
111 u32 generation;
112 u32 config_size;
113 void *config;
114 u8 status;
115 u32 vq_num;
116 u32 vq_align;
117 struct vduse_umem *umem;
118 struct mutex mem_lock;
119 unsigned int bounce_size;
120 struct mutex domain_lock;
121};
122
123struct vduse_dev_msg {
124 struct vduse_dev_request req;
125 struct vduse_dev_response resp;
126 struct list_head list;
127 wait_queue_head_t waitq;
128 bool completed;
129};
130
131struct vduse_control {
132 u64 api_version;
133};
134
135static DEFINE_MUTEX(vduse_lock);
136static DEFINE_IDR(vduse_idr);
137
138static dev_t vduse_major;
139static struct cdev vduse_ctrl_cdev;
140static struct cdev vduse_cdev;
141static struct workqueue_struct *vduse_irq_wq;
142static struct workqueue_struct *vduse_irq_bound_wq;
143
144static u32 allowed_device_id[] = {
145 VIRTIO_ID_BLOCK,
146 VIRTIO_ID_NET,
147 VIRTIO_ID_FS,
148};
149
150static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
151{
152 struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
153
154 return vdev->dev;
155}
156
/* Resolve the vduse_dev from a struct device via dev_to_vdpa(). */
static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	return vdpa_to_vduse(dev_to_vdpa(dev));
}
163
164static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
165 uint32_t request_id)
166{
167 struct vduse_dev_msg *msg;
168
169 list_for_each_entry(msg, head, list) {
170 if (msg->req.request_id == request_id) {
171 list_del(entry: &msg->list);
172 return msg;
173 }
174 }
175
176 return NULL;
177}
178
179static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
180{
181 struct vduse_dev_msg *msg = NULL;
182
183 if (!list_empty(head)) {
184 msg = list_first_entry(head, struct vduse_dev_msg, list);
185 list_del(entry: &msg->list);
186 }
187
188 return msg;
189}
190
191static void vduse_enqueue_msg(struct list_head *head,
192 struct vduse_dev_msg *msg)
193{
194 list_add_tail(new: &msg->list, head);
195}
196
197static void vduse_dev_broken(struct vduse_dev *dev)
198{
199 struct vduse_dev_msg *msg, *tmp;
200
201 if (unlikely(dev->broken))
202 return;
203
204 list_splice_init(list: &dev->recv_list, head: &dev->send_list);
205 list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
206 list_del(entry: &msg->list);
207 msg->completed = 1;
208 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
209 wake_up(&msg->waitq);
210 }
211 dev->broken = true;
212 wake_up(&dev->waitq);
213}
214
215static int vduse_dev_msg_sync(struct vduse_dev *dev,
216 struct vduse_dev_msg *msg)
217{
218 int ret;
219
220 if (unlikely(dev->broken))
221 return -EIO;
222
223 init_waitqueue_head(&msg->waitq);
224 spin_lock(lock: &dev->msg_lock);
225 if (unlikely(dev->broken)) {
226 spin_unlock(lock: &dev->msg_lock);
227 return -EIO;
228 }
229 msg->req.request_id = dev->msg_unique++;
230 vduse_enqueue_msg(head: &dev->send_list, msg);
231 wake_up(&dev->waitq);
232 spin_unlock(lock: &dev->msg_lock);
233 if (dev->msg_timeout)
234 ret = wait_event_killable_timeout(msg->waitq, msg->completed,
235 (long)dev->msg_timeout * HZ);
236 else
237 ret = wait_event_killable(msg->waitq, msg->completed);
238
239 spin_lock(lock: &dev->msg_lock);
240 if (!msg->completed) {
241 list_del(entry: &msg->list);
242 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
243 /* Mark the device as malfunction when there is a timeout */
244 if (!ret)
245 vduse_dev_broken(dev);
246 }
247 ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
248 spin_unlock(lock: &dev->msg_lock);
249
250 return ret;
251}
252
253static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
254 struct vduse_virtqueue *vq,
255 struct vdpa_vq_state_packed *packed)
256{
257 struct vduse_dev_msg msg = { 0 };
258 int ret;
259
260 msg.req.type = VDUSE_GET_VQ_STATE;
261 msg.req.vq_state.index = vq->index;
262
263 ret = vduse_dev_msg_sync(dev, msg: &msg);
264 if (ret)
265 return ret;
266
267 packed->last_avail_counter =
268 msg.resp.vq_state.packed.last_avail_counter & 0x0001;
269 packed->last_avail_idx =
270 msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
271 packed->last_used_counter =
272 msg.resp.vq_state.packed.last_used_counter & 0x0001;
273 packed->last_used_idx =
274 msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
275
276 return 0;
277}
278
279static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
280 struct vduse_virtqueue *vq,
281 struct vdpa_vq_state_split *split)
282{
283 struct vduse_dev_msg msg = { 0 };
284 int ret;
285
286 msg.req.type = VDUSE_GET_VQ_STATE;
287 msg.req.vq_state.index = vq->index;
288
289 ret = vduse_dev_msg_sync(dev, msg: &msg);
290 if (ret)
291 return ret;
292
293 split->avail_index = msg.resp.vq_state.split.avail_index;
294
295 return 0;
296}
297
298static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
299{
300 struct vduse_dev_msg msg = { 0 };
301
302 msg.req.type = VDUSE_SET_STATUS;
303 msg.req.s.status = status;
304
305 return vduse_dev_msg_sync(dev, msg: &msg);
306}
307
308static int vduse_dev_update_iotlb(struct vduse_dev *dev,
309 u64 start, u64 last)
310{
311 struct vduse_dev_msg msg = { 0 };
312
313 if (last < start)
314 return -EINVAL;
315
316 msg.req.type = VDUSE_UPDATE_IOTLB;
317 msg.req.iova.start = start;
318 msg.req.iova.last = last;
319
320 return vduse_dev_msg_sync(dev, msg: &msg);
321}
322
323static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
324{
325 struct file *file = iocb->ki_filp;
326 struct vduse_dev *dev = file->private_data;
327 struct vduse_dev_msg *msg;
328 int size = sizeof(struct vduse_dev_request);
329 ssize_t ret;
330
331 if (iov_iter_count(i: to) < size)
332 return -EINVAL;
333
334 spin_lock(lock: &dev->msg_lock);
335 while (1) {
336 msg = vduse_dequeue_msg(head: &dev->send_list);
337 if (msg)
338 break;
339
340 ret = -EAGAIN;
341 if (file->f_flags & O_NONBLOCK)
342 goto unlock;
343
344 spin_unlock(lock: &dev->msg_lock);
345 ret = wait_event_interruptible_exclusive(dev->waitq,
346 !list_empty(&dev->send_list));
347 if (ret)
348 return ret;
349
350 spin_lock(lock: &dev->msg_lock);
351 }
352 spin_unlock(lock: &dev->msg_lock);
353 ret = copy_to_iter(addr: &msg->req, bytes: size, i: to);
354 spin_lock(lock: &dev->msg_lock);
355 if (ret != size) {
356 ret = -EFAULT;
357 vduse_enqueue_msg(head: &dev->send_list, msg);
358 goto unlock;
359 }
360 vduse_enqueue_msg(head: &dev->recv_list, msg);
361unlock:
362 spin_unlock(lock: &dev->msg_lock);
363
364 return ret;
365}
366
/*
 * Return true when the first @size bytes at @ptr are all zero. A @size of
 * zero or less yields true because no byte is inspected.
 */
static bool is_mem_zero(const char *ptr, int size)
{
	const char *cursor = ptr;
	int remaining = size;

	while (remaining-- > 0) {
		if (*cursor++ != 0)
			return false;
	}

	return true;
}
377
378static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
379{
380 struct file *file = iocb->ki_filp;
381 struct vduse_dev *dev = file->private_data;
382 struct vduse_dev_response resp;
383 struct vduse_dev_msg *msg;
384 size_t ret;
385
386 ret = copy_from_iter(addr: &resp, bytes: sizeof(resp), i: from);
387 if (ret != sizeof(resp))
388 return -EINVAL;
389
390 if (!is_mem_zero(ptr: (const char *)resp.reserved, size: sizeof(resp.reserved)))
391 return -EINVAL;
392
393 spin_lock(lock: &dev->msg_lock);
394 msg = vduse_find_msg(head: &dev->recv_list, request_id: resp.request_id);
395 if (!msg) {
396 ret = -ENOENT;
397 goto unlock;
398 }
399
400 memcpy(&msg->resp, &resp, sizeof(resp));
401 msg->completed = 1;
402 wake_up(&msg->waitq);
403unlock:
404 spin_unlock(lock: &dev->msg_lock);
405
406 return ret;
407}
408
409static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
410{
411 struct vduse_dev *dev = file->private_data;
412 __poll_t mask = 0;
413
414 poll_wait(filp: file, wait_address: &dev->waitq, p: wait);
415
416 spin_lock(lock: &dev->msg_lock);
417
418 if (unlikely(dev->broken))
419 mask |= EPOLLERR;
420 if (!list_empty(head: &dev->send_list))
421 mask |= EPOLLIN | EPOLLRDNORM;
422 if (!list_empty(head: &dev->recv_list))
423 mask |= EPOLLOUT | EPOLLWRNORM;
424
425 spin_unlock(lock: &dev->msg_lock);
426
427 return mask;
428}
429
430static void vduse_dev_reset(struct vduse_dev *dev)
431{
432 int i;
433 struct vduse_iova_domain *domain = dev->domain;
434
435 /* The coherent mappings are handled in vduse_dev_free_coherent() */
436 if (domain && domain->bounce_map)
437 vduse_domain_reset_bounce_map(domain);
438
439 down_write(sem: &dev->rwsem);
440
441 dev->status = 0;
442 dev->driver_features = 0;
443 dev->generation++;
444 spin_lock(lock: &dev->irq_lock);
445 dev->config_cb.callback = NULL;
446 dev->config_cb.private = NULL;
447 spin_unlock(lock: &dev->irq_lock);
448 flush_work(work: &dev->inject);
449
450 for (i = 0; i < dev->vq_num; i++) {
451 struct vduse_virtqueue *vq = dev->vqs[i];
452
453 vq->ready = false;
454 vq->desc_addr = 0;
455 vq->driver_addr = 0;
456 vq->device_addr = 0;
457 vq->num = 0;
458 memset(&vq->state, 0, sizeof(vq->state));
459
460 spin_lock(lock: &vq->kick_lock);
461 vq->kicked = false;
462 if (vq->kickfd)
463 eventfd_ctx_put(ctx: vq->kickfd);
464 vq->kickfd = NULL;
465 spin_unlock(lock: &vq->kick_lock);
466
467 spin_lock(lock: &vq->irq_lock);
468 vq->cb.callback = NULL;
469 vq->cb.private = NULL;
470 vq->cb.trigger = NULL;
471 spin_unlock(lock: &vq->irq_lock);
472 flush_work(work: &vq->inject);
473 flush_work(work: &vq->kick);
474 }
475
476 up_write(sem: &dev->rwsem);
477}
478
479static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
480 u64 desc_area, u64 driver_area,
481 u64 device_area)
482{
483 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
484 struct vduse_virtqueue *vq = dev->vqs[idx];
485
486 vq->desc_addr = desc_area;
487 vq->driver_addr = driver_area;
488 vq->device_addr = device_area;
489
490 return 0;
491}
492
493static void vduse_vq_kick(struct vduse_virtqueue *vq)
494{
495 spin_lock(lock: &vq->kick_lock);
496 if (!vq->ready)
497 goto unlock;
498
499 if (vq->kickfd)
500 eventfd_signal(ctx: vq->kickfd);
501 else
502 vq->kicked = true;
503unlock:
504 spin_unlock(lock: &vq->kick_lock);
505}
506
507static void vduse_vq_kick_work(struct work_struct *work)
508{
509 struct vduse_virtqueue *vq = container_of(work,
510 struct vduse_virtqueue, kick);
511
512 vduse_vq_kick(vq);
513}
514
515static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
516{
517 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
518 struct vduse_virtqueue *vq = dev->vqs[idx];
519
520 if (!eventfd_signal_allowed()) {
521 schedule_work(work: &vq->kick);
522 return;
523 }
524 vduse_vq_kick(vq);
525}
526
527static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
528 struct vdpa_callback *cb)
529{
530 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
531 struct vduse_virtqueue *vq = dev->vqs[idx];
532
533 spin_lock(lock: &vq->irq_lock);
534 vq->cb.callback = cb->callback;
535 vq->cb.private = cb->private;
536 vq->cb.trigger = cb->trigger;
537 spin_unlock(lock: &vq->irq_lock);
538}
539
540static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
541{
542 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
543 struct vduse_virtqueue *vq = dev->vqs[idx];
544
545 vq->num = num;
546}
547
548static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
549{
550 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
551 struct vduse_virtqueue *vq = dev->vqs[idx];
552
553 if (vq->num)
554 return vq->num;
555 else
556 return vq->num_max;
557}
558
559static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
560 u16 idx, bool ready)
561{
562 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
563 struct vduse_virtqueue *vq = dev->vqs[idx];
564
565 vq->ready = ready;
566}
567
568static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
569{
570 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
571 struct vduse_virtqueue *vq = dev->vqs[idx];
572
573 return vq->ready;
574}
575
576static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
577 const struct vdpa_vq_state *state)
578{
579 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
580 struct vduse_virtqueue *vq = dev->vqs[idx];
581
582 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
583 vq->state.packed.last_avail_counter =
584 state->packed.last_avail_counter;
585 vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
586 vq->state.packed.last_used_counter =
587 state->packed.last_used_counter;
588 vq->state.packed.last_used_idx = state->packed.last_used_idx;
589 } else
590 vq->state.split.avail_index = state->split.avail_index;
591
592 return 0;
593}
594
595static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
596 struct vdpa_vq_state *state)
597{
598 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
599 struct vduse_virtqueue *vq = dev->vqs[idx];
600
601 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
602 return vduse_dev_get_vq_state_packed(dev, vq, packed: &state->packed);
603
604 return vduse_dev_get_vq_state_split(dev, vq, split: &state->split);
605}
606
607static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
608{
609 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
610
611 return dev->vq_align;
612}
613
614static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
615{
616 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
617
618 return dev->device_features;
619}
620
621static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
622{
623 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
624
625 dev->driver_features = features;
626 return 0;
627}
628
629static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
630{
631 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
632
633 return dev->driver_features;
634}
635
636static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
637 struct vdpa_callback *cb)
638{
639 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
640
641 spin_lock(lock: &dev->irq_lock);
642 dev->config_cb.callback = cb->callback;
643 dev->config_cb.private = cb->private;
644 spin_unlock(lock: &dev->irq_lock);
645}
646
647static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
648{
649 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
650 u16 num_max = 0;
651 int i;
652
653 for (i = 0; i < dev->vq_num; i++)
654 if (num_max < dev->vqs[i]->num_max)
655 num_max = dev->vqs[i]->num_max;
656
657 return num_max;
658}
659
660static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
661{
662 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
663
664 return dev->device_id;
665}
666
667static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
668{
669 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
670
671 return dev->vendor_id;
672}
673
674static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
675{
676 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
677
678 return dev->status;
679}
680
681static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
682{
683 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
684
685 if (vduse_dev_set_status(dev, status))
686 return;
687
688 dev->status = status;
689}
690
691static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
692{
693 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
694
695 return dev->config_size;
696}
697
698static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
699 void *buf, unsigned int len)
700{
701 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
702
703 /* Initialize the buffer in case of partial copy. */
704 memset(buf, 0, len);
705
706 if (offset > dev->config_size)
707 return;
708
709 if (len > dev->config_size - offset)
710 len = dev->config_size - offset;
711
712 memcpy(buf, dev->config + offset, len);
713}
714
/*
 * vdpa_config_ops.set_config: intentionally a no-op, so writes from the
 * driver are discarded.
 */
static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}
720
/*
 * vdpa_config_ops.reset: tell userspace the status is now 0, then reset the
 * kernel-side state regardless of whether that request succeeded. Returns
 * the result of the status request.
 */
static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_dev_set_status(dev, 0);
	vduse_dev_reset(dev);

	return ret;
}
730
731static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
732{
733 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
734
735 return dev->generation;
736}
737
738static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
739 const struct cpumask *cpu_mask)
740{
741 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
742
743 if (cpu_mask)
744 cpumask_copy(dstp: &dev->vqs[idx]->irq_affinity, srcp: cpu_mask);
745 else
746 cpumask_setall(dstp: &dev->vqs[idx]->irq_affinity);
747
748 return 0;
749}
750
751static const struct cpumask *
752vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
753{
754 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
755
756 return &dev->vqs[idx]->irq_affinity;
757}
758
759static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
760 unsigned int asid,
761 struct vhost_iotlb *iotlb)
762{
763 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
764 int ret;
765
766 ret = vduse_domain_set_map(domain: dev->domain, iotlb);
767 if (ret)
768 return ret;
769
770 ret = vduse_dev_update_iotlb(dev, start: 0ULL, ULLONG_MAX);
771 if (ret) {
772 vduse_domain_clear_map(domain: dev->domain, iotlb);
773 return ret;
774 }
775
776 return 0;
777}
778
779static void vduse_vdpa_free(struct vdpa_device *vdpa)
780{
781 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
782
783 dev->vdev = NULL;
784}
785
786static const struct vdpa_config_ops vduse_vdpa_config_ops = {
787 .set_vq_address = vduse_vdpa_set_vq_address,
788 .kick_vq = vduse_vdpa_kick_vq,
789 .set_vq_cb = vduse_vdpa_set_vq_cb,
790 .set_vq_num = vduse_vdpa_set_vq_num,
791 .get_vq_size = vduse_vdpa_get_vq_size,
792 .set_vq_ready = vduse_vdpa_set_vq_ready,
793 .get_vq_ready = vduse_vdpa_get_vq_ready,
794 .set_vq_state = vduse_vdpa_set_vq_state,
795 .get_vq_state = vduse_vdpa_get_vq_state,
796 .get_vq_align = vduse_vdpa_get_vq_align,
797 .get_device_features = vduse_vdpa_get_device_features,
798 .set_driver_features = vduse_vdpa_set_driver_features,
799 .get_driver_features = vduse_vdpa_get_driver_features,
800 .set_config_cb = vduse_vdpa_set_config_cb,
801 .get_vq_num_max = vduse_vdpa_get_vq_num_max,
802 .get_device_id = vduse_vdpa_get_device_id,
803 .get_vendor_id = vduse_vdpa_get_vendor_id,
804 .get_status = vduse_vdpa_get_status,
805 .set_status = vduse_vdpa_set_status,
806 .get_config_size = vduse_vdpa_get_config_size,
807 .get_config = vduse_vdpa_get_config,
808 .set_config = vduse_vdpa_set_config,
809 .get_generation = vduse_vdpa_get_generation,
810 .set_vq_affinity = vduse_vdpa_set_vq_affinity,
811 .get_vq_affinity = vduse_vdpa_get_vq_affinity,
812 .reset = vduse_vdpa_reset,
813 .set_map = vduse_vdpa_set_map,
814 .free = vduse_vdpa_free,
815};
816
817static void vduse_dev_sync_single_for_device(union virtio_map token,
818 dma_addr_t dma_addr, size_t size,
819 enum dma_data_direction dir)
820{
821 struct vduse_iova_domain *domain = token.iova_domain;
822
823 vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
824}
825
826static void vduse_dev_sync_single_for_cpu(union virtio_map token,
827 dma_addr_t dma_addr, size_t size,
828 enum dma_data_direction dir)
829{
830 struct vduse_iova_domain *domain = token.iova_domain;
831
832 vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
833}
834
835static dma_addr_t vduse_dev_map_page(union virtio_map token, struct page *page,
836 unsigned long offset, size_t size,
837 enum dma_data_direction dir,
838 unsigned long attrs)
839{
840 struct vduse_iova_domain *domain = token.iova_domain;
841
842 return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
843}
844
845static void vduse_dev_unmap_page(union virtio_map token, dma_addr_t dma_addr,
846 size_t size, enum dma_data_direction dir,
847 unsigned long attrs)
848{
849 struct vduse_iova_domain *domain = token.iova_domain;
850
851 return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
852}
853
854static void *vduse_dev_alloc_coherent(union virtio_map token, size_t size,
855 dma_addr_t *dma_addr, gfp_t flag)
856{
857 struct vduse_iova_domain *domain = token.iova_domain;
858 unsigned long iova;
859 void *addr;
860
861 *dma_addr = DMA_MAPPING_ERROR;
862 addr = vduse_domain_alloc_coherent(domain, size,
863 dma_addr: (dma_addr_t *)&iova, flag);
864 if (!addr)
865 return NULL;
866
867 *dma_addr = (dma_addr_t)iova;
868
869 return addr;
870}
871
872static void vduse_dev_free_coherent(union virtio_map token, size_t size,
873 void *vaddr, dma_addr_t dma_addr,
874 unsigned long attrs)
875{
876 struct vduse_iova_domain *domain = token.iova_domain;
877
878 vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
879}
880
881static bool vduse_dev_need_sync(union virtio_map token, dma_addr_t dma_addr)
882{
883 struct vduse_iova_domain *domain = token.iova_domain;
884
885 return dma_addr < domain->bounce_size;
886}
887
888static int vduse_dev_mapping_error(union virtio_map token, dma_addr_t dma_addr)
889{
890 if (unlikely(dma_addr == DMA_MAPPING_ERROR))
891 return -ENOMEM;
892 return 0;
893}
894
895static size_t vduse_dev_max_mapping_size(union virtio_map token)
896{
897 struct vduse_iova_domain *domain = token.iova_domain;
898
899 return domain->bounce_size;
900}
901
902static const struct virtio_map_ops vduse_map_ops = {
903 .sync_single_for_device = vduse_dev_sync_single_for_device,
904 .sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
905 .map_page = vduse_dev_map_page,
906 .unmap_page = vduse_dev_unmap_page,
907 .alloc = vduse_dev_alloc_coherent,
908 .free = vduse_dev_free_coherent,
909 .need_sync = vduse_dev_need_sync,
910 .mapping_error = vduse_dev_mapping_error,
911 .max_mapping_size = vduse_dev_max_mapping_size,
912};
913
914static unsigned int perm_to_file_flags(u8 perm)
915{
916 unsigned int flags = 0;
917
918 switch (perm) {
919 case VDUSE_ACCESS_WO:
920 flags |= O_WRONLY;
921 break;
922 case VDUSE_ACCESS_RO:
923 flags |= O_RDONLY;
924 break;
925 case VDUSE_ACCESS_RW:
926 flags |= O_RDWR;
927 break;
928 default:
929 WARN(1, "invalidate vhost IOTLB permission\n");
930 break;
931 }
932
933 return flags;
934}
935
936static int vduse_kickfd_setup(struct vduse_dev *dev,
937 struct vduse_vq_eventfd *eventfd)
938{
939 struct eventfd_ctx *ctx = NULL;
940 struct vduse_virtqueue *vq;
941 u32 index;
942
943 if (eventfd->index >= dev->vq_num)
944 return -EINVAL;
945
946 index = array_index_nospec(eventfd->index, dev->vq_num);
947 vq = dev->vqs[index];
948 if (eventfd->fd >= 0) {
949 ctx = eventfd_ctx_fdget(fd: eventfd->fd);
950 if (IS_ERR(ptr: ctx))
951 return PTR_ERR(ptr: ctx);
952 } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
953 return 0;
954
955 spin_lock(lock: &vq->kick_lock);
956 if (vq->kickfd)
957 eventfd_ctx_put(ctx: vq->kickfd);
958 vq->kickfd = ctx;
959 if (vq->ready && vq->kicked && vq->kickfd) {
960 eventfd_signal(ctx: vq->kickfd);
961 vq->kicked = false;
962 }
963 spin_unlock(lock: &vq->kick_lock);
964
965 return 0;
966}
967
968static bool vduse_dev_is_ready(struct vduse_dev *dev)
969{
970 int i;
971
972 for (i = 0; i < dev->vq_num; i++)
973 if (!dev->vqs[i]->num_max)
974 return false;
975
976 return true;
977}
978
979static void vduse_dev_irq_inject(struct work_struct *work)
980{
981 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
982
983 spin_lock_bh(lock: &dev->irq_lock);
984 if (dev->config_cb.callback)
985 dev->config_cb.callback(dev->config_cb.private);
986 spin_unlock_bh(lock: &dev->irq_lock);
987}
988
989static void vduse_vq_irq_inject(struct work_struct *work)
990{
991 struct vduse_virtqueue *vq = container_of(work,
992 struct vduse_virtqueue, inject);
993
994 spin_lock_bh(lock: &vq->irq_lock);
995 if (vq->ready && vq->cb.callback)
996 vq->cb.callback(vq->cb.private);
997 spin_unlock_bh(lock: &vq->irq_lock);
998}
999
1000static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
1001{
1002 bool signal = false;
1003
1004 if (!vq->cb.trigger)
1005 return false;
1006
1007 spin_lock_irq(lock: &vq->irq_lock);
1008 if (vq->ready && vq->cb.trigger) {
1009 eventfd_signal(ctx: vq->cb.trigger);
1010 signal = true;
1011 }
1012 spin_unlock_irq(lock: &vq->irq_lock);
1013
1014 return signal;
1015}
1016
1017static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
1018 struct work_struct *irq_work,
1019 int irq_effective_cpu)
1020{
1021 int ret = -EINVAL;
1022
1023 down_read(sem: &dev->rwsem);
1024 if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1025 goto unlock;
1026
1027 ret = 0;
1028 if (irq_effective_cpu == IRQ_UNBOUND)
1029 queue_work(wq: vduse_irq_wq, work: irq_work);
1030 else
1031 queue_work_on(cpu: irq_effective_cpu,
1032 wq: vduse_irq_bound_wq, work: irq_work);
1033unlock:
1034 up_read(sem: &dev->rwsem);
1035
1036 return ret;
1037}
1038
1039static int vduse_dev_dereg_umem(struct vduse_dev *dev,
1040 u64 iova, u64 size)
1041{
1042 int ret;
1043
1044 mutex_lock(&dev->mem_lock);
1045 ret = -ENOENT;
1046 if (!dev->umem)
1047 goto unlock;
1048
1049 ret = -EINVAL;
1050 if (!dev->domain)
1051 goto unlock;
1052
1053 if (dev->umem->iova != iova || size != dev->domain->bounce_size)
1054 goto unlock;
1055
1056 vduse_domain_remove_user_bounce_pages(domain: dev->domain);
1057 unpin_user_pages_dirty_lock(pages: dev->umem->pages,
1058 npages: dev->umem->npages, make_dirty: true);
1059 atomic64_sub(i: dev->umem->npages, v: &dev->umem->mm->pinned_vm);
1060 mmdrop(mm: dev->umem->mm);
1061 vfree(addr: dev->umem->pages);
1062 kfree(objp: dev->umem);
1063 dev->umem = NULL;
1064 ret = 0;
1065unlock:
1066 mutex_unlock(lock: &dev->mem_lock);
1067 return ret;
1068}
1069
1070static int vduse_dev_reg_umem(struct vduse_dev *dev,
1071 u64 iova, u64 uaddr, u64 size)
1072{
1073 struct page **page_list = NULL;
1074 struct vduse_umem *umem = NULL;
1075 long pinned = 0;
1076 unsigned long npages, lock_limit;
1077 int ret;
1078
1079 if (!dev->domain || !dev->domain->bounce_map ||
1080 size != dev->domain->bounce_size ||
1081 iova != 0 || uaddr & ~PAGE_MASK)
1082 return -EINVAL;
1083
1084 mutex_lock(&dev->mem_lock);
1085 ret = -EEXIST;
1086 if (dev->umem)
1087 goto unlock;
1088
1089 ret = -ENOMEM;
1090 npages = size >> PAGE_SHIFT;
1091 page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
1092 GFP_KERNEL_ACCOUNT);
1093 umem = kzalloc(sizeof(*umem), GFP_KERNEL);
1094 if (!page_list || !umem)
1095 goto unlock;
1096
1097 mmap_read_lock(current->mm);
1098
1099 lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1100 if (npages + atomic64_read(v: &current->mm->pinned_vm) > lock_limit)
1101 goto out;
1102
1103 pinned = pin_user_pages(start: uaddr, nr_pages: npages, gup_flags: FOLL_LONGTERM | FOLL_WRITE,
1104 pages: page_list);
1105 if (pinned != npages) {
1106 ret = pinned < 0 ? pinned : -ENOMEM;
1107 goto out;
1108 }
1109
1110 ret = vduse_domain_add_user_bounce_pages(domain: dev->domain,
1111 pages: page_list, count: pinned);
1112 if (ret)
1113 goto out;
1114
1115 atomic64_add(i: npages, v: &current->mm->pinned_vm);
1116
1117 umem->pages = page_list;
1118 umem->npages = pinned;
1119 umem->iova = iova;
1120 umem->mm = current->mm;
1121 mmgrab(current->mm);
1122
1123 dev->umem = umem;
1124out:
1125 if (ret && pinned > 0)
1126 unpin_user_pages(pages: page_list, npages: pinned);
1127
1128 mmap_read_unlock(current->mm);
1129unlock:
1130 if (ret) {
1131 vfree(addr: page_list);
1132 kfree(objp: umem);
1133 }
1134 mutex_unlock(lock: &dev->mem_lock);
1135 return ret;
1136}
1137
1138static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1139{
1140 int curr_cpu = vq->irq_effective_cpu;
1141
1142 while (true) {
1143 curr_cpu = cpumask_next(n: curr_cpu, srcp: &vq->irq_affinity);
1144 if (cpu_online(cpu: curr_cpu))
1145 break;
1146
1147 if (curr_cpu >= nr_cpu_ids)
1148 curr_cpu = IRQ_UNBOUND;
1149 }
1150
1151 vq->irq_effective_cpu = curr_cpu;
1152}
1153
1154static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1155 unsigned long arg)
1156{
1157 struct vduse_dev *dev = file->private_data;
1158 void __user *argp = (void __user *)arg;
1159 int ret;
1160
1161 if (unlikely(dev->broken))
1162 return -EPERM;
1163
1164 switch (cmd) {
1165 case VDUSE_IOTLB_GET_FD: {
1166 struct vduse_iotlb_entry entry;
1167 struct vhost_iotlb_map *map;
1168 struct vdpa_map_file *map_file;
1169 struct file *f = NULL;
1170
1171 ret = -EFAULT;
1172 if (copy_from_user(to: &entry, from: argp, n: sizeof(entry)))
1173 break;
1174
1175 ret = -EINVAL;
1176 if (entry.start > entry.last)
1177 break;
1178
1179 mutex_lock(&dev->domain_lock);
1180 if (!dev->domain) {
1181 mutex_unlock(lock: &dev->domain_lock);
1182 break;
1183 }
1184 spin_lock(lock: &dev->domain->iotlb_lock);
1185 map = vhost_iotlb_itree_first(iotlb: dev->domain->iotlb,
1186 start: entry.start, last: entry.last);
1187 if (map) {
1188 map_file = (struct vdpa_map_file *)map->opaque;
1189 f = get_file(f: map_file->file);
1190 entry.offset = map_file->offset;
1191 entry.start = map->start;
1192 entry.last = map->last;
1193 entry.perm = map->perm;
1194 }
1195 spin_unlock(lock: &dev->domain->iotlb_lock);
1196 mutex_unlock(lock: &dev->domain_lock);
1197 ret = -EINVAL;
1198 if (!f)
1199 break;
1200
1201 ret = -EFAULT;
1202 if (copy_to_user(to: argp, from: &entry, n: sizeof(entry))) {
1203 fput(f);
1204 break;
1205 }
1206 ret = receive_fd(file: f, NULL, o_flags: perm_to_file_flags(perm: entry.perm));
1207 fput(f);
1208 break;
1209 }
1210 case VDUSE_DEV_GET_FEATURES:
1211 /*
1212 * Just mirror what driver wrote here.
1213 * The driver is expected to check FEATURE_OK later.
1214 */
1215 ret = put_user(dev->driver_features, (u64 __user *)argp);
1216 break;
1217 case VDUSE_DEV_SET_CONFIG: {
1218 struct vduse_config_data config;
1219 unsigned long size = offsetof(struct vduse_config_data,
1220 buffer);
1221
1222 ret = -EFAULT;
1223 if (copy_from_user(to: &config, from: argp, n: size))
1224 break;
1225
1226 ret = -EINVAL;
1227 if (config.offset > dev->config_size ||
1228 config.length == 0 ||
1229 config.length > dev->config_size - config.offset)
1230 break;
1231
1232 ret = -EFAULT;
1233 if (copy_from_user(to: dev->config + config.offset, from: argp + size,
1234 n: config.length))
1235 break;
1236
1237 ret = 0;
1238 break;
1239 }
1240 case VDUSE_DEV_INJECT_CONFIG_IRQ:
1241 ret = vduse_dev_queue_irq_work(dev, irq_work: &dev->inject, IRQ_UNBOUND);
1242 break;
1243 case VDUSE_VQ_SETUP: {
1244 struct vduse_vq_config config;
1245 u32 index;
1246
1247 ret = -EFAULT;
1248 if (copy_from_user(to: &config, from: argp, n: sizeof(config)))
1249 break;
1250
1251 ret = -EINVAL;
1252 if (config.index >= dev->vq_num)
1253 break;
1254
1255 if (!is_mem_zero(ptr: (const char *)config.reserved,
1256 size: sizeof(config.reserved)))
1257 break;
1258
1259 index = array_index_nospec(config.index, dev->vq_num);
1260 dev->vqs[index]->num_max = config.max_size;
1261 ret = 0;
1262 break;
1263 }
1264 case VDUSE_VQ_GET_INFO: {
1265 struct vduse_vq_info vq_info;
1266 struct vduse_virtqueue *vq;
1267 u32 index;
1268
1269 ret = -EFAULT;
1270 if (copy_from_user(to: &vq_info, from: argp, n: sizeof(vq_info)))
1271 break;
1272
1273 ret = -EINVAL;
1274 if (vq_info.index >= dev->vq_num)
1275 break;
1276
1277 index = array_index_nospec(vq_info.index, dev->vq_num);
1278 vq = dev->vqs[index];
1279 vq_info.desc_addr = vq->desc_addr;
1280 vq_info.driver_addr = vq->driver_addr;
1281 vq_info.device_addr = vq->device_addr;
1282 vq_info.num = vq->num;
1283
1284 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1285 vq_info.packed.last_avail_counter =
1286 vq->state.packed.last_avail_counter;
1287 vq_info.packed.last_avail_idx =
1288 vq->state.packed.last_avail_idx;
1289 vq_info.packed.last_used_counter =
1290 vq->state.packed.last_used_counter;
1291 vq_info.packed.last_used_idx =
1292 vq->state.packed.last_used_idx;
1293 } else
1294 vq_info.split.avail_index =
1295 vq->state.split.avail_index;
1296
1297 vq_info.ready = vq->ready;
1298
1299 ret = -EFAULT;
1300 if (copy_to_user(to: argp, from: &vq_info, n: sizeof(vq_info)))
1301 break;
1302
1303 ret = 0;
1304 break;
1305 }
1306 case VDUSE_VQ_SETUP_KICKFD: {
1307 struct vduse_vq_eventfd eventfd;
1308
1309 ret = -EFAULT;
1310 if (copy_from_user(to: &eventfd, from: argp, n: sizeof(eventfd)))
1311 break;
1312
1313 ret = vduse_kickfd_setup(dev, eventfd: &eventfd);
1314 break;
1315 }
1316 case VDUSE_VQ_INJECT_IRQ: {
1317 u32 index;
1318
1319 ret = -EFAULT;
1320 if (get_user(index, (u32 __user *)argp))
1321 break;
1322
1323 ret = -EINVAL;
1324 if (index >= dev->vq_num)
1325 break;
1326
1327 ret = 0;
1328 index = array_index_nospec(index, dev->vq_num);
1329 if (!vduse_vq_signal_irqfd(vq: dev->vqs[index])) {
1330 vduse_vq_update_effective_cpu(vq: dev->vqs[index]);
1331 ret = vduse_dev_queue_irq_work(dev,
1332 irq_work: &dev->vqs[index]->inject,
1333 irq_effective_cpu: dev->vqs[index]->irq_effective_cpu);
1334 }
1335 break;
1336 }
1337 case VDUSE_IOTLB_REG_UMEM: {
1338 struct vduse_iova_umem umem;
1339
1340 ret = -EFAULT;
1341 if (copy_from_user(to: &umem, from: argp, n: sizeof(umem)))
1342 break;
1343
1344 ret = -EINVAL;
1345 if (!is_mem_zero(ptr: (const char *)umem.reserved,
1346 size: sizeof(umem.reserved)))
1347 break;
1348
1349 mutex_lock(&dev->domain_lock);
1350 ret = vduse_dev_reg_umem(dev, iova: umem.iova,
1351 uaddr: umem.uaddr, size: umem.size);
1352 mutex_unlock(lock: &dev->domain_lock);
1353 break;
1354 }
1355 case VDUSE_IOTLB_DEREG_UMEM: {
1356 struct vduse_iova_umem umem;
1357
1358 ret = -EFAULT;
1359 if (copy_from_user(to: &umem, from: argp, n: sizeof(umem)))
1360 break;
1361
1362 ret = -EINVAL;
1363 if (!is_mem_zero(ptr: (const char *)umem.reserved,
1364 size: sizeof(umem.reserved)))
1365 break;
1366 mutex_lock(&dev->domain_lock);
1367 ret = vduse_dev_dereg_umem(dev, iova: umem.iova,
1368 size: umem.size);
1369 mutex_unlock(lock: &dev->domain_lock);
1370 break;
1371 }
1372 case VDUSE_IOTLB_GET_INFO: {
1373 struct vduse_iova_info info;
1374 struct vhost_iotlb_map *map;
1375
1376 ret = -EFAULT;
1377 if (copy_from_user(to: &info, from: argp, n: sizeof(info)))
1378 break;
1379
1380 ret = -EINVAL;
1381 if (info.start > info.last)
1382 break;
1383
1384 if (!is_mem_zero(ptr: (const char *)info.reserved,
1385 size: sizeof(info.reserved)))
1386 break;
1387
1388 mutex_lock(&dev->domain_lock);
1389 if (!dev->domain) {
1390 mutex_unlock(lock: &dev->domain_lock);
1391 break;
1392 }
1393 spin_lock(lock: &dev->domain->iotlb_lock);
1394 map = vhost_iotlb_itree_first(iotlb: dev->domain->iotlb,
1395 start: info.start, last: info.last);
1396 if (map) {
1397 info.start = map->start;
1398 info.last = map->last;
1399 info.capability = 0;
1400 if (dev->domain->bounce_map && map->start == 0 &&
1401 map->last == dev->domain->bounce_size - 1)
1402 info.capability |= VDUSE_IOVA_CAP_UMEM;
1403 }
1404 spin_unlock(lock: &dev->domain->iotlb_lock);
1405 mutex_unlock(lock: &dev->domain_lock);
1406 if (!map)
1407 break;
1408
1409 ret = -EFAULT;
1410 if (copy_to_user(to: argp, from: &info, n: sizeof(info)))
1411 break;
1412
1413 ret = 0;
1414 break;
1415 }
1416 default:
1417 ret = -ENOIOCTLCMD;
1418 break;
1419 }
1420
1421 return ret;
1422}
1423
1424static int vduse_dev_release(struct inode *inode, struct file *file)
1425{
1426 struct vduse_dev *dev = file->private_data;
1427
1428 mutex_lock(&dev->domain_lock);
1429 if (dev->domain)
1430 vduse_dev_dereg_umem(dev, iova: 0, size: dev->domain->bounce_size);
1431 mutex_unlock(lock: &dev->domain_lock);
1432 spin_lock(lock: &dev->msg_lock);
1433 /* Make sure the inflight messages can processed after reconncection */
1434 list_splice_init(list: &dev->recv_list, head: &dev->send_list);
1435 spin_unlock(lock: &dev->msg_lock);
1436 dev->connected = false;
1437
1438 return 0;
1439}
1440
1441static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1442{
1443 struct vduse_dev *dev;
1444
1445 mutex_lock(&vduse_lock);
1446 dev = idr_find(&vduse_idr, id: minor);
1447 mutex_unlock(lock: &vduse_lock);
1448
1449 return dev;
1450}
1451
1452static int vduse_dev_open(struct inode *inode, struct file *file)
1453{
1454 int ret;
1455 struct vduse_dev *dev = vduse_dev_get_from_minor(minor: iminor(inode));
1456
1457 if (!dev)
1458 return -ENODEV;
1459
1460 ret = -EBUSY;
1461 mutex_lock(&dev->lock);
1462 if (dev->connected)
1463 goto unlock;
1464
1465 ret = 0;
1466 dev->connected = true;
1467 file->private_data = dev;
1468unlock:
1469 mutex_unlock(lock: &dev->lock);
1470
1471 return ret;
1472}
1473
1474static const struct file_operations vduse_dev_fops = {
1475 .owner = THIS_MODULE,
1476 .open = vduse_dev_open,
1477 .release = vduse_dev_release,
1478 .read_iter = vduse_dev_read_iter,
1479 .write_iter = vduse_dev_write_iter,
1480 .poll = vduse_dev_poll,
1481 .unlocked_ioctl = vduse_dev_ioctl,
1482 .compat_ioctl = compat_ptr_ioctl,
1483 .llseek = noop_llseek,
1484};
1485
1486static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1487{
1488 return sprintf(buf, fmt: "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1489}
1490
1491static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1492 const char *buf, size_t count)
1493{
1494 cpumask_var_t new_value;
1495 int ret;
1496
1497 if (!zalloc_cpumask_var(mask: &new_value, GFP_KERNEL))
1498 return -ENOMEM;
1499
1500 ret = cpumask_parse(buf, dstp: new_value);
1501 if (ret)
1502 goto free_mask;
1503
1504 ret = -EINVAL;
1505 if (!cpumask_intersects(src1p: new_value, cpu_online_mask))
1506 goto free_mask;
1507
1508 cpumask_copy(dstp: &vq->irq_affinity, srcp: new_value);
1509 ret = count;
1510free_mask:
1511 free_cpumask_var(mask: new_value);
1512 return ret;
1513}
1514
1515struct vq_sysfs_entry {
1516 struct attribute attr;
1517 ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
1518 ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
1519 size_t count);
1520};
1521
1522static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
1523
1524static struct attribute *vq_attrs[] = {
1525 &irq_cb_affinity_attr.attr,
1526 NULL,
1527};
1528ATTRIBUTE_GROUPS(vq);
1529
1530static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1531 char *buf)
1532{
1533 struct vduse_virtqueue *vq = container_of(kobj,
1534 struct vduse_virtqueue, kobj);
1535 struct vq_sysfs_entry *entry = container_of(attr,
1536 struct vq_sysfs_entry, attr);
1537
1538 if (!entry->show)
1539 return -EIO;
1540
1541 return entry->show(vq, buf);
1542}
1543
1544static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1545 const char *buf, size_t count)
1546{
1547 struct vduse_virtqueue *vq = container_of(kobj,
1548 struct vduse_virtqueue, kobj);
1549 struct vq_sysfs_entry *entry = container_of(attr,
1550 struct vq_sysfs_entry, attr);
1551
1552 if (!entry->store)
1553 return -EIO;
1554
1555 return entry->store(vq, buf, count);
1556}
1557
1558static const struct sysfs_ops vq_sysfs_ops = {
1559 .show = vq_attr_show,
1560 .store = vq_attr_store,
1561};
1562
1563static void vq_release(struct kobject *kobj)
1564{
1565 struct vduse_virtqueue *vq = container_of(kobj,
1566 struct vduse_virtqueue, kobj);
1567 kfree(objp: vq);
1568}
1569
1570static const struct kobj_type vq_type = {
1571 .release = vq_release,
1572 .sysfs_ops = &vq_sysfs_ops,
1573 .default_groups = vq_groups,
1574};
1575
1576static char *vduse_devnode(const struct device *dev, umode_t *mode)
1577{
1578 return kasprintf(GFP_KERNEL, fmt: "vduse/%s", dev_name(dev));
1579}
1580
1581static const struct class vduse_class = {
1582 .name = "vduse",
1583 .devnode = vduse_devnode,
1584};
1585
1586static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1587{
1588 int i;
1589
1590 if (!dev->vqs)
1591 return;
1592
1593 for (i = 0; i < dev->vq_num; i++)
1594 kobject_put(kobj: &dev->vqs[i]->kobj);
1595 kfree(objp: dev->vqs);
1596}
1597
1598static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
1599{
1600 int ret, i;
1601
1602 dev->vq_align = vq_align;
1603 dev->vq_num = vq_num;
1604 dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1605 if (!dev->vqs)
1606 return -ENOMEM;
1607
1608 for (i = 0; i < vq_num; i++) {
1609 dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
1610 if (!dev->vqs[i]) {
1611 ret = -ENOMEM;
1612 goto err;
1613 }
1614
1615 dev->vqs[i]->index = i;
1616 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
1617 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
1618 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
1619 spin_lock_init(&dev->vqs[i]->kick_lock);
1620 spin_lock_init(&dev->vqs[i]->irq_lock);
1621 cpumask_setall(dstp: &dev->vqs[i]->irq_affinity);
1622
1623 kobject_init(kobj: &dev->vqs[i]->kobj, ktype: &vq_type);
1624 ret = kobject_add(kobj: &dev->vqs[i]->kobj,
1625 parent: &dev->dev->kobj, fmt: "vq%d", i);
1626 if (ret) {
1627 kfree(objp: dev->vqs[i]);
1628 goto err;
1629 }
1630 }
1631
1632 return 0;
1633err:
1634 while (i--)
1635 kobject_put(kobj: &dev->vqs[i]->kobj);
1636 kfree(objp: dev->vqs);
1637 dev->vqs = NULL;
1638 return ret;
1639}
1640
1641static struct vduse_dev *vduse_dev_create(void)
1642{
1643 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1644
1645 if (!dev)
1646 return NULL;
1647
1648 mutex_init(&dev->lock);
1649 mutex_init(&dev->mem_lock);
1650 mutex_init(&dev->domain_lock);
1651 spin_lock_init(&dev->msg_lock);
1652 INIT_LIST_HEAD(list: &dev->send_list);
1653 INIT_LIST_HEAD(list: &dev->recv_list);
1654 spin_lock_init(&dev->irq_lock);
1655 init_rwsem(&dev->rwsem);
1656
1657 INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1658 init_waitqueue_head(&dev->waitq);
1659
1660 return dev;
1661}
1662
/* Free the memory of @dev itself; members are released by the caller. */
static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}
1667
1668static struct vduse_dev *vduse_find_dev(const char *name)
1669{
1670 struct vduse_dev *dev;
1671 int id;
1672
1673 idr_for_each_entry(&vduse_idr, dev, id)
1674 if (!strcmp(dev->name, name))
1675 return dev;
1676
1677 return NULL;
1678}
1679
1680static int vduse_destroy_dev(char *name)
1681{
1682 struct vduse_dev *dev = vduse_find_dev(name);
1683
1684 if (!dev)
1685 return -EINVAL;
1686
1687 mutex_lock(&dev->lock);
1688 if (dev->vdev || dev->connected) {
1689 mutex_unlock(lock: &dev->lock);
1690 return -EBUSY;
1691 }
1692 dev->connected = true;
1693 mutex_unlock(lock: &dev->lock);
1694
1695 vduse_dev_reset(dev);
1696 device_destroy(cls: &vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1697 idr_remove(&vduse_idr, id: dev->minor);
1698 kvfree(addr: dev->config);
1699 vduse_dev_deinit_vqs(dev);
1700 if (dev->domain)
1701 vduse_domain_destroy(domain: dev->domain);
1702 kfree(objp: dev->name);
1703 vduse_dev_destroy(dev);
1704 module_put(THIS_MODULE);
1705
1706 return 0;
1707}
1708
1709static bool device_is_allowed(u32 device_id)
1710{
1711 int i;
1712
1713 for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1714 if (allowed_device_id[i] == device_id)
1715 return true;
1716
1717 return false;
1718}
1719
1720static bool features_is_valid(struct vduse_dev_config *config)
1721{
1722 if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1723 return false;
1724
1725 /* Now we only support read-only configuration space */
1726 if ((config->device_id == VIRTIO_ID_BLOCK) &&
1727 (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
1728 return false;
1729 else if ((config->device_id == VIRTIO_ID_NET) &&
1730 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1731 return false;
1732
1733 if ((config->device_id == VIRTIO_ID_NET) &&
1734 !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
1735 return false;
1736
1737 return true;
1738}
1739
1740static bool vduse_validate_config(struct vduse_dev_config *config)
1741{
1742 if (!is_mem_zero(ptr: (const char *)config->reserved,
1743 size: sizeof(config->reserved)))
1744 return false;
1745
1746 if (config->vq_align > PAGE_SIZE)
1747 return false;
1748
1749 if (config->config_size > PAGE_SIZE)
1750 return false;
1751
1752 if (config->vq_num > 0xffff)
1753 return false;
1754
1755 if (!config->name[0])
1756 return false;
1757
1758 if (!device_is_allowed(device_id: config->device_id))
1759 return false;
1760
1761 if (!features_is_valid(config))
1762 return false;
1763
1764 return true;
1765}
1766
1767static ssize_t msg_timeout_show(struct device *device,
1768 struct device_attribute *attr, char *buf)
1769{
1770 struct vduse_dev *dev = dev_get_drvdata(dev: device);
1771
1772 return sysfs_emit(buf, fmt: "%u\n", dev->msg_timeout);
1773}
1774
1775static ssize_t msg_timeout_store(struct device *device,
1776 struct device_attribute *attr,
1777 const char *buf, size_t count)
1778{
1779 struct vduse_dev *dev = dev_get_drvdata(dev: device);
1780 int ret;
1781
1782 ret = kstrtouint(s: buf, base: 10, res: &dev->msg_timeout);
1783 if (ret < 0)
1784 return ret;
1785
1786 return count;
1787}
1788
1789static DEVICE_ATTR_RW(msg_timeout);
1790
1791static ssize_t bounce_size_show(struct device *device,
1792 struct device_attribute *attr, char *buf)
1793{
1794 struct vduse_dev *dev = dev_get_drvdata(dev: device);
1795
1796 return sysfs_emit(buf, fmt: "%u\n", dev->bounce_size);
1797}
1798
1799static ssize_t bounce_size_store(struct device *device,
1800 struct device_attribute *attr,
1801 const char *buf, size_t count)
1802{
1803 struct vduse_dev *dev = dev_get_drvdata(dev: device);
1804 unsigned int bounce_size;
1805 int ret;
1806
1807 ret = -EPERM;
1808 mutex_lock(&dev->domain_lock);
1809 if (dev->domain)
1810 goto unlock;
1811
1812 ret = kstrtouint(s: buf, base: 10, res: &bounce_size);
1813 if (ret < 0)
1814 goto unlock;
1815
1816 ret = -EINVAL;
1817 if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1818 bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1819 goto unlock;
1820
1821 dev->bounce_size = bounce_size & PAGE_MASK;
1822 ret = count;
1823unlock:
1824 mutex_unlock(lock: &dev->domain_lock);
1825 return ret;
1826}
1827
1828static DEVICE_ATTR_RW(bounce_size);
1829
1830static struct attribute *vduse_dev_attrs[] = {
1831 &dev_attr_msg_timeout.attr,
1832 &dev_attr_bounce_size.attr,
1833 NULL
1834};
1835
1836ATTRIBUTE_GROUPS(vduse_dev);
1837
1838static int vduse_create_dev(struct vduse_dev_config *config,
1839 void *config_buf, u64 api_version)
1840{
1841 int ret;
1842 struct vduse_dev *dev;
1843
1844 ret = -EPERM;
1845 if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
1846 goto err;
1847
1848 ret = -EEXIST;
1849 if (vduse_find_dev(name: config->name))
1850 goto err;
1851
1852 ret = -ENOMEM;
1853 dev = vduse_dev_create();
1854 if (!dev)
1855 goto err;
1856
1857 dev->api_version = api_version;
1858 dev->device_features = config->features;
1859 dev->device_id = config->device_id;
1860 dev->vendor_id = config->vendor_id;
1861 dev->name = kstrdup(s: config->name, GFP_KERNEL);
1862 if (!dev->name)
1863 goto err_str;
1864
1865 dev->bounce_size = VDUSE_BOUNCE_SIZE;
1866 dev->config = config_buf;
1867 dev->config_size = config->config_size;
1868
1869 ret = idr_alloc(&vduse_idr, ptr: dev, start: 1, VDUSE_DEV_MAX, GFP_KERNEL);
1870 if (ret < 0)
1871 goto err_idr;
1872
1873 dev->minor = ret;
1874 dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1875 dev->dev = device_create_with_groups(cls: &vduse_class, NULL,
1876 MKDEV(MAJOR(vduse_major), dev->minor),
1877 drvdata: dev, groups: vduse_dev_groups, fmt: "%s", config->name);
1878 if (IS_ERR(ptr: dev->dev)) {
1879 ret = PTR_ERR(ptr: dev->dev);
1880 goto err_dev;
1881 }
1882
1883 ret = vduse_dev_init_vqs(dev, vq_align: config->vq_align, vq_num: config->vq_num);
1884 if (ret)
1885 goto err_vqs;
1886
1887 __module_get(THIS_MODULE);
1888
1889 return 0;
1890err_vqs:
1891 device_destroy(cls: &vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1892err_dev:
1893 idr_remove(&vduse_idr, id: dev->minor);
1894err_idr:
1895 kfree(objp: dev->name);
1896err_str:
1897 vduse_dev_destroy(dev);
1898err:
1899 return ret;
1900}
1901
1902static long vduse_ioctl(struct file *file, unsigned int cmd,
1903 unsigned long arg)
1904{
1905 int ret;
1906 void __user *argp = (void __user *)arg;
1907 struct vduse_control *control = file->private_data;
1908
1909 mutex_lock(&vduse_lock);
1910 switch (cmd) {
1911 case VDUSE_GET_API_VERSION:
1912 ret = put_user(control->api_version, (u64 __user *)argp);
1913 break;
1914 case VDUSE_SET_API_VERSION: {
1915 u64 api_version;
1916
1917 ret = -EFAULT;
1918 if (get_user(api_version, (u64 __user *)argp))
1919 break;
1920
1921 ret = -EINVAL;
1922 if (api_version > VDUSE_API_VERSION)
1923 break;
1924
1925 ret = 0;
1926 control->api_version = api_version;
1927 break;
1928 }
1929 case VDUSE_CREATE_DEV: {
1930 struct vduse_dev_config config;
1931 unsigned long size = offsetof(struct vduse_dev_config, config);
1932 void *buf;
1933
1934 ret = -EFAULT;
1935 if (copy_from_user(to: &config, from: argp, n: size))
1936 break;
1937
1938 ret = -EINVAL;
1939 if (vduse_validate_config(config: &config) == false)
1940 break;
1941
1942 buf = vmemdup_user(argp + size, config.config_size);
1943 if (IS_ERR(ptr: buf)) {
1944 ret = PTR_ERR(ptr: buf);
1945 break;
1946 }
1947 config.name[VDUSE_NAME_MAX - 1] = '\0';
1948 ret = vduse_create_dev(config: &config, config_buf: buf, api_version: control->api_version);
1949 if (ret)
1950 kvfree(addr: buf);
1951 break;
1952 }
1953 case VDUSE_DESTROY_DEV: {
1954 char name[VDUSE_NAME_MAX];
1955
1956 ret = -EFAULT;
1957 if (copy_from_user(to: name, from: argp, VDUSE_NAME_MAX))
1958 break;
1959
1960 name[VDUSE_NAME_MAX - 1] = '\0';
1961 ret = vduse_destroy_dev(name);
1962 break;
1963 }
1964 default:
1965 ret = -EINVAL;
1966 break;
1967 }
1968 mutex_unlock(lock: &vduse_lock);
1969
1970 return ret;
1971}
1972
1973static int vduse_release(struct inode *inode, struct file *file)
1974{
1975 struct vduse_control *control = file->private_data;
1976
1977 kfree(objp: control);
1978 return 0;
1979}
1980
1981static int vduse_open(struct inode *inode, struct file *file)
1982{
1983 struct vduse_control *control;
1984
1985 control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1986 if (!control)
1987 return -ENOMEM;
1988
1989 control->api_version = VDUSE_API_VERSION;
1990 file->private_data = control;
1991
1992 return 0;
1993}
1994
1995static const struct file_operations vduse_ctrl_fops = {
1996 .owner = THIS_MODULE,
1997 .open = vduse_open,
1998 .release = vduse_release,
1999 .unlocked_ioctl = vduse_ioctl,
2000 .compat_ioctl = compat_ptr_ioctl,
2001 .llseek = noop_llseek,
2002};
2003
2004struct vduse_mgmt_dev {
2005 struct vdpa_mgmt_dev mgmt_dev;
2006 struct device dev;
2007};
2008
2009static struct vduse_mgmt_dev *vduse_mgmt;
2010
2011static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
2012{
2013 struct vduse_vdpa *vdev;
2014
2015 if (dev->vdev)
2016 return -EEXIST;
2017
2018 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
2019 &vduse_vdpa_config_ops, &vduse_map_ops,
2020 1, 1, name, true);
2021 if (IS_ERR(ptr: vdev))
2022 return PTR_ERR(ptr: vdev);
2023
2024 dev->vdev = vdev;
2025 vdev->dev = dev;
2026 vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
2027
2028 return 0;
2029}
2030
2031static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
2032 const struct vdpa_dev_set_config *config)
2033{
2034 struct vduse_dev *dev;
2035 int ret;
2036
2037 mutex_lock(&vduse_lock);
2038 dev = vduse_find_dev(name);
2039 if (!dev || !vduse_dev_is_ready(dev)) {
2040 mutex_unlock(lock: &vduse_lock);
2041 return -EINVAL;
2042 }
2043 ret = vduse_dev_init_vdpa(dev, name);
2044 mutex_unlock(lock: &vduse_lock);
2045 if (ret)
2046 return ret;
2047
2048 mutex_lock(&dev->domain_lock);
2049 if (!dev->domain)
2050 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
2051 bounce_size: dev->bounce_size);
2052 mutex_unlock(lock: &dev->domain_lock);
2053 if (!dev->domain) {
2054 put_device(dev: &dev->vdev->vdpa.dev);
2055 return -ENOMEM;
2056 }
2057
2058 dev->vdev->vdpa.vmap.iova_domain = dev->domain;
2059 ret = _vdpa_register_device(vdev: &dev->vdev->vdpa, nvqs: dev->vq_num);
2060 if (ret) {
2061 put_device(dev: &dev->vdev->vdpa.dev);
2062 mutex_lock(&dev->domain_lock);
2063 vduse_domain_destroy(domain: dev->domain);
2064 dev->domain = NULL;
2065 mutex_unlock(lock: &dev->domain_lock);
2066 return ret;
2067 }
2068
2069 return 0;
2070}
2071
/* Management-device "dev_del" callback: unregister the vDPA device @dev. */
static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}
2076
2077static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
2078 .dev_add = vdpa_dev_add,
2079 .dev_del = vdpa_dev_del,
2080};
2081
2082static struct virtio_device_id id_table[] = {
2083 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
2084 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2085 { 0 },
2086};
2087
2088static void vduse_mgmtdev_release(struct device *dev)
2089{
2090 struct vduse_mgmt_dev *mgmt_dev;
2091
2092 mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
2093 kfree(objp: mgmt_dev);
2094}
2095
2096static int vduse_mgmtdev_init(void)
2097{
2098 int ret;
2099
2100 vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
2101 if (!vduse_mgmt)
2102 return -ENOMEM;
2103
2104 ret = dev_set_name(dev: &vduse_mgmt->dev, name: "vduse");
2105 if (ret) {
2106 kfree(objp: vduse_mgmt);
2107 return ret;
2108 }
2109
2110 vduse_mgmt->dev.release = vduse_mgmtdev_release;
2111
2112 ret = device_register(dev: &vduse_mgmt->dev);
2113 if (ret)
2114 goto dev_reg_err;
2115
2116 vduse_mgmt->mgmt_dev.id_table = id_table;
2117 vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
2118 vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
2119 ret = vdpa_mgmtdev_register(mdev: &vduse_mgmt->mgmt_dev);
2120 if (ret)
2121 device_unregister(dev: &vduse_mgmt->dev);
2122
2123 return ret;
2124
2125dev_reg_err:
2126 put_device(dev: &vduse_mgmt->dev);
2127 return ret;
2128}
2129
2130static void vduse_mgmtdev_exit(void)
2131{
2132 vdpa_mgmtdev_unregister(mdev: &vduse_mgmt->mgmt_dev);
2133 device_unregister(dev: &vduse_mgmt->dev);
2134}
2135
2136static int vduse_init(void)
2137{
2138 int ret;
2139 struct device *dev;
2140
2141 ret = class_register(class: &vduse_class);
2142 if (ret)
2143 return ret;
2144
2145 ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
2146 if (ret)
2147 goto err_chardev_region;
2148
2149 /* /dev/vduse/control */
2150 cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
2151 vduse_ctrl_cdev.owner = THIS_MODULE;
2152 ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
2153 if (ret)
2154 goto err_ctrl_cdev;
2155
2156 dev = device_create(cls: &vduse_class, NULL, devt: vduse_major, NULL, fmt: "control");
2157 if (IS_ERR(ptr: dev)) {
2158 ret = PTR_ERR(ptr: dev);
2159 goto err_device;
2160 }
2161
2162 /* /dev/vduse/$DEVICE */
2163 cdev_init(&vduse_cdev, &vduse_dev_fops);
2164 vduse_cdev.owner = THIS_MODULE;
2165 ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
2166 VDUSE_DEV_MAX - 1);
2167 if (ret)
2168 goto err_cdev;
2169
2170 ret = -ENOMEM;
2171 vduse_irq_wq = alloc_workqueue("vduse-irq",
2172 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
2173 if (!vduse_irq_wq)
2174 goto err_wq;
2175
2176 vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound",
2177 WQ_HIGHPRI | WQ_PERCPU, 0);
2178 if (!vduse_irq_bound_wq)
2179 goto err_bound_wq;
2180
2181 ret = vduse_domain_init();
2182 if (ret)
2183 goto err_domain;
2184
2185 ret = vduse_mgmtdev_init();
2186 if (ret)
2187 goto err_mgmtdev;
2188
2189 return 0;
2190err_mgmtdev:
2191 vduse_domain_exit();
2192err_domain:
2193 destroy_workqueue(wq: vduse_irq_bound_wq);
2194err_bound_wq:
2195 destroy_workqueue(wq: vduse_irq_wq);
2196err_wq:
2197 cdev_del(&vduse_cdev);
2198err_cdev:
2199 device_destroy(cls: &vduse_class, devt: vduse_major);
2200err_device:
2201 cdev_del(&vduse_ctrl_cdev);
2202err_ctrl_cdev:
2203 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2204err_chardev_region:
2205 class_unregister(class: &vduse_class);
2206 return ret;
2207}
2208module_init(vduse_init);
2209
2210static void vduse_exit(void)
2211{
2212 vduse_mgmtdev_exit();
2213 vduse_domain_exit();
2214 destroy_workqueue(wq: vduse_irq_bound_wq);
2215 destroy_workqueue(wq: vduse_irq_wq);
2216 cdev_del(&vduse_cdev);
2217 device_destroy(cls: &vduse_class, devt: vduse_major);
2218 cdev_del(&vduse_ctrl_cdev);
2219 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2220 class_unregister(class: &vduse_class);
2221 idr_destroy(&vduse_idr);
2222}
2223module_exit(vduse_exit);
2224
/* Module metadata; values come from the DRV_* macros at the top of the file. */
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);
2228

/* Source: linux/drivers/vdpa/vdpa_user/vduse_dev.c */