zloop.c source code [linux/drivers/block/zloop.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* Copyright (c) 2025, Christoph Hellwig.
4	* Copyright (c) 2025, Western Digital Corporation or its affiliates.
5	*
6	* Zoned Loop Device driver - exports a zoned block device using one file per
7	* zone as backing storage.
8	*/
9	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11	#include <linux/module.h>
12	#include <linux/blk-mq.h>
13	#include <linux/blkzoned.h>
14	#include <linux/pagemap.h>
15	#include <linux/miscdevice.h>
16	#include <linux/falloc.h>
17	#include <linux/mutex.h>
18	#include <linux/parser.h>
19	#include <linux/seq_file.h>
20
/*
 * Options for adding (and removing) a device.
 *
 * Each option is a distinct bit so that the options seen on a control command
 * can be accumulated in a mask (e.g. zloop_ctl_remove() tests ZLOOP_OPT_ID
 * in opts->mask). ZLOOP_OPT_ERR (0) terminates the option token table.
 */
enum {
	ZLOOP_OPT_ERR			= 0,
	ZLOOP_OPT_ID			= (1 << 0),
	ZLOOP_OPT_CAPACITY		= (1 << 1),
	ZLOOP_OPT_ZONE_SIZE		= (1 << 2),
	ZLOOP_OPT_ZONE_CAPACITY		= (1 << 3),
	ZLOOP_OPT_NR_CONV_ZONES		= (1 << 4),
	ZLOOP_OPT_BASE_DIR		= (1 << 5),
	ZLOOP_OPT_NR_QUEUES		= (1 << 6),
	ZLOOP_OPT_QUEUE_DEPTH		= (1 << 7),
	ZLOOP_OPT_BUFFERED_IO		= (1 << 8),
	ZLOOP_OPT_ZONE_APPEND		= (1 << 9),
	ZLOOP_OPT_ORDERED_ZONE_APPEND	= (1 << 10),
};
38
39	static const match_table_t zloop_opt_tokens = {
40	{ ZLOOP_OPT_ID, "id=%d" },
41	{ ZLOOP_OPT_CAPACITY, "capacity_mb=%u" },
42	{ ZLOOP_OPT_ZONE_SIZE, "zone_size_mb=%u" },
43	{ ZLOOP_OPT_ZONE_CAPACITY, "zone_capacity_mb=%u" },
44	{ ZLOOP_OPT_NR_CONV_ZONES, "conv_zones=%u" },
45	{ ZLOOP_OPT_BASE_DIR, "base_dir=%s" },
46	{ ZLOOP_OPT_NR_QUEUES, "nr_queues=%u" },
47	{ ZLOOP_OPT_QUEUE_DEPTH, "queue_depth=%u" },
48	{ ZLOOP_OPT_BUFFERED_IO, "buffered_io" },
49	{ ZLOOP_OPT_ZONE_APPEND, "zone_append=%u" },
50	{ ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append" },
51	{ ZLOOP_OPT_ERR, NULL }
52	};
53
/* Default values for the "add" operation. */
#define ZLOOP_DEF_ID			(-1)
#define ZLOOP_DEF_ZONE_SIZE		((256ULL * SZ_1M) >> SECTOR_SHIFT)
#define ZLOOP_DEF_NR_ZONES		64
#define ZLOOP_DEF_NR_CONV_ZONES		8
#define ZLOOP_DEF_BASE_DIR		"/var/local/zloop"
#define ZLOOP_DEF_NR_QUEUES		1
#define ZLOOP_DEF_QUEUE_DEPTH		128
#define ZLOOP_DEF_BUFFERED_IO		false
#define ZLOOP_DEF_ZONE_APPEND		true
#define ZLOOP_DEF_ORDERED_ZONE_APPEND	false

/* Arbitrary limit on the zone size (16GB). */
#define ZLOOP_MAX_ZONE_SIZE_MB		16384
68
69	struct zloop_options {
70	unsigned int mask;
71	int id;
72	sector_t capacity;
73	sector_t zone_size;
74	sector_t zone_capacity;
75	unsigned int nr_conv_zones;
76	char *base_dir;
77	unsigned int nr_queues;
78	unsigned int queue_depth;
79	bool buffered_io;
80	bool zone_append;
81	bool ordered_zone_append;
82	};
83
/*
 * Device states.
 *
 * A device starts in Zlo_creating, becomes Zlo_live once zloop_ctl_add() has
 * added its disk, and is switched to Zlo_deleting by zloop_ctl_remove().
 */
enum {
	Zlo_creating = 0,
	Zlo_live,
	Zlo_deleting,
};
92
/*
 * Bit numbers (not masks) for the flags field of struct zloop_zone, used with
 * set_bit()/test_bit()/clear_bit().
 *
 * ZLOOP_ZONE_CONV:      the zone is a conventional zone.
 * ZLOOP_ZONE_SEQ_ERROR: a write to the sequential zone file failed; the zone
 *                       condition and write pointer must be refreshed from
 *                       the file size before the zone is used again.
 */
enum zloop_zone_flags {
	ZLOOP_ZONE_CONV = 0,
	ZLOOP_ZONE_SEQ_ERROR,
};
97
98	struct zloop_zone {
99	struct file *file;
100
101	unsigned long flags;
102	struct mutex lock;
103	spinlock_t wp_lock;
104	enum blk_zone_cond cond;
105	sector_t start;
106	sector_t wp;
107
108	gfp_t old_gfp_mask;
109	};
110
111	struct zloop_device {
112	unsigned int id;
113	unsigned int state;
114
115	struct blk_mq_tag_set tag_set;
116	struct gendisk *disk;
117
118	struct workqueue_struct *workqueue;
119	bool buffered_io;
120	bool zone_append;
121	bool ordered_zone_append;
122
123	const char *base_dir;
124	struct file *data_dir;
125
126	unsigned int zone_shift;
127	sector_t zone_size;
128	sector_t zone_capacity;
129	unsigned int nr_zones;
130	unsigned int nr_conv_zones;
131	unsigned int block_size;
132
133	struct zloop_zone zones[] __counted_by(nr_zones);
134	};
135
136	struct zloop_cmd {
137	struct work_struct work;
138	atomic_t ref;
139	sector_t sector;
140	sector_t nr_sectors;
141	long ret;
142	struct kiocb iocb;
143	struct bio_vec *bvec;
144	};
145
/* Maps device ids to struct zloop_device; accessed under zloop_ctl_mutex. */
static DEFINE_IDR(zloop_index_idr);
/* Serializes id allocation/removal and device state changes. */
static DEFINE_MUTEX(zloop_ctl_mutex);
148
149	static unsigned int rq_zone_no(struct request *rq)
150	{
151	struct zloop_device *zlo = rq->q->queuedata;
152
153	return blk_rq_pos(rq) >> zlo->zone_shift;
154	}
155
156	static int zloop_update_seq_zone(struct zloop_device zlo, unsigned* int zone_no)
157	{
158	struct zloop_zone *zone = &zlo->zones[zone_no];
159	struct kstat stat;
160	sector_t file_sectors;
161	unsigned long flags;
162	int ret;
163
164	lockdep_assert_held(&zone->lock);
165
166	ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, `0`);
167	if (ret < `0`) {
168	pr_err("Failed to get zone %u file stat (err=%d)\n",
169	zone_no, ret);
170	set_bit(nr: ZLOOP_ZONE_SEQ_ERROR, addr: &zone->flags);
171	return ret;
172	}
173
174	file_sectors = stat.size >> SECTOR_SHIFT;
175	if (file_sectors > zlo->zone_capacity) {
176	pr_err("Zone %u file too large (%llu sectors > %llu)\n",
177	zone_no, file_sectors, zlo->zone_capacity);
178	return -EINVAL;
179	}
180
181	if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - `1`)) {
182	pr_err("Zone %u file size not aligned to block size %u\n",
183	zone_no, zlo->block_size);
184	return -EINVAL;
185	}
186
187	spin_lock_irqsave(&zone->wp_lock, flags);
188	if (!file_sectors) {
189	zone->cond = BLK_ZONE_COND_EMPTY;
190	zone->wp = zone->start;
191	} else if (file_sectors == zlo->zone_capacity) {
192	zone->cond = BLK_ZONE_COND_FULL;
193	zone->wp = ULLONG_MAX;
194	} else {
195	zone->cond = BLK_ZONE_COND_CLOSED;
196	zone->wp = zone->start + file_sectors;
197	}
198	spin_unlock_irqrestore(lock: &zone->wp_lock, flags);
199
200	return `0`;
201	}
202
203	static int zloop_open_zone(struct zloop_device zlo, unsigned* int zone_no)
204	{
205	struct zloop_zone *zone = &zlo->zones[zone_no];
206	int ret = `0`;
207
208	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
209	return -EIO;
210
211	mutex_lock(&zone->lock);
212
213	if (test_and_clear_bit(nr: ZLOOP_ZONE_SEQ_ERROR, addr: &zone->flags)) {
214	ret = zloop_update_seq_zone(zlo, zone_no);
215	if (ret)
216	goto unlock;
217	}
218
219	switch (zone->cond) {
220	case BLK_ZONE_COND_EXP_OPEN:
221	break;
222	case BLK_ZONE_COND_EMPTY:
223	case BLK_ZONE_COND_CLOSED:
224	case BLK_ZONE_COND_IMP_OPEN:
225	zone->cond = BLK_ZONE_COND_EXP_OPEN;
226	break;
227	case BLK_ZONE_COND_FULL:
228	default:
229	ret = -EIO;
230	break;
231	}
232
233	unlock:
234	mutex_unlock(lock: &zone->lock);
235
236	return ret;
237	}
238
239	static int zloop_close_zone(struct zloop_device zlo, unsigned* int zone_no)
240	{
241	struct zloop_zone *zone = &zlo->zones[zone_no];
242	unsigned long flags;
243	int ret = `0`;
244
245	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
246	return -EIO;
247
248	mutex_lock(&zone->lock);
249
250	if (test_and_clear_bit(nr: ZLOOP_ZONE_SEQ_ERROR, addr: &zone->flags)) {
251	ret = zloop_update_seq_zone(zlo, zone_no);
252	if (ret)
253	goto unlock;
254	}
255
256	switch (zone->cond) {
257	case BLK_ZONE_COND_CLOSED:
258	break;
259	case BLK_ZONE_COND_IMP_OPEN:
260	case BLK_ZONE_COND_EXP_OPEN:
261	spin_lock_irqsave(&zone->wp_lock, flags);
262	if (zone->wp == zone->start)
263	zone->cond = BLK_ZONE_COND_EMPTY;
264	else
265	zone->cond = BLK_ZONE_COND_CLOSED;
266	spin_unlock_irqrestore(lock: &zone->wp_lock, flags);
267	break;
268	case BLK_ZONE_COND_EMPTY:
269	case BLK_ZONE_COND_FULL:
270	default:
271	ret = -EIO;
272	break;
273	}
274
275	unlock:
276	mutex_unlock(lock: &zone->lock);
277
278	return ret;
279	}
280
281	static int zloop_reset_zone(struct zloop_device zlo, unsigned* int zone_no)
282	{
283	struct zloop_zone *zone = &zlo->zones[zone_no];
284	unsigned long flags;
285	int ret = `0`;
286
287	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
288	return -EIO;
289
290	mutex_lock(&zone->lock);
291
292	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
293	zone->cond == BLK_ZONE_COND_EMPTY)
294	goto unlock;
295
296	if (vfs_truncate(&zone->file->f_path, `0`)) {
297	set_bit(nr: ZLOOP_ZONE_SEQ_ERROR, addr: &zone->flags);
298	ret = -EIO;
299	goto unlock;
300	}
301
302	spin_lock_irqsave(&zone->wp_lock, flags);
303	zone->cond = BLK_ZONE_COND_EMPTY;
304	zone->wp = zone->start;
305	clear_bit(nr: ZLOOP_ZONE_SEQ_ERROR, addr: &zone->flags);
306	spin_unlock_irqrestore(lock: &zone->wp_lock, flags);
307
308	unlock:
309	mutex_unlock(lock: &zone->lock);
310
311	return ret;
312	}
313
314	static int zloop_reset_all_zones(struct zloop_device *zlo)
315	{
316	unsigned int i;
317	int ret;
318
319	for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) {
320	ret = zloop_reset_zone(zlo, zone_no: i);
321	if (ret)
322	return ret;
323	}
324
325	return `0`;
326	}
327
328	static int zloop_finish_zone(struct zloop_device zlo, unsigned* int zone_no)
329	{
330	struct zloop_zone *zone = &zlo->zones[zone_no];
331	unsigned long flags;
332	int ret = `0`;
333
334	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
335	return -EIO;
336
337	mutex_lock(&zone->lock);
338
339	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
340	zone->cond == BLK_ZONE_COND_FULL)
341	goto unlock;
342
343	if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) {
344	set_bit(nr: ZLOOP_ZONE_SEQ_ERROR, addr: &zone->flags);
345	ret = -EIO;
346	goto unlock;
347	}
348
349	spin_lock_irqsave(&zone->wp_lock, flags);
350	zone->cond = BLK_ZONE_COND_FULL;
351	zone->wp = ULLONG_MAX;
352	clear_bit(nr: ZLOOP_ZONE_SEQ_ERROR, addr: &zone->flags);
353	spin_unlock_irqrestore(lock: &zone->wp_lock, flags);
354
355	unlock:
356	mutex_unlock(lock: &zone->lock);
357
358	return ret;
359	}
360
361	static void zloop_put_cmd(struct zloop_cmd *cmd)
362	{
363	struct request *rq = blk_mq_rq_from_pdu(pdu: cmd);
364
365	if (!atomic_dec_and_test(v: &cmd->ref))
366	return;
367	kfree(objp: cmd->bvec);
368	cmd->bvec = NULL;
369	if (likely(!blk_should_fake_timeout(rq->q)))
370	blk_mq_complete_request(rq);
371	}
372
373	static void zloop_rw_complete(struct kiocb iocb, long* ret)
374	{
375	struct zloop_cmd cmd = container_of(iocb, struct* zloop_cmd, iocb);
376
377	cmd->ret = ret;
378	zloop_put_cmd(cmd);
379	}
380
381	static void zloop_rw(struct zloop_cmd *cmd)
382	{
383	struct request *rq = blk_mq_rq_from_pdu(pdu: cmd);
384	struct zloop_device *zlo = rq->q->queuedata;
385	unsigned int zone_no = rq_zone_no(rq);
386	sector_t sector = blk_rq_pos(rq);
387	sector_t nr_sectors = blk_rq_sectors(rq);
388	bool is_append = req_op(req: rq) == REQ_OP_ZONE_APPEND;
389	bool is_write = req_op(req: rq) == REQ_OP_WRITE \|\| is_append;
390	int rw = is_write ? ITER_SOURCE : ITER_DEST;
391	struct req_iterator rq_iter;
392	struct zloop_zone *zone;
393	struct iov_iter iter;
394	struct bio_vec tmp;
395	unsigned long flags;
396	sector_t zone_end;
397	unsigned int nr_bvec;
398	int ret;
399
400	atomic_set(v: &cmd->ref, i: `2`);
401	cmd->sector = sector;
402	cmd->nr_sectors = nr_sectors;
403	cmd->ret = `0`;
404
405	if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
406	ret = -EIO;
407	goto out;
408	}
409
410	/ We should never get an I/O beyond the device capacity. /
411	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
412	ret = -EIO;
413	goto out;
414	}
415	zone = &zlo->zones[zone_no];
416	zone_end = zone->start + zlo->zone_capacity;
417
418	/*
419	* The block layer should never send requests that are not fully
420	* contained within the zone.
421	*/
422	if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
423	ret = -EIO;
424	goto out;
425	}
426
427	if (test_and_clear_bit(nr: ZLOOP_ZONE_SEQ_ERROR, addr: &zone->flags)) {
428	mutex_lock(&zone->lock);
429	ret = zloop_update_seq_zone(zlo, zone_no);
430	mutex_unlock(lock: &zone->lock);
431	if (ret)
432	goto out;
433	}
434
435	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
436	mutex_lock(&zone->lock);
437
438	spin_lock_irqsave(&zone->wp_lock, flags);
439
440	/*
441	* Zone append operations always go at the current write
442	* pointer, but regular write operations must already be
443	* aligned to the write pointer when submitted.
444	*/
445	if (is_append) {
446	/*
447	* If ordered zone append is in use, we already checked
448	* and set the target sector in zloop_queue_rq().
449	*/
450	if (!zlo->ordered_zone_append) {
451	if (zone->cond == BLK_ZONE_COND_FULL \|\|
452	zone->wp + nr_sectors > zone_end) {
453	spin_unlock_irqrestore(lock: &zone->wp_lock,
454	flags);
455	ret = -EIO;
456	goto unlock;
457	}
458	sector = zone->wp;
459	}
460	cmd->sector = sector;
461	} else if (sector != zone->wp) {
462	spin_unlock_irqrestore(lock: &zone->wp_lock, flags);
463	pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
464	zone_no, sector, zone->wp);
465	ret = -EIO;
466	goto unlock;
467	}
468
469	/ Implicitly open the target zone. /
470	if (zone->cond == BLK_ZONE_COND_CLOSED \|\|
471	zone->cond == BLK_ZONE_COND_EMPTY)
472	zone->cond = BLK_ZONE_COND_IMP_OPEN;
473
474	/*
475	* Advance the write pointer, unless ordered zone append is in
476	* use. If the write fails, the write pointer position will be
477	* corrected when the next I/O starts execution.
478	*/
479	if (!is_append \|\| !zlo->ordered_zone_append) {
480	zone->wp += nr_sectors;
481	if (zone->wp == zone_end) {
482	zone->cond = BLK_ZONE_COND_FULL;
483	zone->wp = ULLONG_MAX;
484	}
485	}
486
487	spin_unlock_irqrestore(lock: &zone->wp_lock, flags);
488	}
489
490	nr_bvec = blk_rq_nr_bvec(rq);
491
492	if (rq->bio != rq->biotail) {
493	struct bio_vec *bvec;
494
495	cmd->bvec = kmalloc_array(nr_bvec, sizeof(*cmd->bvec), GFP_NOIO);
496	if (!cmd->bvec) {
497	ret = -EIO;
498	goto unlock;
499	}
500
501	/*
502	* The bios of the request may be started from the middle of
503	* the 'bvec' because of bio splitting, so we can't directly
504	* copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
505	* API will take care of all details for us.
506	*/
507	bvec = cmd->bvec;
508	rq_for_each_bvec(tmp, rq, rq_iter) {
509	*bvec = tmp;
510	bvec++;
511	}
512	iov_iter_bvec(i: &iter, direction: rw, bvec: cmd->bvec, nr_segs: nr_bvec, count: blk_rq_bytes(rq));
513	} else {
514	/*
515	* Same here, this bio may be started from the middle of the
516	* 'bvec' because of bio splitting, so offset from the bvec
517	* must be passed to iov iterator
518	*/
519	iov_iter_bvec(i: &iter, direction: rw,
520	__bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter),
521	nr_segs: nr_bvec, count: blk_rq_bytes(rq));
522	iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
523	}
524
525	cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
526	cmd->iocb.ki_filp = zone->file;
527	cmd->iocb.ki_complete = zloop_rw_complete;
528	if (!zlo->buffered_io)
529	cmd->iocb.ki_flags = IOCB_DIRECT;
530	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, `0`);
531
532	if (rw == ITER_SOURCE)
533	ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
534	else
535	ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
536	unlock:
537	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
538	mutex_unlock(lock: &zone->lock);
539	out:
540	if (ret != -EIOCBQUEUED)
541	zloop_rw_complete(iocb: &cmd->iocb, ret);
542	zloop_put_cmd(cmd);
543	}
544
545	static void zloop_handle_cmd(struct zloop_cmd *cmd)
546	{
547	struct request *rq = blk_mq_rq_from_pdu(pdu: cmd);
548	struct zloop_device *zlo = rq->q->queuedata;
549
550	/ We can block in this context, so ignore REQ_NOWAIT. /
551	if (rq->cmd_flags & REQ_NOWAIT)
552	rq->cmd_flags &= ~REQ_NOWAIT;
553
554	switch (req_op(req: rq)) {
555	case REQ_OP_READ:
556	case REQ_OP_WRITE:
557	case REQ_OP_ZONE_APPEND:
558	/*
559	* zloop_rw() always executes asynchronously or completes
560	* directly.
561	*/
562	zloop_rw(cmd);
563	return;
564	case REQ_OP_FLUSH:
565	/*
566	* Sync the entire FS containing the zone files instead of
567	* walking all files
568	*/
569	cmd->ret = sync_filesystem(file_inode(f: zlo->data_dir)->i_sb);
570	break;
571	case REQ_OP_ZONE_RESET:
572	cmd->ret = zloop_reset_zone(zlo, zone_no: rq_zone_no(rq));
573	break;
574	case REQ_OP_ZONE_RESET_ALL:
575	cmd->ret = zloop_reset_all_zones(zlo);
576	break;
577	case REQ_OP_ZONE_FINISH:
578	cmd->ret = zloop_finish_zone(zlo, zone_no: rq_zone_no(rq));
579	break;
580	case REQ_OP_ZONE_OPEN:
581	cmd->ret = zloop_open_zone(zlo, zone_no: rq_zone_no(rq));
582	break;
583	case REQ_OP_ZONE_CLOSE:
584	cmd->ret = zloop_close_zone(zlo, zone_no: rq_zone_no(rq));
585	break;
586	default:
587	WARN_ON_ONCE(`1`);
588	pr_err("Unsupported operation %d\n", req_op(rq));
589	cmd->ret = -EOPNOTSUPP;
590	break;
591	}
592
593	blk_mq_complete_request(rq);
594	}
595
596	static void zloop_cmd_workfn(struct work_struct *work)
597	{
598	struct zloop_cmd cmd = container_of(work, struct* zloop_cmd, work);
599	int orig_flags = current->flags;
600
601	current->flags \|= PF_LOCAL_THROTTLE \| PF_MEMALLOC_NOIO;
602	zloop_handle_cmd(cmd);
603	current->flags = orig_flags;
604	}
605
606	static void zloop_complete_rq(struct request *rq)
607	{
608	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
609	struct zloop_device *zlo = rq->q->queuedata;
610	unsigned int zone_no = cmd->sector >> zlo->zone_shift;
611	struct zloop_zone *zone = &zlo->zones[zone_no];
612	blk_status_t sts = BLK_STS_OK;
613
614	switch (req_op(req: rq)) {
615	case REQ_OP_READ:
616	if (cmd->ret < `0`)
617	pr_err("Zone %u: failed read sector %llu, %llu sectors\n",
618	zone_no, cmd->sector, cmd->nr_sectors);
619
620	if (cmd->ret >= `0` && cmd->ret != blk_rq_bytes(rq)) {
621	/ short read /
622	struct bio *bio;
623
624	__rq_for_each_bio(bio, rq)
625	zero_fill_bio(bio);
626	}
627	break;
628	case REQ_OP_WRITE:
629	case REQ_OP_ZONE_APPEND:
630	if (cmd->ret < `0`)
631	pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n",
632	zone_no,
633	req_op(rq) == REQ_OP_WRITE ? "" : "append ",
634	cmd->sector, cmd->nr_sectors);
635
636	if (cmd->ret >= `0` && cmd->ret != blk_rq_bytes(rq)) {
637	pr_err("Zone %u: partial write %ld/%u B\n",
638	zone_no, cmd->ret, blk_rq_bytes(rq));
639	cmd->ret = -EIO;
640	}
641
642	if (cmd->ret < `0` && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
643	/*
644	* A write to a sequential zone file failed: mark the
645	* zone as having an error. This will be corrected and
646	* cleared when the next IO is submitted.
647	*/
648	set_bit(nr: ZLOOP_ZONE_SEQ_ERROR, addr: &zone->flags);
649	break;
650	}
651	if (req_op(req: rq) == REQ_OP_ZONE_APPEND)
652	rq->__sector = cmd->sector;
653
654	break;
655	default:
656	break;
657	}
658
659	if (cmd->ret < `0`)
660	sts = errno_to_blk_status(errno: cmd->ret);
661	blk_mq_end_request(rq, error: sts);
662	}
663
664	static bool zloop_set_zone_append_sector(struct request *rq)
665	{
666	struct zloop_device *zlo = rq->q->queuedata;
667	unsigned int zone_no = rq_zone_no(rq);
668	struct zloop_zone *zone = &zlo->zones[zone_no];
669	sector_t zone_end = zone->start + zlo->zone_capacity;
670	sector_t nr_sectors = blk_rq_sectors(rq);
671	unsigned long flags;
672
673	spin_lock_irqsave(&zone->wp_lock, flags);
674
675	if (zone->cond == BLK_ZONE_COND_FULL \|\|
676	zone->wp + nr_sectors > zone_end) {
677	spin_unlock_irqrestore(lock: &zone->wp_lock, flags);
678	return false;
679	}
680
681	rq->__sector = zone->wp;
682	zone->wp += blk_rq_sectors(rq);
683	if (zone->wp >= zone_end) {
684	zone->cond = BLK_ZONE_COND_FULL;
685	zone->wp = ULLONG_MAX;
686	}
687
688	spin_unlock_irqrestore(lock: &zone->wp_lock, flags);
689
690	return true;
691	}
692
693	static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
694	const struct blk_mq_queue_data *bd)
695	{
696	struct request *rq = bd->rq;
697	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
698	struct zloop_device *zlo = rq->q->queuedata;
699
700	if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting)
701	return BLK_STS_IOERR;
702
703	/*
704	* If we need to strongly order zone append operations, set the request
705	* sector to the zone write pointer location now instead of when the
706	* command work runs.
707	*/
708	if (zlo->ordered_zone_append && req_op(req: rq) == REQ_OP_ZONE_APPEND) {
709	if (!zloop_set_zone_append_sector(rq))
710	return BLK_STS_IOERR;
711	}
712
713	blk_mq_start_request(rq);
714
715	INIT_WORK(&cmd->work, zloop_cmd_workfn);
716	queue_work(wq: zlo->workqueue, work: &cmd->work);
717
718	return BLK_STS_OK;
719	}
720
721	static const struct blk_mq_ops zloop_mq_ops = {
722	.queue_rq = zloop_queue_rq,
723	.complete = zloop_complete_rq,
724	};
725
726	static int zloop_open(struct gendisk *disk, blk_mode_t mode)
727	{
728	struct zloop_device *zlo = disk->private_data;
729	int ret;
730
731	ret = mutex_lock_killable(&zloop_ctl_mutex);
732	if (ret)
733	return ret;
734
735	if (zlo->state != Zlo_live)
736	ret = -ENXIO;
737	mutex_unlock(lock: &zloop_ctl_mutex);
738	return ret;
739	}
740
741	static int zloop_report_zones(struct gendisk *disk, sector_t sector,
742	unsigned int nr_zones, struct blk_report_zones_args *args)
743	{
744	struct zloop_device *zlo = disk->private_data;
745	struct blk_zone blkz = {};
746	unsigned int first, i;
747	unsigned long flags;
748	int ret;
749
750	first = disk_zone_no(disk, sector);
751	if (first >= zlo->nr_zones)
752	return `0`;
753	nr_zones = min(nr_zones, zlo->nr_zones - first);
754
755	for (i = `0`; i < nr_zones; i++) {
756	unsigned int zone_no = first + i;
757	struct zloop_zone *zone = &zlo->zones[zone_no];
758
759	mutex_lock(&zone->lock);
760
761	if (test_and_clear_bit(nr: ZLOOP_ZONE_SEQ_ERROR, addr: &zone->flags)) {
762	ret = zloop_update_seq_zone(zlo, zone_no);
763	if (ret) {
764	mutex_unlock(lock: &zone->lock);
765	return ret;
766	}
767	}
768
769	blkz.start = zone->start;
770	blkz.len = zlo->zone_size;
771	spin_lock_irqsave(&zone->wp_lock, flags);
772	blkz.wp = zone->wp;
773	spin_unlock_irqrestore(lock: &zone->wp_lock, flags);
774	blkz.cond = zone->cond;
775	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
776	blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
777	blkz.capacity = zlo->zone_size;
778	} else {
779	blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
780	blkz.capacity = zlo->zone_capacity;
781	}
782
783	mutex_unlock(lock: &zone->lock);
784
785	ret = disk_report_zone(disk, zone: &blkz, idx: i, args);
786	if (ret)
787	return ret;
788	}
789
790	return nr_zones;
791	}
792
793	static void zloop_free_disk(struct gendisk *disk)
794	{
795	struct zloop_device *zlo = disk->private_data;
796	unsigned int i;
797
798	blk_mq_free_tag_set(set: &zlo->tag_set);
799
800	for (i = `0`; i < zlo->nr_zones; i++) {
801	struct zloop_zone *zone = &zlo->zones[i];
802
803	mapping_set_gfp_mask(m: zone->file->f_mapping,
804	mask: zone->old_gfp_mask);
805	fput(zone->file);
806	}
807
808	fput(zlo->data_dir);
809	destroy_workqueue(wq: zlo->workqueue);
810	kfree(objp: zlo->base_dir);
811	kvfree(addr: zlo);
812	}
813
814	static const struct block_device_operations zloop_fops = {
815	.owner = THIS_MODULE,
816	.open = zloop_open,
817	.report_zones = zloop_report_zones,
818	.free_disk = zloop_free_disk,
819	};
820
821	__printf(`3`, `4`)
822	static struct file zloop_filp_open_fmt(int* oflags, umode_t mode,
823	const char *fmt, ...)
824	{
825	struct file *file;
826	va_list ap;
827	char *p;
828
829	va_start(ap, fmt);
830	p = kvasprintf(GFP_KERNEL, fmt, args: ap);
831	va_end(ap);
832
833	if (!p)
834	return ERR_PTR(error: -ENOMEM);
835	file = filp_open(p, oflags, mode);
836	kfree(objp: p);
837	return file;
838	}
839
840	static int zloop_get_block_size(struct zloop_device *zlo,
841	struct zloop_zone *zone)
842	{
843	struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev;
844	struct kstat st;
845
846	/*
847	* If the FS block size is lower than or equal to 4K, use that as the
848	* device block size. Otherwise, fallback to the FS direct IO alignment
849	* constraint if that is provided, and to the FS underlying device
850	* physical block size if the direct IO alignment is unknown.
851	*/
852	if (file_inode(f: zone->file)->i_sb->s_blocksize <= SZ_4K)
853	zlo->block_size = file_inode(f: zone->file)->i_sb->s_blocksize;
854	else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, `0`) &&
855	(st.result_mask & STATX_DIOALIGN))
856	zlo->block_size = st.dio_offset_align;
857	else if (sb_bdev)
858	zlo->block_size = bdev_physical_block_size(bdev: sb_bdev);
859	else
860	zlo->block_size = SECTOR_SIZE;
861
862	if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - `1`)) {
863	pr_err("Zone capacity is not aligned to block size %u\n",
864	zlo->block_size);
865	return -EINVAL;
866	}
867
868	return `0`;
869	}
870
871	static int zloop_init_zone(struct zloop_device zlo, struct* zloop_options *opts,
872	unsigned int zone_no, bool restore)
873	{
874	struct zloop_zone *zone = &zlo->zones[zone_no];
875	int oflags = O_RDWR;
876	struct kstat stat;
877	sector_t file_sectors;
878	int ret;
879
880	mutex_init(&zone->lock);
881	spin_lock_init(&zone->wp_lock);
882	zone->start = (sector_t)zone_no << zlo->zone_shift;
883
884	if (!restore)
885	oflags \|= O_CREAT;
886
887	if (!opts->buffered_io)
888	oflags \|= O_DIRECT;
889
890	if (zone_no < zlo->nr_conv_zones) {
891	/ Conventional zone file. /
892	set_bit(nr: ZLOOP_ZONE_CONV, addr: &zone->flags);
893	zone->cond = BLK_ZONE_COND_NOT_WP;
894	zone->wp = U64_MAX;
895
896	zone->file = zloop_filp_open_fmt(oflags, mode: `0600`, fmt: "%s/%u/cnv-%06u",
897	zlo->base_dir, zlo->id, zone_no);
898	if (IS_ERR(ptr: zone->file)) {
899	pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)",
900	zone_no, zlo->base_dir, zlo->id, zone_no,
901	PTR_ERR(zone->file));
902	return PTR_ERR(ptr: zone->file);
903	}
904
905	if (!zlo->block_size) {
906	ret = zloop_get_block_size(zlo, zone);
907	if (ret)
908	return ret;
909	}
910
911	ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, `0`);
912	if (ret < `0`) {
913	pr_err("Failed to get zone %u file stat\n", zone_no);
914	return ret;
915	}
916	file_sectors = stat.size >> SECTOR_SHIFT;
917
918	if (restore && file_sectors != zlo->zone_size) {
919	pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n",
920	zone_no, file_sectors, zlo->zone_capacity);
921	return ret;
922	}
923
924	ret = vfs_truncate(&zone->file->f_path,
925	zlo->zone_size << SECTOR_SHIFT);
926	if (ret < `0`) {
927	pr_err("Failed to truncate zone %u file (err=%d)\n",
928	zone_no, ret);
929	return ret;
930	}
931
932	return `0`;
933	}
934
935	/ Sequential zone file. /
936	zone->file = zloop_filp_open_fmt(oflags, mode: `0600`, fmt: "%s/%u/seq-%06u",
937	zlo->base_dir, zlo->id, zone_no);
938	if (IS_ERR(ptr: zone->file)) {
939	pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)",
940	zone_no, zlo->base_dir, zlo->id, zone_no,
941	PTR_ERR(zone->file));
942	return PTR_ERR(ptr: zone->file);
943	}
944
945	if (!zlo->block_size) {
946	ret = zloop_get_block_size(zlo, zone);
947	if (ret)
948	return ret;
949	}
950
951	zloop_get_block_size(zlo, zone);
952
953	mutex_lock(&zone->lock);
954	ret = zloop_update_seq_zone(zlo, zone_no);
955	mutex_unlock(lock: &zone->lock);
956
957	return ret;
958	}
959
960	static bool zloop_dev_exists(struct zloop_device *zlo)
961	{
962	struct file cnv, seq;
963	bool exists;
964
965	cnv = zloop_filp_open_fmt(O_RDONLY, mode: `0600`, fmt: "%s/%u/cnv-%06u",
966	zlo->base_dir, zlo->id, `0`);
967	seq = zloop_filp_open_fmt(O_RDONLY, mode: `0600`, fmt: "%s/%u/seq-%06u",
968	zlo->base_dir, zlo->id, `0`);
969	exists = !IS_ERR(ptr: cnv) \|\| !IS_ERR(ptr: seq);
970
971	if (!IS_ERR(ptr: cnv))
972	fput(cnv);
973	if (!IS_ERR(ptr: seq))
974	fput(seq);
975
976	return exists;
977	}
978
979	static int zloop_ctl_add(struct zloop_options *opts)
980	{
981	struct queue_limits lim = {
982	.max_hw_sectors = SZ_1M >> SECTOR_SHIFT,
983	.chunk_sectors = opts->zone_size,
984	.features = BLK_FEAT_ZONED,
985	};
986	unsigned int nr_zones, i, j;
987	struct zloop_device *zlo;
988	int ret = -EINVAL;
989	bool restore;
990
991	__module_get(THIS_MODULE);
992
993	nr_zones = opts->capacity >> ilog2(opts->zone_size);
994	if (opts->nr_conv_zones >= nr_zones) {
995	pr_err("Invalid number of conventional zones %u\n",
996	opts->nr_conv_zones);
997	goto out;
998	}
999
1000	zlo = kvzalloc(struct_size(zlo, zones, nr_zones), GFP_KERNEL);
1001	if (!zlo) {
1002	ret = -ENOMEM;
1003	goto out;
1004	}
1005	WRITE_ONCE(zlo->state, Zlo_creating);
1006
1007	ret = mutex_lock_killable(&zloop_ctl_mutex);
1008	if (ret)
1009	goto out_free_dev;
1010
1011	/ Allocate id, if @opts->id >= 0, we're requesting that specific id /
1012	if (opts->id >= `0`) {
1013	ret = idr_alloc(&zloop_index_idr, ptr: zlo,
1014	start: opts->id, end: opts->id + `1`, GFP_KERNEL);
1015	if (ret == -ENOSPC)
1016	ret = -EEXIST;
1017	} else {
1018	ret = idr_alloc(&zloop_index_idr, ptr: zlo, start: `0`, end: `0`, GFP_KERNEL);
1019	}
1020	mutex_unlock(lock: &zloop_ctl_mutex);
1021	if (ret < `0`)
1022	goto out_free_dev;
1023
1024	zlo->id = ret;
1025	zlo->zone_shift = ilog2(opts->zone_size);
1026	zlo->zone_size = opts->zone_size;
1027	if (opts->zone_capacity)
1028	zlo->zone_capacity = opts->zone_capacity;
1029	else
1030	zlo->zone_capacity = zlo->zone_size;
1031	zlo->nr_zones = nr_zones;
1032	zlo->nr_conv_zones = opts->nr_conv_zones;
1033	zlo->buffered_io = opts->buffered_io;
1034	zlo->zone_append = opts->zone_append;
1035	if (zlo->zone_append)
1036	zlo->ordered_zone_append = opts->ordered_zone_append;
1037
1038	zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND \| WQ_FREEZABLE,
1039	opts->nr_queues * opts->queue_depth, zlo->id);
1040	if (!zlo->workqueue) {
1041	ret = -ENOMEM;
1042	goto out_free_idr;
1043	}
1044
1045	if (opts->base_dir)
1046	zlo->base_dir = kstrdup(s: opts->base_dir, GFP_KERNEL);
1047	else
1048	zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL);
1049	if (!zlo->base_dir) {
1050	ret = -ENOMEM;
1051	goto out_destroy_workqueue;
1052	}
1053
1054	zlo->data_dir = zloop_filp_open_fmt(O_RDONLY \| O_DIRECTORY, mode: `0`, fmt: "%s/%u",
1055	zlo->base_dir, zlo->id);
1056	if (IS_ERR(ptr: zlo->data_dir)) {
1057	ret = PTR_ERR(ptr: zlo->data_dir);
1058	pr_warn("Failed to open directory %s/%u (err=%d)\n",
1059	zlo->base_dir, zlo->id, ret);
1060	goto out_free_base_dir;
1061	}
1062
1063	/*
1064	* If we already have zone files, we are restoring a device created by a
1065	* previous add operation. In this case, zloop_init_zone() will check
1066	* that the zone files are consistent with the zone configuration given.
1067	*/
1068	restore = zloop_dev_exists(zlo);
1069	for (i = `0`; i < nr_zones; i++) {
1070	ret = zloop_init_zone(zlo, opts, zone_no: i, restore);
1071	if (ret)
1072	goto out_close_files;
1073	}
1074
1075	lim.physical_block_size = zlo->block_size;
1076	lim.logical_block_size = zlo->block_size;
1077	if (zlo->zone_append)
1078	lim.max_hw_zone_append_sectors = lim.max_hw_sectors;
1079
1080	zlo->tag_set.ops = &zloop_mq_ops;
1081	zlo->tag_set.nr_hw_queues = opts->nr_queues;
1082	zlo->tag_set.queue_depth = opts->queue_depth;
1083	zlo->tag_set.numa_node = NUMA_NO_NODE;
1084	zlo->tag_set.cmd_size = sizeof(struct zloop_cmd);
1085	zlo->tag_set.driver_data = zlo;
1086
1087	ret = blk_mq_alloc_tag_set(set: &zlo->tag_set);
1088	if (ret) {
1089	pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret);
1090	goto out_close_files;
1091	}
1092
1093	zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo);
1094	if (IS_ERR(ptr: zlo->disk)) {
1095	pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret);
1096	ret = PTR_ERR(ptr: zlo->disk);
1097	goto out_cleanup_tags;
1098	}
1099	zlo->disk->flags = GENHD_FL_NO_PART;
1100	zlo->disk->fops = &zloop_fops;
1101	zlo->disk->private_data = zlo;
1102	sprintf(buf: zlo->disk->disk_name, fmt: "zloop%d", zlo->id);
1103	set_capacity(disk: zlo->disk, size: (u64)lim.chunk_sectors * zlo->nr_zones);
1104
1105	ret = blk_revalidate_disk_zones(disk: zlo->disk);
1106	if (ret)
1107	goto out_cleanup_disk;
1108
1109	ret = add_disk(disk: zlo->disk);
1110	if (ret) {
1111	pr_err("add_disk failed (err=%d)\n", ret);
1112	goto out_cleanup_disk;
1113	}
1114
1115	mutex_lock(&zloop_ctl_mutex);
1116	WRITE_ONCE(zlo->state, Zlo_live);
1117	mutex_unlock(lock: &zloop_ctl_mutex);
1118
1119	pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n",
1120	zlo->id, zlo->nr_zones,
1121	((sector_t)zlo->zone_size << SECTOR_SHIFT) >> `20`,
1122	zlo->block_size);
1123	pr_info("zloop%d: using %s%s zone append\n",
1124	zlo->id,
1125	zlo->ordered_zone_append ? "ordered " : "",
1126	zlo->zone_append ? "native" : "emulated");
1127
1128	return `0`;
1129
1130	out_cleanup_disk:
1131	put_disk(disk: zlo->disk);
1132	out_cleanup_tags:
1133	blk_mq_free_tag_set(set: &zlo->tag_set);
1134	out_close_files:
1135	for (j = `0`; j < i; j++) {
1136	struct zloop_zone *zone = &zlo->zones[j];
1137
1138	if (!IS_ERR_OR_NULL(ptr: zone->file))
1139	fput(zone->file);
1140	}
1141	fput(zlo->data_dir);
1142	out_free_base_dir:
1143	kfree(objp: zlo->base_dir);
1144	out_destroy_workqueue:
1145	destroy_workqueue(wq: zlo->workqueue);
1146	out_free_idr:
1147	mutex_lock(&zloop_ctl_mutex);
1148	idr_remove(&zloop_index_idr, id: zlo->id);
1149	mutex_unlock(lock: &zloop_ctl_mutex);
1150	out_free_dev:
1151	kvfree(addr: zlo);
1152	out:
1153	module_put(THIS_MODULE);
1154	if (ret == -ENOENT)
1155	ret = -EINVAL;
1156	return ret;
1157	}
1158
1159	static int zloop_ctl_remove(struct zloop_options *opts)
1160	{
1161	struct zloop_device *zlo;
1162	int ret;
1163
1164	if (!(opts->mask & ZLOOP_OPT_ID)) {
1165	pr_err("No ID specified\n");
1166	return -EINVAL;
1167	}
1168
1169	ret = mutex_lock_killable(&zloop_ctl_mutex);
1170	if (ret)
1171	return ret;
1172
1173	zlo = idr_find(&zloop_index_idr, id: opts->id);
1174	if (!zlo \|\| zlo->state == Zlo_creating) {
1175	ret = -ENODEV;
1176	} else if (zlo->state == Zlo_deleting) {
1177	ret = -EINVAL;
1178	} else {
1179	idr_remove(&zloop_index_idr, id: zlo->id);
1180	WRITE_ONCE(zlo->state, Zlo_deleting);
1181	}
1182
1183	mutex_unlock(lock: &zloop_ctl_mutex);
1184	if (ret)
1185	return ret;
1186
1187	del_gendisk(gp: zlo->disk);
1188	put_disk(disk: zlo->disk);
1189
1190	pr_info("Removed device %d\n", opts->id);
1191
1192	module_put(THIS_MODULE);
1193
1194	return `0`;
1195	}
1196
1197	static int zloop_parse_options(struct zloop_options opts, const* char *buf)
1198	{
1199	substring_t args[MAX_OPT_ARGS];
1200	char options, o, *p;
1201	unsigned int token;
1202	int ret = `0`;
1203
1204	/ Set defaults. /
1205	opts->mask = `0`;
1206	opts->id = ZLOOP_DEF_ID;
1207	opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES;
1208	opts->zone_size = ZLOOP_DEF_ZONE_SIZE;
1209	opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES;
1210	opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
1211	opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
1212	opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
1213	opts->zone_append = ZLOOP_DEF_ZONE_APPEND;
1214	opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND;
1215
1216	if (!buf)
1217	return `0`;
1218
1219	/ Skip leading spaces before the options. /
1220	while (isspace(*buf))
1221	buf++;
1222
1223	options = o = kstrdup(s: buf, GFP_KERNEL);
1224	if (!options)
1225	return -ENOMEM;
1226
1227	/ Parse the options, doing only some light invalid value checks. /
1228	while ((p = strsep(&o, ",\n")) != NULL) {
1229	if (!*p)
1230	continue;
1231
1232	token = match_token(p, table: zloop_opt_tokens, args);
1233	opts->mask \|= token;
1234	switch (token) {
1235	case ZLOOP_OPT_ID:
1236	if (match_int(args, result: &opts->id)) {
1237	ret = -EINVAL;
1238	goto out;
1239	}
1240	break;
1241	case ZLOOP_OPT_CAPACITY:
1242	if (match_uint(s: args, result: &token)) {
1243	ret = -EINVAL;
1244	goto out;
1245	}
1246	if (!token) {
1247	pr_err("Invalid capacity\n");
1248	ret = -EINVAL;
1249	goto out;
1250	}
1251	opts->capacity =
1252	((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
1253	break;
1254	case ZLOOP_OPT_ZONE_SIZE:
1255	if (match_uint(s: args, result: &token)) {
1256	ret = -EINVAL;
1257	goto out;
1258	}
1259	if (!token \|\| token > ZLOOP_MAX_ZONE_SIZE_MB \|\|
1260	!is_power_of_2(n: token)) {
1261	pr_err("Invalid zone size %u\n", token);
1262	ret = -EINVAL;
1263	goto out;
1264	}
1265	opts->zone_size =
1266	((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
1267	break;
1268	case ZLOOP_OPT_ZONE_CAPACITY:
1269	if (match_uint(s: args, result: &token)) {
1270	ret = -EINVAL;
1271	goto out;
1272	}
1273	if (!token) {
1274	pr_err("Invalid zone capacity\n");
1275	ret = -EINVAL;
1276	goto out;
1277	}
1278	opts->zone_capacity =
1279	((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
1280	break;
1281	case ZLOOP_OPT_NR_CONV_ZONES:
1282	if (match_uint(s: args, result: &token)) {
1283	ret = -EINVAL;
1284	goto out;
1285	}
1286	opts->nr_conv_zones = token;
1287	break;
1288	case ZLOOP_OPT_BASE_DIR:
1289	p = match_strdup(args);
1290	if (!p) {
1291	ret = -ENOMEM;
1292	goto out;
1293	}
1294	kfree(objp: opts->base_dir);
1295	opts->base_dir = p;
1296	break;
1297	case ZLOOP_OPT_NR_QUEUES:
1298	if (match_uint(s: args, result: &token)) {
1299	ret = -EINVAL;
1300	goto out;
1301	}
1302	if (!token) {
1303	pr_err("Invalid number of queues\n");
1304	ret = -EINVAL;
1305	goto out;
1306	}
1307	opts->nr_queues = min(token, num_online_cpus());
1308	break;
1309	case ZLOOP_OPT_QUEUE_DEPTH:
1310	if (match_uint(s: args, result: &token)) {
1311	ret = -EINVAL;
1312	goto out;
1313	}
1314	if (!token) {
1315	pr_err("Invalid queue depth\n");
1316	ret = -EINVAL;
1317	goto out;
1318	}
1319	opts->queue_depth = token;
1320	break;
1321	case ZLOOP_OPT_BUFFERED_IO:
1322	opts->buffered_io = true;
1323	break;
1324	case ZLOOP_OPT_ZONE_APPEND:
1325	if (match_uint(s: args, result: &token)) {
1326	ret = -EINVAL;
1327	goto out;
1328	}
1329	if (token != `0` && token != `1`) {
1330	pr_err("Invalid zone_append value\n");
1331	ret = -EINVAL;
1332	goto out;
1333	}
1334	opts->zone_append = token;
1335	break;
1336	case ZLOOP_OPT_ORDERED_ZONE_APPEND:
1337	opts->ordered_zone_append = true;
1338	break;
1339	case ZLOOP_OPT_ERR:
1340	default:
1341	pr_warn("unknown parameter or missing value '%s'\n", p);
1342	ret = -EINVAL;
1343	goto out;
1344	}
1345	}
1346
1347	ret = -EINVAL;
1348	if (opts->capacity <= opts->zone_size) {
1349	pr_err("Invalid capacity\n");
1350	goto out;
1351	}
1352
1353	if (opts->zone_capacity > opts->zone_size) {
1354	pr_err("Invalid zone capacity\n");
1355	goto out;
1356	}
1357
1358	ret = `0`;
1359	out:
1360	kfree(objp: options);
1361	return ret;
1362	}
1363
/* Operation codes accepted through the control device. */
enum {
	ZLOOP_CTL_ADD,
	ZLOOP_CTL_REMOVE,
};
1368
1369	static struct zloop_ctl_op {
1370	int code;
1371	const char *name;
1372	} zloop_ctl_ops[] = {
1373	{ ZLOOP_CTL_ADD, "add" },
1374	{ ZLOOP_CTL_REMOVE, "remove" },
1375	{ -`1`, NULL },
1376	};
1377
1378	static ssize_t zloop_ctl_write(struct file file, const* char __user *ubuf,
1379	size_t count, loff_t *pos)
1380	{
1381	struct zloop_options opts = { };
1382	struct zloop_ctl_op *op;
1383	const char buf, opts_buf;
1384	int i, ret;
1385
1386	if (count > PAGE_SIZE)
1387	return -ENOMEM;
1388
1389	buf = memdup_user_nul(ubuf, count);
1390	if (IS_ERR(ptr: buf))
1391	return PTR_ERR(ptr: buf);
1392
1393	for (i = `0`; i < ARRAY_SIZE(zloop_ctl_ops); i++) {
1394	op = &zloop_ctl_ops[i];
1395	if (!op->name) {
1396	pr_err("Invalid operation\n");
1397	ret = -EINVAL;
1398	goto out;
1399	}
1400	if (!strncmp(buf, op->name, strlen(op->name)))
1401	break;
1402	}
1403
1404	if (count <= strlen(op->name))
1405	opts_buf = NULL;
1406	else
1407	opts_buf = buf + strlen(op->name);
1408
1409	ret = zloop_parse_options(opts: &opts, buf: opts_buf);
1410	if (ret) {
1411	pr_err("Failed to parse options\n");
1412	goto out;
1413	}
1414
1415	switch (op->code) {
1416	case ZLOOP_CTL_ADD:
1417	ret = zloop_ctl_add(opts: &opts);
1418	break;
1419	case ZLOOP_CTL_REMOVE:
1420	ret = zloop_ctl_remove(opts: &opts);
1421	break;
1422	default:
1423	pr_err("Invalid operation\n");
1424	ret = -EINVAL;
1425	goto out;
1426	}
1427
1428	out:
1429	kfree(objp: opts.base_dir);
1430	kfree(objp: buf);
1431	return ret ? ret : count;
1432	}
1433
1434	static int zloop_ctl_show(struct seq_file seq_file, void* *private)
1435	{
1436	const struct match_token *tok;
1437	int i;
1438
1439	/ Add operation /
1440	seq_printf(m: seq_file, fmt: "%s ", zloop_ctl_ops[`0`].name);
1441	for (i = `0`; i < ARRAY_SIZE(zloop_opt_tokens); i++) {
1442	tok = &zloop_opt_tokens[i];
1443	if (!tok->pattern)
1444	break;
1445	if (i)
1446	seq_putc(m: seq_file, c: `','`);
1447	seq_puts(m: seq_file, s: tok->pattern);
1448	}
1449	seq_putc(m: seq_file, c: `'\n'`);
1450
1451	/ Remove operation /
1452	seq_puts(m: seq_file, s: zloop_ctl_ops[`1`].name);
1453	seq_puts(m: seq_file, s: " id=%d\n");
1454
1455	return `0`;
1456	}
1457
1458	static int zloop_ctl_open(struct inode inode, struct* file *file)
1459	{
1460	file->private_data = NULL;
1461	return single_open(file, zloop_ctl_show, NULL);
1462	}
1463
/*
 * Release handler of the control device: hand the file back to
 * single_release(), the counterpart of the single_open() call made at open.
 */
static int zloop_ctl_release(struct inode *inode, struct file *file)
{
	return single_release(inode, file);
}
1468
1469	static const struct file_operations zloop_ctl_fops = {
1470	.owner = THIS_MODULE,
1471	.open = zloop_ctl_open,
1472	.release = zloop_ctl_release,
1473	.write = zloop_ctl_write,
1474	.read = seq_read,
1475	};
1476
1477	static struct miscdevice zloop_misc = {
1478	.minor = MISC_DYNAMIC_MINOR,
1479	.name = "zloop-control",
1480	.fops = &zloop_ctl_fops,
1481	};
1482
1483	static int __init zloop_init(void)
1484	{
1485	int ret;
1486
1487	ret = misc_register(misc: &zloop_misc);
1488	if (ret) {
1489	pr_err("Failed to register misc device: %d\n", ret);
1490	return ret;
1491	}
1492	pr_info("Module loaded\n");
1493
1494	return `0`;
1495	}
1496
1497	static void __exit zloop_exit(void)
1498	{
1499	misc_deregister(misc: &zloop_misc);
1500	idr_destroy(&zloop_index_idr);
1501	}
1502
1503	module_init(zloop_init);
1504	module_exit(zloop_exit);
1505
1506	MODULE_DESCRIPTION("Zoned loopback device");
1507	MODULE_LICENSE("GPL");
1508

source code of linux/drivers/block/zloop.c