| 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | |
| 3 | #include <linux/blkdev.h> |
| 4 | #include <linux/module.h> |
| 5 | #include <linux/errno.h> |
| 6 | #include <linux/slab.h> |
| 7 | #include <linux/init.h> |
| 8 | #include <linux/timer.h> |
| 9 | #include <linux/sched.h> |
| 10 | #include <linux/list.h> |
| 11 | #include <linux/file.h> |
| 12 | #include <linux/seq_file.h> |
| 13 | #include <trace/events/block.h> |
| 14 | |
| 15 | #include "md.h" |
| 16 | #include "md-bitmap.h" |
| 17 | |
| 18 | /* |
| 19 | * #### Background |
| 20 | * |
| 21 | * Redundant data is used to enhance data fault tolerance, and the storage |
| 22 | * methods for redundant data vary depending on the RAID levels. And it's |
| 23 | * important to maintain the consistency of redundant data. |
| 24 | * |
| 25 | * Bitmap is used to record which data blocks have been synchronized and which |
| 26 | * ones need to be resynchronized or recovered. Each bit in the bitmap |
| 27 | * represents a segment of data in the array. When a bit is set, it indicates |
| 28 | * that the multiple redundant copies of that data segment may not be |
| 29 | * consistent. Data synchronization can be performed based on the bitmap after |
| 30 | * power failure or readding a disk. If there is no bitmap, a full disk |
| 31 | * synchronization is required. |
| 32 | * |
| 33 | * #### Key Features |
| 34 | * |
| 35 | * - IO fastpath is lockless, if user issues lots of write IO to the same |
| 36 | * bitmap bit in a short time, only the first write has additional overhead |
| 37 | * to update bitmap bit, no additional overhead for the following writes; |
| 38 | * - support only resync or recover written data, means in the case creating |
| 39 | * new array or replacing with a new disk, there is no need to do a full disk |
| 40 | * resync/recovery; |
| 41 | * |
| 42 | * #### Key Concept |
| 43 | * |
| 44 | * ##### State Machine |
| 45 | * |
| 46 | * Each bit is stored as one byte and is in one of 6 different states, see |
| 47 | * llbitmap_state. In total 8 different actions, see llbitmap_action, can change the state: |
| 48 | * |
| 49 | * llbitmap state machine: transitions between states |
| 50 | * |
| 51 | * | | Startwrite | Startsync | Endsync | Abortsync| |
| 52 | * | --------- | ---------- | --------- | ------- | ------- | |
| 53 | * | Unwritten | Dirty | x | x | x | |
| 54 | * | Clean | Dirty | x | x | x | |
| 55 | * | Dirty | x | x | x | x | |
| 56 | * | NeedSync | x | Syncing | x | x | |
| 57 | * | Syncing | x | Syncing | Dirty | NeedSync | |
| 58 | * |
| 59 | * | | Reload | Daemon | Discard | Stale | |
| 60 | * | --------- | -------- | ------ | --------- | --------- | |
| 61 | * | Unwritten | x | x | x | x | |
| 62 | * | Clean | x | x | Unwritten | NeedSync | |
| 63 | * | Dirty | NeedSync | Clean | Unwritten | NeedSync | |
| 64 | * | NeedSync | x | x | Unwritten | x | |
| 65 | * | Syncing | NeedSync | x | Unwritten | NeedSync | |
| 66 | * |
| 67 | * Typical scenarios: |
| 68 | * |
| 69 | * 1) Create new array |
| 70 | * All bits will be set to Unwritten by default, if --assume-clean is set, |
| 71 | * all bits will be set to Clean instead. |
| 72 | * |
| 73 | * 2) write data, raid1/raid10 have full copy of data, while raid456 doesn't and |
| 74 | * rely on xor data |
| 75 | * |
| 76 | * 2.1) write new data to raid1/raid10: |
| 77 | * Unwritten --StartWrite--> Dirty |
| 78 | * |
| 79 | * 2.2) write new data to raid456: |
| 80 | * Unwritten --StartWrite--> NeedSync |
| 81 | * |
| 82 | * Because the initial recover for raid456 is skipped, the xor data is not built |
| 83 | * yet, the bit must be set to NeedSync first and after lazy initial recover is |
| 84 | * finished, the bit will finally set to Dirty(see 5.1 and 5.4); |
| 85 | * |
| 86 | * 2.3) cover write |
| 87 | * Clean --StartWrite--> Dirty |
| 88 | * |
| 89 | * 3) daemon, if the array is not degraded: |
| 90 | * Dirty --Daemon--> Clean |
| 91 | * |
| 92 | * 4) discard |
| 93 | * {Clean, Dirty, NeedSync, Syncing} --Discard--> Unwritten |
| 94 | * |
| 95 | * 5) resync and recover |
| 96 | * |
| 97 | * 5.1) common process |
| 98 | * NeedSync --Startsync--> Syncing --Endsync--> Dirty --Daemon--> Clean |
| 99 | * |
| 100 | * 5.2) resync after power failure |
| 101 | * Dirty --Reload--> NeedSync |
| 102 | * |
| 103 | * 5.3) recover while replacing with a new disk |
| 104 | * By default, the old bitmap framework will recover all data, and llbitmap |
| 105 | * implements this by a new helper, see llbitmap_skip_sync_blocks: |
| 106 | * |
| 107 | * skip recover for bits other than dirty or clean; |
| 108 | * |
| 109 | * 5.4) lazy initial recover for raid5: |
| 110 | * By default, the old bitmap framework will only allow new recover when there |
| 111 | * are spares(new disk), a new recovery flag MD_RECOVERY_LAZY_RECOVER is added |
| 112 | * to perform raid456 lazy recover for set bits(from 2.2). |
| 113 | * |
| 114 | * 6. special handling for degraded array: |
| 115 | * |
| 116 | * - Dirty bits will never be cleared, daemon will just do nothing, so that if |
| 117 | * a disk is readded, Clean bits can be skipped with recovery; |
| 118 | * - Dirty bits will be converted to Syncing by Startsync, to do data recovery |
| 119 | * for newly added disks; |
| 120 | * - New write will convert bits to NeedSync directly; |
| 121 | * |
| 122 | * ##### Bitmap IO |
| 123 | * |
| 124 | * ##### Chunksize |
| 125 | * |
| 126 | * The default bitmap size is 128k, including the 1k bitmap super block, and |
| 127 | * the default size of the data segment covered by each bit (chunksize) is 64k. |
| 128 | * The chunksize is doubled each time for as long as the total number of bits |
| 129 | * is not less than 127k (see llbitmap_init). |
| 130 | * |
| 131 | * ##### READ |
| 132 | * |
| 133 | * While creating bitmap, all pages will be allocated and read for llbitmap, |
| 134 | * there won't be read afterwards |
| 135 | * |
| 136 | * ##### WRITE |
| 137 | * |
| 138 | * WRITE IO is divided into logical_block_size of the array, the dirty state |
| 139 | * of each block is tracked independently, for example: |
| 140 | * |
| 141 | * each page is 4k and contains 8 blocks; each block is 512 bytes and contains 512 bits; |
| 142 | * |
| 143 | * | page0 | page1 | ... | page 31 | |
| 144 | * | | |
| 145 | * | \-----------------------\ |
| 146 | * | | |
| 147 | * | block0 | block1 | ... | block 7| |
| 148 | * | | |
| 149 | * | \-----------------\ |
| 150 | * | | |
| 151 | * | bit0 | bit1 | ... | bit511 | |
| 152 | * |
| 153 | * From IO path, if one bit is changed to Dirty or NeedSync, the corresponding |
| 154 | * subpage will be marked dirty, such block must write first before the IO is |
| 155 | * issued. This behaviour will affect IO performance, to reduce the impact, if |
| 156 | * multiple bits are changed in the same block in a short time, all bits in this |
| 157 | * block will be changed to Dirty/NeedSync, so that there won't be any overhead |
| 158 | * until daemon clears dirty bits. |
| 159 | * |
| 160 | * ##### Dirty Bits synchronization |
| 161 | * |
| 162 | * IO fast path will set bits to dirty, and those dirty bits will be cleared |
| 163 | * by daemon after IO is done. llbitmap_page_ctl is used to synchronize between |
| 164 | * IO path and daemon; |
| 165 | * |
| 166 | * IO path: |
| 167 | * 1) try to grab a reference, if succeed, set expire time after 5s and return; |
| 168 | * 2) if failed to grab a reference, wait for daemon to finish clearing dirty |
| 169 | * bits; |
| 170 | * |
| 171 | * Daemon (Daemon will be woken up every daemon_sleep seconds): |
| 172 | * For each page: |
| 173 | * 1) check if page expired, if not skip this page; for expired page: |
| 174 | * 2) suspend the page and wait for inflight write IO to be done; |
| 175 | * 3) change dirty page to clean; |
| 176 | * 4) resume the page; |
| 177 | */ |
| 178 | |
| 179 | #define BITMAP_DATA_OFFSET 1024 |
| 180 | |
| 181 | /* 64k is the max IO size of sync IO for raid1/raid10 */ |
| 182 | #define MIN_CHUNK_SIZE (64 * 2) |
| 183 | |
| 184 | /* By default, daemon will be woken up every 30s */ |
| 185 | #define DEFAULT_DAEMON_SLEEP 30 |
| 186 | |
| 187 | /* |
| 188 | * Dirtied bits that have not been accessed for more than 5s will be cleared |
| 189 | * by daemon. |
| 190 | */ |
| 191 | #define DEFAULT_BARRIER_IDLE 5 |
| 192 | |
| 193 | enum llbitmap_state { |
| 194 | /* No valid data, init state after assemble the array */ |
| 195 | BitUnwritten = 0, |
| 196 | /* data is consistent */ |
| 197 | BitClean, |
| 198 | /* data will be consistent after IO is done, set directly for writes */ |
| 199 | BitDirty, |
| 200 | /* |
| 201 | * data need to be resynchronized: |
| 202 | * 1) set directly for writes if array is degraded, prevent full disk |
| 203 | * synchronization after readding a disk; |
| 204 | * 2) reassemble the array after power failure, and dirty bits are |
| 205 | * found after reloading the bitmap; |
| 206 | * 3) set for first write for raid5, to build initial xor data lazily |
| 207 | */ |
| 208 | BitNeedSync, |
| 209 | /* data is synchronizing */ |
| 210 | BitSyncing, |
| 211 | BitStateCount, |
| 212 | BitNone = 0xff, |
| 213 | }; |
| 214 | |
| 215 | enum llbitmap_action { |
| 216 | /* User write new data, this is the only action from IO fast path */ |
| 217 | BitmapActionStartwrite = 0, |
| 218 | /* Start recovery */ |
| 219 | BitmapActionStartsync, |
| 220 | /* Finish recovery */ |
| 221 | BitmapActionEndsync, |
| 222 | /* Failed recovery */ |
| 223 | BitmapActionAbortsync, |
| 224 | /* Reassemble the array */ |
| 225 | BitmapActionReload, |
| 226 | /* Daemon thread is trying to clear dirty bits */ |
| 227 | BitmapActionDaemon, |
| 228 | /* Data is deleted */ |
| 229 | BitmapActionDiscard, |
| 230 | /* |
| 231 | * Bitmap is stale, mark all bits in addition to BitUnwritten to |
| 232 | * BitNeedSync. |
| 233 | */ |
| 234 | BitmapActionStale, |
| 235 | BitmapActionCount, |
| 236 | /* Init state is BitUnwritten */ |
| 237 | BitmapActionInit, |
| 238 | }; |
| 239 | |
| 240 | enum llbitmap_page_state { |
| 241 | LLPageFlush = 0, |
| 242 | LLPageDirty, |
| 243 | }; |
| 244 | |
| 245 | struct llbitmap_page_ctl { |
| 246 | char *state; |
| 247 | struct page *page; |
| 248 | unsigned long expire; |
| 249 | unsigned long flags; |
| 250 | wait_queue_head_t wait; |
| 251 | struct percpu_ref active; |
| 252 | /* Per block size dirty state, maximum 64k page / 1 sector = 128 */ |
| 253 | unsigned long dirty[]; |
| 254 | }; |
| 255 | |
| 256 | struct llbitmap { |
| 257 | struct mddev *mddev; |
| 258 | struct llbitmap_page_ctl **pctl; |
| 259 | |
| 260 | unsigned int nr_pages; |
| 261 | unsigned int io_size; |
| 262 | unsigned int blocks_per_page; |
| 263 | |
| 264 | /* shift of one chunk */ |
| 265 | unsigned long chunkshift; |
| 266 | /* size of one chunk in sector */ |
| 267 | unsigned long chunksize; |
| 268 | /* total number of chunks */ |
| 269 | unsigned long chunks; |
| 270 | unsigned long last_end_sync; |
| 271 | /* |
| 272 | * time in seconds that dirty bits will be cleared if the page is not |
| 273 | * accessed. |
| 274 | */ |
| 275 | unsigned long barrier_idle; |
| 276 | /* fires on first BitDirty state */ |
| 277 | struct timer_list pending_timer; |
| 278 | struct work_struct daemon_work; |
| 279 | |
| 280 | unsigned long flags; |
| 281 | __u64 events_cleared; |
| 282 | |
| 283 | /* for slow disks */ |
| 284 | atomic_t behind_writes; |
| 285 | wait_queue_head_t behind_wait; |
| 286 | }; |
| 287 | |
| 288 | struct llbitmap_unplug_work { |
| 289 | struct work_struct work; |
| 290 | struct llbitmap *llbitmap; |
| 291 | struct completion *done; |
| 292 | }; |
| 293 | |
| 294 | static struct workqueue_struct *md_llbitmap_io_wq; |
| 295 | static struct workqueue_struct *md_llbitmap_unplug_wq; |
| 296 | |
| 297 | static char state_machine[BitStateCount][BitmapActionCount] = { |
| 298 | [BitUnwritten] = { |
| 299 | [BitmapActionStartwrite] = BitDirty, |
| 300 | [BitmapActionStartsync] = BitNone, |
| 301 | [BitmapActionEndsync] = BitNone, |
| 302 | [BitmapActionAbortsync] = BitNone, |
| 303 | [BitmapActionReload] = BitNone, |
| 304 | [BitmapActionDaemon] = BitNone, |
| 305 | [BitmapActionDiscard] = BitNone, |
| 306 | [BitmapActionStale] = BitNone, |
| 307 | }, |
| 308 | [BitClean] = { |
| 309 | [BitmapActionStartwrite] = BitDirty, |
| 310 | [BitmapActionStartsync] = BitNone, |
| 311 | [BitmapActionEndsync] = BitNone, |
| 312 | [BitmapActionAbortsync] = BitNone, |
| 313 | [BitmapActionReload] = BitNone, |
| 314 | [BitmapActionDaemon] = BitNone, |
| 315 | [BitmapActionDiscard] = BitUnwritten, |
| 316 | [BitmapActionStale] = BitNeedSync, |
| 317 | }, |
| 318 | [BitDirty] = { |
| 319 | [BitmapActionStartwrite] = BitNone, |
| 320 | [BitmapActionStartsync] = BitNone, |
| 321 | [BitmapActionEndsync] = BitNone, |
| 322 | [BitmapActionAbortsync] = BitNone, |
| 323 | [BitmapActionReload] = BitNeedSync, |
| 324 | [BitmapActionDaemon] = BitClean, |
| 325 | [BitmapActionDiscard] = BitUnwritten, |
| 326 | [BitmapActionStale] = BitNeedSync, |
| 327 | }, |
| 328 | [BitNeedSync] = { |
| 329 | [BitmapActionStartwrite] = BitNone, |
| 330 | [BitmapActionStartsync] = BitSyncing, |
| 331 | [BitmapActionEndsync] = BitNone, |
| 332 | [BitmapActionAbortsync] = BitNone, |
| 333 | [BitmapActionReload] = BitNone, |
| 334 | [BitmapActionDaemon] = BitNone, |
| 335 | [BitmapActionDiscard] = BitUnwritten, |
| 336 | [BitmapActionStale] = BitNone, |
| 337 | }, |
| 338 | [BitSyncing] = { |
| 339 | [BitmapActionStartwrite] = BitNone, |
| 340 | [BitmapActionStartsync] = BitSyncing, |
| 341 | [BitmapActionEndsync] = BitDirty, |
| 342 | [BitmapActionAbortsync] = BitNeedSync, |
| 343 | [BitmapActionReload] = BitNeedSync, |
| 344 | [BitmapActionDaemon] = BitNone, |
| 345 | [BitmapActionDiscard] = BitUnwritten, |
| 346 | [BitmapActionStale] = BitNeedSync, |
| 347 | }, |
| 348 | }; |
| 349 | |
| 350 | static void __llbitmap_flush(struct mddev *mddev); |
| 351 | |
| 352 | static enum llbitmap_state llbitmap_read(struct llbitmap *llbitmap, loff_t pos) |
| 353 | { |
| 354 | unsigned int idx; |
| 355 | unsigned int offset; |
| 356 | |
| 357 | pos += BITMAP_DATA_OFFSET; |
| 358 | idx = pos >> PAGE_SHIFT; |
| 359 | offset = offset_in_page(pos); |
| 360 | |
| 361 | return llbitmap->pctl[idx]->state[offset]; |
| 362 | } |
| 363 | |
| 364 | /* set all the bits in the subpage as dirty */ |
| 365 | static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap, |
| 366 | struct llbitmap_page_ctl *pctl, |
| 367 | unsigned int block) |
| 368 | { |
| 369 | bool level_456 = raid_is_456(mddev: llbitmap->mddev); |
| 370 | unsigned int io_size = llbitmap->io_size; |
| 371 | int pos; |
| 372 | |
| 373 | for (pos = block * io_size; pos < (block + 1) * io_size; pos++) { |
| 374 | switch (pctl->state[pos]) { |
| 375 | case BitUnwritten: |
| 376 | pctl->state[pos] = level_456 ? BitNeedSync : BitDirty; |
| 377 | break; |
| 378 | case BitClean: |
| 379 | pctl->state[pos] = BitDirty; |
| 380 | break; |
| 381 | } |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx, |
| 386 | int offset) |
| 387 | { |
| 388 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx]; |
| 389 | unsigned int io_size = llbitmap->io_size; |
| 390 | int block = offset / io_size; |
| 391 | int pos; |
| 392 | |
| 393 | if (!test_bit(LLPageDirty, &pctl->flags)) |
| 394 | set_bit(nr: LLPageDirty, addr: &pctl->flags); |
| 395 | |
| 396 | /* |
| 397 | * For degraded array, dirty bits will never be cleared, and we must |
| 398 | * resync all the dirty bits, hence skip infect new dirty bits to |
| 399 | * prevent resync unnecessary data. |
| 400 | */ |
| 401 | if (llbitmap->mddev->degraded) { |
| 402 | set_bit(nr: block, addr: pctl->dirty); |
| 403 | return; |
| 404 | } |
| 405 | |
| 406 | /* |
| 407 | * The subpage usually contains a total of 512 bits. If any single bit |
| 408 | * within the subpage is marked as dirty, the entire sector will be |
| 409 | * written. To avoid impacting write performance, when multiple bits |
| 410 | * within the same sector are modified within llbitmap->barrier_idle, |
| 411 | * all bits in the sector will be collectively marked as dirty at once. |
| 412 | */ |
| 413 | if (test_and_set_bit(nr: block, addr: pctl->dirty)) { |
| 414 | llbitmap_infect_dirty_bits(llbitmap, pctl, block); |
| 415 | return; |
| 416 | } |
| 417 | |
| 418 | for (pos = block * io_size; pos < (block + 1) * io_size; pos++) { |
| 419 | if (pos == offset) |
| 420 | continue; |
| 421 | if (pctl->state[pos] == BitDirty || |
| 422 | pctl->state[pos] == BitNeedSync) { |
| 423 | llbitmap_infect_dirty_bits(llbitmap, pctl, block); |
| 424 | return; |
| 425 | } |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | static void llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state, |
| 430 | loff_t pos) |
| 431 | { |
| 432 | unsigned int idx; |
| 433 | unsigned int bit; |
| 434 | |
| 435 | pos += BITMAP_DATA_OFFSET; |
| 436 | idx = pos >> PAGE_SHIFT; |
| 437 | bit = offset_in_page(pos); |
| 438 | |
| 439 | llbitmap->pctl[idx]->state[bit] = state; |
| 440 | if (state == BitDirty || state == BitNeedSync) |
| 441 | llbitmap_set_page_dirty(llbitmap, idx, offset: bit); |
| 442 | } |
| 443 | |
| 444 | static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx) |
| 445 | { |
| 446 | struct mddev *mddev = llbitmap->mddev; |
| 447 | struct page *page = NULL; |
| 448 | struct md_rdev *rdev; |
| 449 | |
| 450 | if (llbitmap->pctl && llbitmap->pctl[idx]) |
| 451 | page = llbitmap->pctl[idx]->page; |
| 452 | if (page) |
| 453 | return page; |
| 454 | |
| 455 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
| 456 | if (!page) |
| 457 | return ERR_PTR(error: -ENOMEM); |
| 458 | |
| 459 | rdev_for_each(rdev, mddev) { |
| 460 | sector_t sector; |
| 461 | |
| 462 | if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags)) |
| 463 | continue; |
| 464 | |
| 465 | sector = mddev->bitmap_info.offset + |
| 466 | (idx << PAGE_SECTORS_SHIFT); |
| 467 | |
| 468 | if (sync_page_io(rdev, sector, PAGE_SIZE, page, opf: REQ_OP_READ, |
| 469 | metadata_op: true)) |
| 470 | return page; |
| 471 | |
| 472 | md_error(mddev, rdev); |
| 473 | } |
| 474 | |
| 475 | __free_page(page); |
| 476 | return ERR_PTR(error: -EIO); |
| 477 | } |
| 478 | |
| 479 | static void llbitmap_write_page(struct llbitmap *llbitmap, int idx) |
| 480 | { |
| 481 | struct page *page = llbitmap->pctl[idx]->page; |
| 482 | struct mddev *mddev = llbitmap->mddev; |
| 483 | struct md_rdev *rdev; |
| 484 | int block; |
| 485 | |
| 486 | for (block = 0; block < llbitmap->blocks_per_page; block++) { |
| 487 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx]; |
| 488 | |
| 489 | if (!test_and_clear_bit(nr: block, addr: pctl->dirty)) |
| 490 | continue; |
| 491 | |
| 492 | rdev_for_each(rdev, mddev) { |
| 493 | sector_t sector; |
| 494 | sector_t bit_sector = llbitmap->io_size >> SECTOR_SHIFT; |
| 495 | |
| 496 | if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags)) |
| 497 | continue; |
| 498 | |
| 499 | sector = mddev->bitmap_info.offset + rdev->sb_start + |
| 500 | (idx << PAGE_SECTORS_SHIFT) + |
| 501 | block * bit_sector; |
| 502 | md_write_metadata(mddev, rdev, sector, |
| 503 | size: llbitmap->io_size, page, |
| 504 | offset: block * llbitmap->io_size); |
| 505 | } |
| 506 | } |
| 507 | } |
| 508 | |
| 509 | static void active_release(struct percpu_ref *ref) |
| 510 | { |
| 511 | struct llbitmap_page_ctl *pctl = |
| 512 | container_of(ref, struct llbitmap_page_ctl, active); |
| 513 | |
| 514 | wake_up(&pctl->wait); |
| 515 | } |
| 516 | |
| 517 | static void llbitmap_free_pages(struct llbitmap *llbitmap) |
| 518 | { |
| 519 | int i; |
| 520 | |
| 521 | if (!llbitmap->pctl) |
| 522 | return; |
| 523 | |
| 524 | for (i = 0; i < llbitmap->nr_pages; i++) { |
| 525 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[i]; |
| 526 | |
| 527 | if (!pctl || !pctl->page) |
| 528 | break; |
| 529 | |
| 530 | __free_page(pctl->page); |
| 531 | percpu_ref_exit(ref: &pctl->active); |
| 532 | } |
| 533 | |
| 534 | kfree(objp: llbitmap->pctl[0]); |
| 535 | kfree(objp: llbitmap->pctl); |
| 536 | llbitmap->pctl = NULL; |
| 537 | } |
| 538 | |
| 539 | static int llbitmap_cache_pages(struct llbitmap *llbitmap) |
| 540 | { |
| 541 | struct llbitmap_page_ctl *pctl; |
| 542 | unsigned int nr_pages = DIV_ROUND_UP(llbitmap->chunks + |
| 543 | BITMAP_DATA_OFFSET, PAGE_SIZE); |
| 544 | unsigned int size = struct_size(pctl, dirty, BITS_TO_LONGS( |
| 545 | llbitmap->blocks_per_page)); |
| 546 | int i; |
| 547 | |
| 548 | llbitmap->pctl = kmalloc_array(nr_pages, sizeof(void *), |
| 549 | GFP_KERNEL | __GFP_ZERO); |
| 550 | if (!llbitmap->pctl) |
| 551 | return -ENOMEM; |
| 552 | |
| 553 | size = round_up(size, cache_line_size()); |
| 554 | pctl = kmalloc_array(nr_pages, size, GFP_KERNEL | __GFP_ZERO); |
| 555 | if (!pctl) { |
| 556 | kfree(objp: llbitmap->pctl); |
| 557 | return -ENOMEM; |
| 558 | } |
| 559 | |
| 560 | llbitmap->nr_pages = nr_pages; |
| 561 | |
| 562 | for (i = 0; i < nr_pages; i++, pctl = (void *)pctl + size) { |
| 563 | struct page *page = llbitmap_read_page(llbitmap, idx: i); |
| 564 | |
| 565 | llbitmap->pctl[i] = pctl; |
| 566 | |
| 567 | if (IS_ERR(ptr: page)) { |
| 568 | llbitmap_free_pages(llbitmap); |
| 569 | return PTR_ERR(ptr: page); |
| 570 | } |
| 571 | |
| 572 | if (percpu_ref_init(ref: &pctl->active, release: active_release, |
| 573 | flags: PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) { |
| 574 | __free_page(page); |
| 575 | llbitmap_free_pages(llbitmap); |
| 576 | return -ENOMEM; |
| 577 | } |
| 578 | |
| 579 | pctl->page = page; |
| 580 | pctl->state = page_address(page); |
| 581 | init_waitqueue_head(&pctl->wait); |
| 582 | } |
| 583 | |
| 584 | return 0; |
| 585 | } |
| 586 | |
| 587 | static void llbitmap_init_state(struct llbitmap *llbitmap) |
| 588 | { |
| 589 | enum llbitmap_state state = BitUnwritten; |
| 590 | unsigned long i; |
| 591 | |
| 592 | if (test_and_clear_bit(nr: BITMAP_CLEAN, addr: &llbitmap->flags)) |
| 593 | state = BitClean; |
| 594 | |
| 595 | for (i = 0; i < llbitmap->chunks; i++) |
| 596 | llbitmap_write(llbitmap, state, pos: i); |
| 597 | } |
| 598 | |
| 599 | /* The return value is only used from resync, where @start == @end. */ |
| 600 | static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap, |
| 601 | unsigned long start, |
| 602 | unsigned long end, |
| 603 | enum llbitmap_action action) |
| 604 | { |
| 605 | struct mddev *mddev = llbitmap->mddev; |
| 606 | enum llbitmap_state state = BitNone; |
| 607 | bool level_456 = raid_is_456(mddev: llbitmap->mddev); |
| 608 | bool need_resync = false; |
| 609 | bool need_recovery = false; |
| 610 | |
| 611 | if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) |
| 612 | return BitNone; |
| 613 | |
| 614 | if (action == BitmapActionInit) { |
| 615 | llbitmap_init_state(llbitmap); |
| 616 | return BitNone; |
| 617 | } |
| 618 | |
| 619 | while (start <= end) { |
| 620 | enum llbitmap_state c = llbitmap_read(llbitmap, pos: start); |
| 621 | |
| 622 | if (c < 0 || c >= BitStateCount) { |
| 623 | pr_err("%s: invalid bit %lu state %d action %d, forcing resync\n" , |
| 624 | __func__, start, c, action); |
| 625 | state = BitNeedSync; |
| 626 | goto write_bitmap; |
| 627 | } |
| 628 | |
| 629 | if (c == BitNeedSync) |
| 630 | need_resync = !mddev->degraded; |
| 631 | |
| 632 | state = state_machine[c][action]; |
| 633 | |
| 634 | write_bitmap: |
| 635 | if (unlikely(mddev->degraded)) { |
| 636 | /* For degraded array, mark new data as need sync. */ |
| 637 | if (state == BitDirty && |
| 638 | action == BitmapActionStartwrite) |
| 639 | state = BitNeedSync; |
| 640 | /* |
| 641 | * For degraded array, resync dirty data as well, noted |
| 642 | * if array is still degraded after resync is done, all |
| 643 | * new data will still be dirty until array is clean. |
| 644 | */ |
| 645 | else if (c == BitDirty && |
| 646 | action == BitmapActionStartsync) |
| 647 | state = BitSyncing; |
| 648 | } else if (c == BitUnwritten && state == BitDirty && |
| 649 | action == BitmapActionStartwrite && level_456) { |
| 650 | /* Delay raid456 initial recovery to first write. */ |
| 651 | state = BitNeedSync; |
| 652 | } |
| 653 | |
| 654 | if (state == BitNone) { |
| 655 | start++; |
| 656 | continue; |
| 657 | } |
| 658 | |
| 659 | llbitmap_write(llbitmap, state, pos: start); |
| 660 | |
| 661 | if (state == BitNeedSync) |
| 662 | need_resync = !mddev->degraded; |
| 663 | else if (state == BitDirty && |
| 664 | !timer_pending(timer: &llbitmap->pending_timer)) |
| 665 | mod_timer(timer: &llbitmap->pending_timer, |
| 666 | expires: jiffies + mddev->bitmap_info.daemon_sleep * HZ); |
| 667 | |
| 668 | start++; |
| 669 | } |
| 670 | |
| 671 | if (need_resync && level_456) |
| 672 | need_recovery = true; |
| 673 | |
| 674 | if (need_recovery) { |
| 675 | set_bit(nr: MD_RECOVERY_NEEDED, addr: &mddev->recovery); |
| 676 | set_bit(nr: MD_RECOVERY_LAZY_RECOVER, addr: &mddev->recovery); |
| 677 | md_wakeup_thread(mddev->thread); |
| 678 | } else if (need_resync) { |
| 679 | set_bit(nr: MD_RECOVERY_NEEDED, addr: &mddev->recovery); |
| 680 | set_bit(nr: MD_RECOVERY_SYNC, addr: &mddev->recovery); |
| 681 | md_wakeup_thread(mddev->thread); |
| 682 | } |
| 683 | |
| 684 | return state; |
| 685 | } |
| 686 | |
| 687 | static void llbitmap_raise_barrier(struct llbitmap *llbitmap, int page_idx) |
| 688 | { |
| 689 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx]; |
| 690 | |
| 691 | retry: |
| 692 | if (likely(percpu_ref_tryget_live(&pctl->active))) { |
| 693 | WRITE_ONCE(pctl->expire, jiffies + llbitmap->barrier_idle * HZ); |
| 694 | return; |
| 695 | } |
| 696 | |
| 697 | wait_event(pctl->wait, !percpu_ref_is_dying(&pctl->active)); |
| 698 | goto retry; |
| 699 | } |
| 700 | |
| 701 | static void llbitmap_release_barrier(struct llbitmap *llbitmap, int page_idx) |
| 702 | { |
| 703 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx]; |
| 704 | |
| 705 | percpu_ref_put(ref: &pctl->active); |
| 706 | } |
| 707 | |
| 708 | static int llbitmap_suspend_timeout(struct llbitmap *llbitmap, int page_idx) |
| 709 | { |
| 710 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx]; |
| 711 | |
| 712 | percpu_ref_kill(ref: &pctl->active); |
| 713 | |
| 714 | if (!wait_event_timeout(pctl->wait, percpu_ref_is_zero(&pctl->active), |
| 715 | llbitmap->mddev->bitmap_info.daemon_sleep * HZ)) |
| 716 | return -ETIMEDOUT; |
| 717 | |
| 718 | return 0; |
| 719 | } |
| 720 | |
| 721 | static void llbitmap_resume(struct llbitmap *llbitmap, int page_idx) |
| 722 | { |
| 723 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx]; |
| 724 | |
| 725 | pctl->expire = LONG_MAX; |
| 726 | percpu_ref_resurrect(ref: &pctl->active); |
| 727 | wake_up(&pctl->wait); |
| 728 | } |
| 729 | |
| 730 | static int llbitmap_check_support(struct mddev *mddev) |
| 731 | { |
| 732 | if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { |
| 733 | pr_notice("md/llbitmap: %s: array with journal cannot have bitmap\n" , |
| 734 | mdname(mddev)); |
| 735 | return -EBUSY; |
| 736 | } |
| 737 | |
| 738 | if (mddev->bitmap_info.space == 0) { |
| 739 | if (mddev->bitmap_info.default_space == 0) { |
| 740 | pr_notice("md/llbitmap: %s: no space for bitmap\n" , |
| 741 | mdname(mddev)); |
| 742 | return -ENOSPC; |
| 743 | } |
| 744 | } |
| 745 | |
| 746 | if (!mddev->persistent) { |
| 747 | pr_notice("md/llbitmap: %s: array must be persistent\n" , |
| 748 | mdname(mddev)); |
| 749 | return -EOPNOTSUPP; |
| 750 | } |
| 751 | |
| 752 | if (mddev->bitmap_info.file) { |
| 753 | pr_notice("md/llbitmap: %s: doesn't support bitmap file\n" , |
| 754 | mdname(mddev)); |
| 755 | return -EOPNOTSUPP; |
| 756 | } |
| 757 | |
| 758 | if (mddev->bitmap_info.external) { |
| 759 | pr_notice("md/llbitmap: %s: doesn't support external metadata\n" , |
| 760 | mdname(mddev)); |
| 761 | return -EOPNOTSUPP; |
| 762 | } |
| 763 | |
| 764 | if (mddev_is_dm(mddev)) { |
| 765 | pr_notice("md/llbitmap: %s: doesn't support dm-raid\n" , |
| 766 | mdname(mddev)); |
| 767 | return -EOPNOTSUPP; |
| 768 | } |
| 769 | |
| 770 | return 0; |
| 771 | } |
| 772 | |
| 773 | static int llbitmap_init(struct llbitmap *llbitmap) |
| 774 | { |
| 775 | struct mddev *mddev = llbitmap->mddev; |
| 776 | sector_t blocks = mddev->resync_max_sectors; |
| 777 | unsigned long chunksize = MIN_CHUNK_SIZE; |
| 778 | unsigned long chunks = DIV_ROUND_UP(blocks, chunksize); |
| 779 | unsigned long space = mddev->bitmap_info.space << SECTOR_SHIFT; |
| 780 | int ret; |
| 781 | |
| 782 | while (chunks > space) { |
| 783 | chunksize = chunksize << 1; |
| 784 | chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize); |
| 785 | } |
| 786 | |
| 787 | llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE; |
| 788 | llbitmap->chunkshift = ffz(~chunksize); |
| 789 | llbitmap->chunksize = chunksize; |
| 790 | llbitmap->chunks = chunks; |
| 791 | mddev->bitmap_info.daemon_sleep = DEFAULT_DAEMON_SLEEP; |
| 792 | |
| 793 | ret = llbitmap_cache_pages(llbitmap); |
| 794 | if (ret) |
| 795 | return ret; |
| 796 | |
| 797 | llbitmap_state_machine(llbitmap, start: 0, end: llbitmap->chunks - 1, |
| 798 | action: BitmapActionInit); |
| 799 | /* flush initial llbitmap to disk */ |
| 800 | __llbitmap_flush(mddev); |
| 801 | |
| 802 | return 0; |
| 803 | } |
| 804 | |
| 805 | static int llbitmap_read_sb(struct llbitmap *llbitmap) |
| 806 | { |
| 807 | struct mddev *mddev = llbitmap->mddev; |
| 808 | unsigned long daemon_sleep; |
| 809 | unsigned long chunksize; |
| 810 | unsigned long events; |
| 811 | struct page *sb_page; |
| 812 | bitmap_super_t *sb; |
| 813 | int ret = -EINVAL; |
| 814 | |
| 815 | if (!mddev->bitmap_info.offset) { |
| 816 | pr_err("md/llbitmap: %s: no super block found" , mdname(mddev)); |
| 817 | return -EINVAL; |
| 818 | } |
| 819 | |
| 820 | sb_page = llbitmap_read_page(llbitmap, idx: 0); |
| 821 | if (IS_ERR(ptr: sb_page)) { |
| 822 | pr_err("md/llbitmap: %s: read super block failed" , |
| 823 | mdname(mddev)); |
| 824 | return -EIO; |
| 825 | } |
| 826 | |
| 827 | sb = kmap_local_page(page: sb_page); |
| 828 | if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) { |
| 829 | pr_err("md/llbitmap: %s: invalid super block magic number" , |
| 830 | mdname(mddev)); |
| 831 | goto out_put_page; |
| 832 | } |
| 833 | |
| 834 | if (sb->version != cpu_to_le32(BITMAP_MAJOR_LOCKLESS)) { |
| 835 | pr_err("md/llbitmap: %s: invalid super block version" , |
| 836 | mdname(mddev)); |
| 837 | goto out_put_page; |
| 838 | } |
| 839 | |
| 840 | if (memcmp(p: sb->uuid, q: mddev->uuid, size: 16)) { |
| 841 | pr_err("md/llbitmap: %s: bitmap superblock UUID mismatch\n" , |
| 842 | mdname(mddev)); |
| 843 | goto out_put_page; |
| 844 | } |
| 845 | |
| 846 | if (mddev->bitmap_info.space == 0) { |
| 847 | int room = le32_to_cpu(sb->sectors_reserved); |
| 848 | |
| 849 | if (room) |
| 850 | mddev->bitmap_info.space = room; |
| 851 | else |
| 852 | mddev->bitmap_info.space = mddev->bitmap_info.default_space; |
| 853 | } |
| 854 | llbitmap->flags = le32_to_cpu(sb->state); |
| 855 | if (test_and_clear_bit(nr: BITMAP_FIRST_USE, addr: &llbitmap->flags)) { |
| 856 | ret = llbitmap_init(llbitmap); |
| 857 | goto out_put_page; |
| 858 | } |
| 859 | |
| 860 | chunksize = le32_to_cpu(sb->chunksize); |
| 861 | if (!is_power_of_2(n: chunksize)) { |
| 862 | pr_err("md/llbitmap: %s: chunksize not a power of 2" , |
| 863 | mdname(mddev)); |
| 864 | goto out_put_page; |
| 865 | } |
| 866 | |
| 867 | if (chunksize < DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors, |
| 868 | mddev->bitmap_info.space << SECTOR_SHIFT)) { |
| 869 | pr_err("md/llbitmap: %s: chunksize too small %lu < %llu / %lu" , |
| 870 | mdname(mddev), chunksize, mddev->resync_max_sectors, |
| 871 | mddev->bitmap_info.space); |
| 872 | goto out_put_page; |
| 873 | } |
| 874 | |
| 875 | daemon_sleep = le32_to_cpu(sb->daemon_sleep); |
| 876 | if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) { |
| 877 | pr_err("md/llbitmap: %s: daemon sleep %lu period out of range" , |
| 878 | mdname(mddev), daemon_sleep); |
| 879 | goto out_put_page; |
| 880 | } |
| 881 | |
| 882 | events = le64_to_cpu(sb->events); |
| 883 | if (events < mddev->events) { |
| 884 | pr_warn("md/llbitmap :%s: bitmap file is out of date (%lu < %llu) -- forcing full recovery" , |
| 885 | mdname(mddev), events, mddev->events); |
| 886 | set_bit(nr: BITMAP_STALE, addr: &llbitmap->flags); |
| 887 | } |
| 888 | |
| 889 | sb->sync_size = cpu_to_le64(mddev->resync_max_sectors); |
| 890 | mddev->bitmap_info.chunksize = chunksize; |
| 891 | mddev->bitmap_info.daemon_sleep = daemon_sleep; |
| 892 | |
| 893 | llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE; |
| 894 | llbitmap->chunksize = chunksize; |
| 895 | llbitmap->chunks = DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors, chunksize); |
| 896 | llbitmap->chunkshift = ffz(~chunksize); |
| 897 | ret = llbitmap_cache_pages(llbitmap); |
| 898 | |
| 899 | out_put_page: |
| 900 | __free_page(sb_page); |
| 901 | kunmap_local(sb); |
| 902 | return ret; |
| 903 | } |
| 904 | |
| 905 | static void llbitmap_pending_timer_fn(struct timer_list *pending_timer) |
| 906 | { |
| 907 | struct llbitmap *llbitmap = |
| 908 | container_of(pending_timer, struct llbitmap, pending_timer); |
| 909 | |
| 910 | if (work_busy(work: &llbitmap->daemon_work)) { |
| 911 | pr_warn("md/llbitmap: %s daemon_work not finished in %lu seconds\n" , |
| 912 | mdname(llbitmap->mddev), |
| 913 | llbitmap->mddev->bitmap_info.daemon_sleep); |
| 914 | set_bit(nr: BITMAP_DAEMON_BUSY, addr: &llbitmap->flags); |
| 915 | return; |
| 916 | } |
| 917 | |
| 918 | queue_work(wq: md_llbitmap_io_wq, work: &llbitmap->daemon_work); |
| 919 | } |
| 920 | |
| 921 | static void md_llbitmap_daemon_fn(struct work_struct *work) |
| 922 | { |
| 923 | struct llbitmap *llbitmap = |
| 924 | container_of(work, struct llbitmap, daemon_work); |
| 925 | unsigned long start; |
| 926 | unsigned long end; |
| 927 | bool restart; |
| 928 | int idx; |
| 929 | |
| 930 | if (llbitmap->mddev->degraded) |
| 931 | return; |
| 932 | retry: |
| 933 | start = 0; |
| 934 | end = min(llbitmap->chunks, PAGE_SIZE - BITMAP_DATA_OFFSET) - 1; |
| 935 | restart = false; |
| 936 | |
| 937 | for (idx = 0; idx < llbitmap->nr_pages; idx++) { |
| 938 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx]; |
| 939 | |
| 940 | if (idx > 0) { |
| 941 | start = end + 1; |
| 942 | end = min(end + PAGE_SIZE, llbitmap->chunks - 1); |
| 943 | } |
| 944 | |
| 945 | if (!test_bit(LLPageFlush, &pctl->flags) && |
| 946 | time_before(jiffies, pctl->expire)) { |
| 947 | restart = true; |
| 948 | continue; |
| 949 | } |
| 950 | |
| 951 | if (llbitmap_suspend_timeout(llbitmap, page_idx: idx) < 0) { |
| 952 | pr_warn("md/llbitmap: %s: %s waiting for page %d timeout\n" , |
| 953 | mdname(llbitmap->mddev), __func__, idx); |
| 954 | continue; |
| 955 | } |
| 956 | |
| 957 | llbitmap_state_machine(llbitmap, start, end, action: BitmapActionDaemon); |
| 958 | llbitmap_resume(llbitmap, page_idx: idx); |
| 959 | } |
| 960 | |
| 961 | /* |
| 962 | * If the daemon took a long time to finish, retry to prevent missing |
| 963 | * clearing dirty bits. |
| 964 | */ |
| 965 | if (test_and_clear_bit(nr: BITMAP_DAEMON_BUSY, addr: &llbitmap->flags)) |
| 966 | goto retry; |
| 967 | |
| 968 | /* If some page is dirty but not expired, setup timer again */ |
| 969 | if (restart) |
| 970 | mod_timer(timer: &llbitmap->pending_timer, |
| 971 | expires: jiffies + llbitmap->mddev->bitmap_info.daemon_sleep * HZ); |
| 972 | } |
| 973 | |
| 974 | static int llbitmap_create(struct mddev *mddev) |
| 975 | { |
| 976 | struct llbitmap *llbitmap; |
| 977 | int ret; |
| 978 | |
| 979 | ret = llbitmap_check_support(mddev); |
| 980 | if (ret) |
| 981 | return ret; |
| 982 | |
| 983 | llbitmap = kzalloc(sizeof(*llbitmap), GFP_KERNEL); |
| 984 | if (!llbitmap) |
| 985 | return -ENOMEM; |
| 986 | |
| 987 | llbitmap->mddev = mddev; |
| 988 | llbitmap->io_size = bdev_logical_block_size(bdev: mddev->gendisk->part0); |
| 989 | llbitmap->blocks_per_page = PAGE_SIZE / llbitmap->io_size; |
| 990 | |
| 991 | timer_setup(&llbitmap->pending_timer, llbitmap_pending_timer_fn, 0); |
| 992 | INIT_WORK(&llbitmap->daemon_work, md_llbitmap_daemon_fn); |
| 993 | atomic_set(v: &llbitmap->behind_writes, i: 0); |
| 994 | init_waitqueue_head(&llbitmap->behind_wait); |
| 995 | |
| 996 | mutex_lock(&mddev->bitmap_info.mutex); |
| 997 | mddev->bitmap = llbitmap; |
| 998 | ret = llbitmap_read_sb(llbitmap); |
| 999 | mutex_unlock(lock: &mddev->bitmap_info.mutex); |
| 1000 | if (ret) { |
| 1001 | kfree(objp: llbitmap); |
| 1002 | mddev->bitmap = NULL; |
| 1003 | } |
| 1004 | |
| 1005 | return ret; |
| 1006 | } |
| 1007 | |
| 1008 | static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize) |
| 1009 | { |
| 1010 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1011 | unsigned long chunks; |
| 1012 | |
| 1013 | if (chunksize == 0) |
| 1014 | chunksize = llbitmap->chunksize; |
| 1015 | |
| 1016 | /* If there is enough space, leave the chunksize unchanged. */ |
| 1017 | chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize); |
| 1018 | while (chunks > mddev->bitmap_info.space << SECTOR_SHIFT) { |
| 1019 | chunksize = chunksize << 1; |
| 1020 | chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize); |
| 1021 | } |
| 1022 | |
| 1023 | llbitmap->chunkshift = ffz(~chunksize); |
| 1024 | llbitmap->chunksize = chunksize; |
| 1025 | llbitmap->chunks = chunks; |
| 1026 | |
| 1027 | return 0; |
| 1028 | } |
| 1029 | |
| 1030 | static int llbitmap_load(struct mddev *mddev) |
| 1031 | { |
| 1032 | enum llbitmap_action action = BitmapActionReload; |
| 1033 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1034 | |
| 1035 | if (test_and_clear_bit(nr: BITMAP_STALE, addr: &llbitmap->flags)) |
| 1036 | action = BitmapActionStale; |
| 1037 | |
| 1038 | llbitmap_state_machine(llbitmap, start: 0, end: llbitmap->chunks - 1, action); |
| 1039 | return 0; |
| 1040 | } |
| 1041 | |
| 1042 | static void llbitmap_destroy(struct mddev *mddev) |
| 1043 | { |
| 1044 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1045 | |
| 1046 | if (!llbitmap) |
| 1047 | return; |
| 1048 | |
| 1049 | mutex_lock(&mddev->bitmap_info.mutex); |
| 1050 | |
| 1051 | timer_delete_sync(timer: &llbitmap->pending_timer); |
| 1052 | flush_workqueue(md_llbitmap_io_wq); |
| 1053 | flush_workqueue(md_llbitmap_unplug_wq); |
| 1054 | |
| 1055 | mddev->bitmap = NULL; |
| 1056 | llbitmap_free_pages(llbitmap); |
| 1057 | kfree(objp: llbitmap); |
| 1058 | mutex_unlock(lock: &mddev->bitmap_info.mutex); |
| 1059 | } |
| 1060 | |
| 1061 | static void llbitmap_start_write(struct mddev *mddev, sector_t offset, |
| 1062 | unsigned long sectors) |
| 1063 | { |
| 1064 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1065 | unsigned long start = offset >> llbitmap->chunkshift; |
| 1066 | unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift; |
| 1067 | int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; |
| 1068 | int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; |
| 1069 | |
| 1070 | llbitmap_state_machine(llbitmap, start, end, action: BitmapActionStartwrite); |
| 1071 | |
| 1072 | while (page_start <= page_end) { |
| 1073 | llbitmap_raise_barrier(llbitmap, page_idx: page_start); |
| 1074 | page_start++; |
| 1075 | } |
| 1076 | } |
| 1077 | |
| 1078 | static void llbitmap_end_write(struct mddev *mddev, sector_t offset, |
| 1079 | unsigned long sectors) |
| 1080 | { |
| 1081 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1082 | unsigned long start = offset >> llbitmap->chunkshift; |
| 1083 | unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift; |
| 1084 | int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; |
| 1085 | int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; |
| 1086 | |
| 1087 | while (page_start <= page_end) { |
| 1088 | llbitmap_release_barrier(llbitmap, page_idx: page_start); |
| 1089 | page_start++; |
| 1090 | } |
| 1091 | } |
| 1092 | |
| 1093 | static void llbitmap_start_discard(struct mddev *mddev, sector_t offset, |
| 1094 | unsigned long sectors) |
| 1095 | { |
| 1096 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1097 | unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize); |
| 1098 | unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift; |
| 1099 | int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; |
| 1100 | int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; |
| 1101 | |
| 1102 | llbitmap_state_machine(llbitmap, start, end, action: BitmapActionDiscard); |
| 1103 | |
| 1104 | while (page_start <= page_end) { |
| 1105 | llbitmap_raise_barrier(llbitmap, page_idx: page_start); |
| 1106 | page_start++; |
| 1107 | } |
| 1108 | } |
| 1109 | |
| 1110 | static void llbitmap_end_discard(struct mddev *mddev, sector_t offset, |
| 1111 | unsigned long sectors) |
| 1112 | { |
| 1113 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1114 | unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize); |
| 1115 | unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift; |
| 1116 | int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; |
| 1117 | int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; |
| 1118 | |
| 1119 | while (page_start <= page_end) { |
| 1120 | llbitmap_release_barrier(llbitmap, page_idx: page_start); |
| 1121 | page_start++; |
| 1122 | } |
| 1123 | } |
| 1124 | |
| 1125 | static void llbitmap_unplug_fn(struct work_struct *work) |
| 1126 | { |
| 1127 | struct llbitmap_unplug_work *unplug_work = |
| 1128 | container_of(work, struct llbitmap_unplug_work, work); |
| 1129 | struct llbitmap *llbitmap = unplug_work->llbitmap; |
| 1130 | struct blk_plug plug; |
| 1131 | int i; |
| 1132 | |
| 1133 | blk_start_plug(&plug); |
| 1134 | |
| 1135 | for (i = 0; i < llbitmap->nr_pages; i++) { |
| 1136 | if (!test_bit(LLPageDirty, &llbitmap->pctl[i]->flags) || |
| 1137 | !test_and_clear_bit(nr: LLPageDirty, addr: &llbitmap->pctl[i]->flags)) |
| 1138 | continue; |
| 1139 | |
| 1140 | llbitmap_write_page(llbitmap, idx: i); |
| 1141 | } |
| 1142 | |
| 1143 | blk_finish_plug(&plug); |
| 1144 | md_super_wait(mddev: llbitmap->mddev); |
| 1145 | complete(unplug_work->done); |
| 1146 | } |
| 1147 | |
| 1148 | static bool llbitmap_dirty(struct llbitmap *llbitmap) |
| 1149 | { |
| 1150 | int i; |
| 1151 | |
| 1152 | for (i = 0; i < llbitmap->nr_pages; i++) |
| 1153 | if (test_bit(LLPageDirty, &llbitmap->pctl[i]->flags)) |
| 1154 | return true; |
| 1155 | |
| 1156 | return false; |
| 1157 | } |
| 1158 | |
| 1159 | static void llbitmap_unplug(struct mddev *mddev, bool sync) |
| 1160 | { |
| 1161 | DECLARE_COMPLETION_ONSTACK(done); |
| 1162 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1163 | struct llbitmap_unplug_work unplug_work = { |
| 1164 | .llbitmap = llbitmap, |
| 1165 | .done = &done, |
| 1166 | }; |
| 1167 | |
| 1168 | if (!llbitmap_dirty(llbitmap)) |
| 1169 | return; |
| 1170 | |
| 1171 | /* |
| 1172 | * Issue new bitmap IO under submit_bio() context will deadlock: |
| 1173 | * - the bio will wait for bitmap bio to be done, before it can be |
| 1174 | * issued; |
| 1175 | * - bitmap bio will be added to current->bio_list and wait for this |
| 1176 | * bio to be issued; |
| 1177 | */ |
| 1178 | INIT_WORK_ONSTACK(&unplug_work.work, llbitmap_unplug_fn); |
| 1179 | queue_work(wq: md_llbitmap_unplug_wq, work: &unplug_work.work); |
| 1180 | wait_for_completion(&done); |
| 1181 | destroy_work_on_stack(work: &unplug_work.work); |
| 1182 | } |
| 1183 | |
| 1184 | /* |
| 1185 | * Force to write all bitmap pages to disk, called when stopping the array, or |
| 1186 | * every daemon_sleep seconds when sync_thread is running. |
| 1187 | */ |
| 1188 | static void __llbitmap_flush(struct mddev *mddev) |
| 1189 | { |
| 1190 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1191 | struct blk_plug plug; |
| 1192 | int i; |
| 1193 | |
| 1194 | blk_start_plug(&plug); |
| 1195 | for (i = 0; i < llbitmap->nr_pages; i++) { |
| 1196 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[i]; |
| 1197 | |
| 1198 | /* mark all blocks as dirty */ |
| 1199 | set_bit(nr: LLPageDirty, addr: &pctl->flags); |
| 1200 | bitmap_fill(dst: pctl->dirty, nbits: llbitmap->blocks_per_page); |
| 1201 | llbitmap_write_page(llbitmap, idx: i); |
| 1202 | } |
| 1203 | blk_finish_plug(&plug); |
| 1204 | md_super_wait(mddev: llbitmap->mddev); |
| 1205 | } |
| 1206 | |
| 1207 | static void llbitmap_flush(struct mddev *mddev) |
| 1208 | { |
| 1209 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1210 | int i; |
| 1211 | |
| 1212 | for (i = 0; i < llbitmap->nr_pages; i++) |
| 1213 | set_bit(nr: LLPageFlush, addr: &llbitmap->pctl[i]->flags); |
| 1214 | |
| 1215 | timer_delete_sync(timer: &llbitmap->pending_timer); |
| 1216 | queue_work(wq: md_llbitmap_io_wq, work: &llbitmap->daemon_work); |
| 1217 | flush_work(work: &llbitmap->daemon_work); |
| 1218 | |
| 1219 | __llbitmap_flush(mddev); |
| 1220 | } |
| 1221 | |
| 1222 | /* This is used for raid5 lazy initial recovery */ |
| 1223 | static bool llbitmap_blocks_synced(struct mddev *mddev, sector_t offset) |
| 1224 | { |
| 1225 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1226 | unsigned long p = offset >> llbitmap->chunkshift; |
| 1227 | enum llbitmap_state c = llbitmap_read(llbitmap, pos: p); |
| 1228 | |
| 1229 | return c == BitClean || c == BitDirty; |
| 1230 | } |
| 1231 | |
| 1232 | static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset) |
| 1233 | { |
| 1234 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1235 | unsigned long p = offset >> llbitmap->chunkshift; |
| 1236 | int blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1)); |
| 1237 | enum llbitmap_state c = llbitmap_read(llbitmap, pos: p); |
| 1238 | |
| 1239 | /* always skip unwritten blocks */ |
| 1240 | if (c == BitUnwritten) |
| 1241 | return blocks; |
| 1242 | |
| 1243 | /* For degraded array, don't skip */ |
| 1244 | if (mddev->degraded) |
| 1245 | return 0; |
| 1246 | |
| 1247 | /* For resync also skip clean/dirty blocks */ |
| 1248 | if ((c == BitClean || c == BitDirty) && |
| 1249 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && |
| 1250 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) |
| 1251 | return blocks; |
| 1252 | |
| 1253 | return 0; |
| 1254 | } |
| 1255 | |
| 1256 | static bool llbitmap_start_sync(struct mddev *mddev, sector_t offset, |
| 1257 | sector_t *blocks, bool degraded) |
| 1258 | { |
| 1259 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1260 | unsigned long p = offset >> llbitmap->chunkshift; |
| 1261 | |
| 1262 | /* |
| 1263 | * Handle one bit at a time, this is much simpler. And it doesn't matter |
| 1264 | * if md_do_sync() loop more times. |
| 1265 | */ |
| 1266 | *blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1)); |
| 1267 | return llbitmap_state_machine(llbitmap, start: p, end: p, |
| 1268 | action: BitmapActionStartsync) == BitSyncing; |
| 1269 | } |
| 1270 | |
| 1271 | /* Something is wrong, sync_thread stop at @offset */ |
| 1272 | static void llbitmap_end_sync(struct mddev *mddev, sector_t offset, |
| 1273 | sector_t *blocks) |
| 1274 | { |
| 1275 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1276 | unsigned long p = offset >> llbitmap->chunkshift; |
| 1277 | |
| 1278 | *blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1)); |
| 1279 | llbitmap_state_machine(llbitmap, start: p, end: llbitmap->chunks - 1, |
| 1280 | action: BitmapActionAbortsync); |
| 1281 | } |
| 1282 | |
| 1283 | /* A full sync_thread is finished */ |
| 1284 | static void llbitmap_close_sync(struct mddev *mddev) |
| 1285 | { |
| 1286 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1287 | int i; |
| 1288 | |
| 1289 | for (i = 0; i < llbitmap->nr_pages; i++) { |
| 1290 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[i]; |
| 1291 | |
| 1292 | /* let daemon_fn clear dirty bits immediately */ |
| 1293 | WRITE_ONCE(pctl->expire, jiffies); |
| 1294 | } |
| 1295 | |
| 1296 | llbitmap_state_machine(llbitmap, start: 0, end: llbitmap->chunks - 1, |
| 1297 | action: BitmapActionEndsync); |
| 1298 | } |
| 1299 | |
| 1300 | /* |
| 1301 | * sync_thread have reached @sector, update metadata every daemon_sleep seconds, |
| 1302 | * just in case sync_thread have to restart after power failure. |
| 1303 | */ |
| 1304 | static void llbitmap_cond_end_sync(struct mddev *mddev, sector_t sector, |
| 1305 | bool force) |
| 1306 | { |
| 1307 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1308 | |
| 1309 | if (sector == 0) { |
| 1310 | llbitmap->last_end_sync = jiffies; |
| 1311 | return; |
| 1312 | } |
| 1313 | |
| 1314 | if (time_before(jiffies, llbitmap->last_end_sync + |
| 1315 | HZ * mddev->bitmap_info.daemon_sleep)) |
| 1316 | return; |
| 1317 | |
| 1318 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); |
| 1319 | |
| 1320 | mddev->curr_resync_completed = sector; |
| 1321 | set_bit(nr: MD_SB_CHANGE_CLEAN, addr: &mddev->sb_flags); |
| 1322 | llbitmap_state_machine(llbitmap, start: 0, end: sector >> llbitmap->chunkshift, |
| 1323 | action: BitmapActionEndsync); |
| 1324 | __llbitmap_flush(mddev); |
| 1325 | |
| 1326 | llbitmap->last_end_sync = jiffies; |
| 1327 | sysfs_notify_dirent_safe(sd: mddev->sysfs_completed); |
| 1328 | } |
| 1329 | |
| 1330 | static bool llbitmap_enabled(void *data, bool flush) |
| 1331 | { |
| 1332 | struct llbitmap *llbitmap = data; |
| 1333 | |
| 1334 | return llbitmap && !test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags); |
| 1335 | } |
| 1336 | |
| 1337 | static void llbitmap_dirty_bits(struct mddev *mddev, unsigned long s, |
| 1338 | unsigned long e) |
| 1339 | { |
| 1340 | llbitmap_state_machine(llbitmap: mddev->bitmap, start: s, end: e, action: BitmapActionStartwrite); |
| 1341 | } |
| 1342 | |
| 1343 | static void llbitmap_write_sb(struct llbitmap *llbitmap) |
| 1344 | { |
| 1345 | int nr_blocks = DIV_ROUND_UP(BITMAP_DATA_OFFSET, llbitmap->io_size); |
| 1346 | |
| 1347 | bitmap_fill(dst: llbitmap->pctl[0]->dirty, nbits: nr_blocks); |
| 1348 | llbitmap_write_page(llbitmap, idx: 0); |
| 1349 | md_super_wait(mddev: llbitmap->mddev); |
| 1350 | } |
| 1351 | |
| 1352 | static void llbitmap_update_sb(void *data) |
| 1353 | { |
| 1354 | struct llbitmap *llbitmap = data; |
| 1355 | struct mddev *mddev = llbitmap->mddev; |
| 1356 | struct page *sb_page; |
| 1357 | bitmap_super_t *sb; |
| 1358 | |
| 1359 | if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) |
| 1360 | return; |
| 1361 | |
| 1362 | sb_page = llbitmap_read_page(llbitmap, idx: 0); |
| 1363 | if (IS_ERR(ptr: sb_page)) { |
| 1364 | pr_err("%s: %s: read super block failed" , __func__, |
| 1365 | mdname(mddev)); |
| 1366 | set_bit(nr: BITMAP_WRITE_ERROR, addr: &llbitmap->flags); |
| 1367 | return; |
| 1368 | } |
| 1369 | |
| 1370 | if (mddev->events < llbitmap->events_cleared) |
| 1371 | llbitmap->events_cleared = mddev->events; |
| 1372 | |
| 1373 | sb = kmap_local_page(page: sb_page); |
| 1374 | sb->events = cpu_to_le64(mddev->events); |
| 1375 | sb->state = cpu_to_le32(llbitmap->flags); |
| 1376 | sb->chunksize = cpu_to_le32(llbitmap->chunksize); |
| 1377 | sb->sync_size = cpu_to_le64(mddev->resync_max_sectors); |
| 1378 | sb->events_cleared = cpu_to_le64(llbitmap->events_cleared); |
| 1379 | sb->sectors_reserved = cpu_to_le32(mddev->bitmap_info.space); |
| 1380 | sb->daemon_sleep = cpu_to_le32(mddev->bitmap_info.daemon_sleep); |
| 1381 | |
| 1382 | kunmap_local(sb); |
| 1383 | llbitmap_write_sb(llbitmap); |
| 1384 | } |
| 1385 | |
| 1386 | static int llbitmap_get_stats(void *data, struct md_bitmap_stats *stats) |
| 1387 | { |
| 1388 | struct llbitmap *llbitmap = data; |
| 1389 | |
| 1390 | memset(stats, 0, sizeof(*stats)); |
| 1391 | |
| 1392 | stats->missing_pages = 0; |
| 1393 | stats->pages = llbitmap->nr_pages; |
| 1394 | stats->file_pages = llbitmap->nr_pages; |
| 1395 | |
| 1396 | stats->behind_writes = atomic_read(v: &llbitmap->behind_writes); |
| 1397 | stats->behind_wait = wq_has_sleeper(wq_head: &llbitmap->behind_wait); |
| 1398 | stats->events_cleared = llbitmap->events_cleared; |
| 1399 | |
| 1400 | return 0; |
| 1401 | } |
| 1402 | |
| 1403 | /* just flag all pages as needing to be written */ |
| 1404 | static void llbitmap_write_all(struct mddev *mddev) |
| 1405 | { |
| 1406 | int i; |
| 1407 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1408 | |
| 1409 | for (i = 0; i < llbitmap->nr_pages; i++) { |
| 1410 | struct llbitmap_page_ctl *pctl = llbitmap->pctl[i]; |
| 1411 | |
| 1412 | set_bit(nr: LLPageDirty, addr: &pctl->flags); |
| 1413 | bitmap_fill(dst: pctl->dirty, nbits: llbitmap->blocks_per_page); |
| 1414 | } |
| 1415 | } |
| 1416 | |
| 1417 | static void llbitmap_start_behind_write(struct mddev *mddev) |
| 1418 | { |
| 1419 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1420 | |
| 1421 | atomic_inc(v: &llbitmap->behind_writes); |
| 1422 | } |
| 1423 | |
| 1424 | static void llbitmap_end_behind_write(struct mddev *mddev) |
| 1425 | { |
| 1426 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1427 | |
| 1428 | if (atomic_dec_and_test(v: &llbitmap->behind_writes)) |
| 1429 | wake_up(&llbitmap->behind_wait); |
| 1430 | } |
| 1431 | |
| 1432 | static void llbitmap_wait_behind_writes(struct mddev *mddev) |
| 1433 | { |
| 1434 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1435 | |
| 1436 | if (!llbitmap) |
| 1437 | return; |
| 1438 | |
| 1439 | wait_event(llbitmap->behind_wait, |
| 1440 | atomic_read(&llbitmap->behind_writes) == 0); |
| 1441 | |
| 1442 | } |
| 1443 | |
| 1444 | static ssize_t bits_show(struct mddev *mddev, char *page) |
| 1445 | { |
| 1446 | struct llbitmap *llbitmap; |
| 1447 | int bits[BitStateCount] = {0}; |
| 1448 | loff_t start = 0; |
| 1449 | |
| 1450 | mutex_lock(&mddev->bitmap_info.mutex); |
| 1451 | llbitmap = mddev->bitmap; |
| 1452 | if (!llbitmap || !llbitmap->pctl) { |
| 1453 | mutex_unlock(lock: &mddev->bitmap_info.mutex); |
| 1454 | return sprintf(buf: page, fmt: "no bitmap\n" ); |
| 1455 | } |
| 1456 | |
| 1457 | if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) { |
| 1458 | mutex_unlock(lock: &mddev->bitmap_info.mutex); |
| 1459 | return sprintf(buf: page, fmt: "bitmap io error\n" ); |
| 1460 | } |
| 1461 | |
| 1462 | while (start < llbitmap->chunks) { |
| 1463 | enum llbitmap_state c = llbitmap_read(llbitmap, pos: start); |
| 1464 | |
| 1465 | if (c < 0 || c >= BitStateCount) |
| 1466 | pr_err("%s: invalid bit %llu state %d\n" , |
| 1467 | __func__, start, c); |
| 1468 | else |
| 1469 | bits[c]++; |
| 1470 | start++; |
| 1471 | } |
| 1472 | |
| 1473 | mutex_unlock(lock: &mddev->bitmap_info.mutex); |
| 1474 | return sprintf(buf: page, fmt: "unwritten %d\nclean %d\ndirty %d\nneed sync %d\nsyncing %d\n" , |
| 1475 | bits[BitUnwritten], bits[BitClean], bits[BitDirty], |
| 1476 | bits[BitNeedSync], bits[BitSyncing]); |
| 1477 | } |
| 1478 | |
| 1479 | static struct md_sysfs_entry llbitmap_bits = __ATTR_RO(bits); |
| 1480 | |
| 1481 | static ssize_t metadata_show(struct mddev *mddev, char *page) |
| 1482 | { |
| 1483 | struct llbitmap *llbitmap; |
| 1484 | ssize_t ret; |
| 1485 | |
| 1486 | mutex_lock(&mddev->bitmap_info.mutex); |
| 1487 | llbitmap = mddev->bitmap; |
| 1488 | if (!llbitmap) { |
| 1489 | mutex_unlock(lock: &mddev->bitmap_info.mutex); |
| 1490 | return sprintf(buf: page, fmt: "no bitmap\n" ); |
| 1491 | } |
| 1492 | |
| 1493 | ret = sprintf(buf: page, fmt: "chunksize %lu\nchunkshift %lu\nchunks %lu\noffset %llu\ndaemon_sleep %lu\n" , |
| 1494 | llbitmap->chunksize, llbitmap->chunkshift, |
| 1495 | llbitmap->chunks, mddev->bitmap_info.offset, |
| 1496 | llbitmap->mddev->bitmap_info.daemon_sleep); |
| 1497 | mutex_unlock(lock: &mddev->bitmap_info.mutex); |
| 1498 | |
| 1499 | return ret; |
| 1500 | } |
| 1501 | |
| 1502 | static struct md_sysfs_entry llbitmap_metadata = __ATTR_RO(metadata); |
| 1503 | |
| 1504 | static ssize_t |
| 1505 | daemon_sleep_show(struct mddev *mddev, char *page) |
| 1506 | { |
| 1507 | return sprintf(buf: page, fmt: "%lu\n" , mddev->bitmap_info.daemon_sleep); |
| 1508 | } |
| 1509 | |
| 1510 | static ssize_t |
| 1511 | daemon_sleep_store(struct mddev *mddev, const char *buf, size_t len) |
| 1512 | { |
| 1513 | unsigned long timeout; |
| 1514 | int rv = kstrtoul(s: buf, base: 10, res: &timeout); |
| 1515 | |
| 1516 | if (rv) |
| 1517 | return rv; |
| 1518 | |
| 1519 | mddev->bitmap_info.daemon_sleep = timeout; |
| 1520 | return len; |
| 1521 | } |
| 1522 | |
| 1523 | static struct md_sysfs_entry llbitmap_daemon_sleep = __ATTR_RW(daemon_sleep); |
| 1524 | |
| 1525 | static ssize_t |
| 1526 | barrier_idle_show(struct mddev *mddev, char *page) |
| 1527 | { |
| 1528 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1529 | |
| 1530 | return sprintf(buf: page, fmt: "%lu\n" , llbitmap->barrier_idle); |
| 1531 | } |
| 1532 | |
| 1533 | static ssize_t |
| 1534 | barrier_idle_store(struct mddev *mddev, const char *buf, size_t len) |
| 1535 | { |
| 1536 | struct llbitmap *llbitmap = mddev->bitmap; |
| 1537 | unsigned long timeout; |
| 1538 | int rv = kstrtoul(s: buf, base: 10, res: &timeout); |
| 1539 | |
| 1540 | if (rv) |
| 1541 | return rv; |
| 1542 | |
| 1543 | llbitmap->barrier_idle = timeout; |
| 1544 | return len; |
| 1545 | } |
| 1546 | |
| 1547 | static struct md_sysfs_entry llbitmap_barrier_idle = __ATTR_RW(barrier_idle); |
| 1548 | |
| 1549 | static struct attribute *md_llbitmap_attrs[] = { |
| 1550 | &llbitmap_bits.attr, |
| 1551 | &llbitmap_metadata.attr, |
| 1552 | &llbitmap_daemon_sleep.attr, |
| 1553 | &llbitmap_barrier_idle.attr, |
| 1554 | NULL |
| 1555 | }; |
| 1556 | |
| 1557 | static struct attribute_group md_llbitmap_group = { |
| 1558 | .name = "llbitmap" , |
| 1559 | .attrs = md_llbitmap_attrs, |
| 1560 | }; |
| 1561 | |
| 1562 | static struct bitmap_operations llbitmap_ops = { |
| 1563 | .head = { |
| 1564 | .type = MD_BITMAP, |
| 1565 | .id = ID_LLBITMAP, |
| 1566 | .name = "llbitmap" , |
| 1567 | }, |
| 1568 | |
| 1569 | .enabled = llbitmap_enabled, |
| 1570 | .create = llbitmap_create, |
| 1571 | .resize = llbitmap_resize, |
| 1572 | .load = llbitmap_load, |
| 1573 | .destroy = llbitmap_destroy, |
| 1574 | |
| 1575 | .start_write = llbitmap_start_write, |
| 1576 | .end_write = llbitmap_end_write, |
| 1577 | .start_discard = llbitmap_start_discard, |
| 1578 | .end_discard = llbitmap_end_discard, |
| 1579 | .unplug = llbitmap_unplug, |
| 1580 | .flush = llbitmap_flush, |
| 1581 | |
| 1582 | .start_behind_write = llbitmap_start_behind_write, |
| 1583 | .end_behind_write = llbitmap_end_behind_write, |
| 1584 | .wait_behind_writes = llbitmap_wait_behind_writes, |
| 1585 | |
| 1586 | .blocks_synced = llbitmap_blocks_synced, |
| 1587 | .skip_sync_blocks = llbitmap_skip_sync_blocks, |
| 1588 | .start_sync = llbitmap_start_sync, |
| 1589 | .end_sync = llbitmap_end_sync, |
| 1590 | .close_sync = llbitmap_close_sync, |
| 1591 | .cond_end_sync = llbitmap_cond_end_sync, |
| 1592 | |
| 1593 | .update_sb = llbitmap_update_sb, |
| 1594 | .get_stats = llbitmap_get_stats, |
| 1595 | .dirty_bits = llbitmap_dirty_bits, |
| 1596 | .write_all = llbitmap_write_all, |
| 1597 | |
| 1598 | .group = &md_llbitmap_group, |
| 1599 | }; |
| 1600 | |
| 1601 | int md_llbitmap_init(void) |
| 1602 | { |
| 1603 | md_llbitmap_io_wq = alloc_workqueue("md_llbitmap_io" , |
| 1604 | WQ_MEM_RECLAIM | WQ_UNBOUND, 0); |
| 1605 | if (!md_llbitmap_io_wq) |
| 1606 | return -ENOMEM; |
| 1607 | |
| 1608 | md_llbitmap_unplug_wq = alloc_workqueue("md_llbitmap_unplug" , |
| 1609 | WQ_MEM_RECLAIM | WQ_UNBOUND, 0); |
| 1610 | if (!md_llbitmap_unplug_wq) { |
| 1611 | destroy_workqueue(wq: md_llbitmap_io_wq); |
| 1612 | md_llbitmap_io_wq = NULL; |
| 1613 | return -ENOMEM; |
| 1614 | } |
| 1615 | |
| 1616 | return register_md_submodule(msh: &llbitmap_ops.head); |
| 1617 | } |
| 1618 | |
| 1619 | void md_llbitmap_exit(void) |
| 1620 | { |
| 1621 | destroy_workqueue(wq: md_llbitmap_io_wq); |
| 1622 | md_llbitmap_io_wq = NULL; |
| 1623 | destroy_workqueue(wq: md_llbitmap_unplug_wq); |
| 1624 | md_llbitmap_unplug_wq = NULL; |
| 1625 | unregister_md_submodule(msh: &llbitmap_ops.head); |
| 1626 | } |
| 1627 | |