| 1 | // SPDX-License-Identifier: GPL-2.0+ |
| 2 | /* |
| 3 | * NILFS dat/inode allocator |
| 4 | * |
| 5 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. |
| 6 | * |
| 7 | * Originally written by Koji Sato. |
| 8 | * Two allocators were unified by Ryusuke Konishi and Amagai Yoshiji. |
| 9 | */ |
| 10 | |
| 11 | #include <linux/types.h> |
| 12 | #include <linux/buffer_head.h> |
| 13 | #include <linux/fs.h> |
| 14 | #include <linux/bitops.h> |
| 15 | #include <linux/slab.h> |
| 16 | #include "mdt.h" |
| 17 | #include "alloc.h" |
| 18 | |
| 19 | |
| 20 | /** |
| 21 | * nilfs_palloc_groups_per_desc_block - get the number of groups that a group |
| 22 | * descriptor block can maintain |
| 23 | * @inode: inode of metadata file using this allocator |
| 24 | * |
| 25 | * Return: Number of groups that a group descriptor block can maintain. |
| 26 | */ |
| 27 | static inline unsigned long |
| 28 | nilfs_palloc_groups_per_desc_block(const struct inode *inode) |
| 29 | { |
| 30 | return i_blocksize(node: inode) / |
| 31 | sizeof(struct nilfs_palloc_group_desc); |
| 32 | } |
| 33 | |
| 34 | /** |
| 35 | * nilfs_palloc_groups_count - get maximum number of groups |
| 36 | * @inode: inode of metadata file using this allocator |
| 37 | * |
| 38 | * Return: Maximum number of groups. |
| 39 | */ |
| 40 | static inline unsigned long |
| 41 | nilfs_palloc_groups_count(const struct inode *inode) |
| 42 | { |
| 43 | return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */)); |
| 44 | } |
| 45 | |
| 46 | /** |
| 47 | * nilfs_palloc_init_blockgroup - initialize private variables for allocator |
| 48 | * @inode: inode of metadata file using this allocator |
| 49 | * @entry_size: size of the persistent object |
| 50 | * |
| 51 | * Return: 0 on success, or a negative error code on failure. |
| 52 | */ |
| 53 | int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned int entry_size) |
| 54 | { |
| 55 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); |
| 56 | |
| 57 | mi->mi_bgl = kmalloc(sizeof(*mi->mi_bgl), GFP_NOFS); |
| 58 | if (!mi->mi_bgl) |
| 59 | return -ENOMEM; |
| 60 | |
| 61 | bgl_lock_init(bgl: mi->mi_bgl); |
| 62 | |
| 63 | nilfs_mdt_set_entry_size(inode, entry_size, 0); |
| 64 | |
| 65 | mi->mi_blocks_per_group = |
| 66 | DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode), |
| 67 | mi->mi_entries_per_block) + 1; |
| 68 | /* |
| 69 | * Number of blocks in a group including entry blocks |
| 70 | * and a bitmap block |
| 71 | */ |
| 72 | mi->mi_blocks_per_desc_block = |
| 73 | nilfs_palloc_groups_per_desc_block(inode) * |
| 74 | mi->mi_blocks_per_group + 1; |
| 75 | /* |
| 76 | * Number of blocks per descriptor including the |
| 77 | * descriptor block |
| 78 | */ |
| 79 | return 0; |
| 80 | } |
| 81 | |
| 82 | /** |
| 83 | * nilfs_palloc_group - get group number and offset from an entry number |
| 84 | * @inode: inode of metadata file using this allocator |
| 85 | * @nr: serial number of the entry (e.g. inode number) |
| 86 | * @offset: pointer to store offset number in the group |
| 87 | * |
| 88 | * Return: Number of the group that contains the entry with the index |
| 89 | * specified by @nr. |
| 90 | */ |
| 91 | static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, |
| 92 | unsigned long *offset) |
| 93 | { |
| 94 | __u64 group = nr; |
| 95 | |
| 96 | *offset = do_div(group, nilfs_palloc_entries_per_group(inode)); |
| 97 | return group; |
| 98 | } |
| 99 | |
| 100 | /** |
| 101 | * nilfs_palloc_desc_blkoff - get block offset of a group descriptor block |
| 102 | * @inode: inode of metadata file using this allocator |
| 103 | * @group: group number |
| 104 | * |
| 105 | * Return: Index number in the metadata file of the descriptor block of |
| 106 | * the group specified by @group. |
| 107 | */ |
| 108 | static unsigned long |
| 109 | nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) |
| 110 | { |
| 111 | unsigned long desc_block = |
| 112 | group / nilfs_palloc_groups_per_desc_block(inode); |
| 113 | return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block; |
| 114 | } |
| 115 | |
| 116 | /** |
| 117 | * nilfs_palloc_bitmap_blkoff - get block offset of a bitmap block |
| 118 | * @inode: inode of metadata file using this allocator |
| 119 | * @group: group number |
| 120 | * |
| 121 | * nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap |
| 122 | * block used to allocate/deallocate entries in the specified group. |
| 123 | * |
| 124 | * Return: Index number in the metadata file of the bitmap block of |
| 125 | * the group specified by @group. |
| 126 | */ |
| 127 | static unsigned long |
| 128 | nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) |
| 129 | { |
| 130 | unsigned long desc_offset = |
| 131 | group % nilfs_palloc_groups_per_desc_block(inode); |
| 132 | return nilfs_palloc_desc_blkoff(inode, group) + 1 + |
| 133 | desc_offset * NILFS_MDT(inode)->mi_blocks_per_group; |
| 134 | } |
| 135 | |
| 136 | /** |
| 137 | * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group |
| 138 | * @desc: pointer to descriptor structure for the group |
| 139 | * @lock: spin lock protecting @desc |
| 140 | * |
| 141 | * Return: Number of free entries written in the group descriptor @desc. |
| 142 | */ |
| 143 | static unsigned long |
| 144 | nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc, |
| 145 | spinlock_t *lock) |
| 146 | { |
| 147 | unsigned long nfree; |
| 148 | |
| 149 | spin_lock(lock); |
| 150 | nfree = le32_to_cpu(desc->pg_nfrees); |
| 151 | spin_unlock(lock); |
| 152 | return nfree; |
| 153 | } |
| 154 | |
| 155 | /** |
| 156 | * nilfs_palloc_group_desc_add_entries - adjust count of free entries |
| 157 | * @desc: pointer to descriptor structure for the group |
| 158 | * @lock: spin lock protecting @desc |
| 159 | * @n: delta to be added |
| 160 | * |
| 161 | * Return: Number of free entries after adjusting the group descriptor |
| 162 | * @desc. |
| 163 | */ |
| 164 | static u32 |
| 165 | nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc, |
| 166 | spinlock_t *lock, u32 n) |
| 167 | { |
| 168 | u32 nfree; |
| 169 | |
| 170 | spin_lock(lock); |
| 171 | le32_add_cpu(var: &desc->pg_nfrees, val: n); |
| 172 | nfree = le32_to_cpu(desc->pg_nfrees); |
| 173 | spin_unlock(lock); |
| 174 | return nfree; |
| 175 | } |
| 176 | |
| 177 | /** |
| 178 | * nilfs_palloc_entry_blkoff - get block offset of an entry block |
| 179 | * @inode: inode of metadata file using this allocator |
| 180 | * @nr: serial number of the entry (e.g. inode number) |
| 181 | * |
| 182 | * Return: Index number in the metadata file of the block containing |
| 183 | * the entry specified by @nr. |
| 184 | */ |
| 185 | static unsigned long |
| 186 | nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) |
| 187 | { |
| 188 | unsigned long group, group_offset; |
| 189 | |
| 190 | group = nilfs_palloc_group(inode, nr, offset: &group_offset); |
| 191 | |
| 192 | return nilfs_palloc_bitmap_blkoff(inode, group) + 1 + |
| 193 | group_offset / NILFS_MDT(inode)->mi_entries_per_block; |
| 194 | } |
| 195 | |
| 196 | /** |
| 197 | * nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block |
| 198 | * @inode: inode of metadata file |
| 199 | * @bh: buffer head of the buffer to be initialized |
| 200 | * @from: kernel address mapped for a chunk of the block |
| 201 | * |
| 202 | * This function does not yet support the case where block size > PAGE_SIZE. |
| 203 | */ |
| 204 | static void nilfs_palloc_desc_block_init(struct inode *inode, |
| 205 | struct buffer_head *bh, void *from) |
| 206 | { |
| 207 | struct nilfs_palloc_group_desc *desc = from; |
| 208 | unsigned long n = nilfs_palloc_groups_per_desc_block(inode); |
| 209 | __le32 nfrees; |
| 210 | |
| 211 | nfrees = cpu_to_le32(nilfs_palloc_entries_per_group(inode)); |
| 212 | while (n-- > 0) { |
| 213 | desc->pg_nfrees = nfrees; |
| 214 | desc++; |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, |
| 219 | int create, |
| 220 | void (*init_block)(struct inode *, |
| 221 | struct buffer_head *, |
| 222 | void *), |
| 223 | struct buffer_head **bhp, |
| 224 | struct nilfs_bh_assoc *prev, |
| 225 | spinlock_t *lock) |
| 226 | { |
| 227 | int ret; |
| 228 | |
| 229 | spin_lock(lock); |
| 230 | if (prev->bh && blkoff == prev->blkoff && |
| 231 | likely(buffer_uptodate(prev->bh))) { |
| 232 | get_bh(bh: prev->bh); |
| 233 | *bhp = prev->bh; |
| 234 | spin_unlock(lock); |
| 235 | return 0; |
| 236 | } |
| 237 | spin_unlock(lock); |
| 238 | |
| 239 | ret = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp); |
| 240 | if (!ret) { |
| 241 | spin_lock(lock); |
| 242 | /* |
| 243 | * The following code must be safe for change of the |
| 244 | * cache contents during the get block call. |
| 245 | */ |
| 246 | brelse(bh: prev->bh); |
| 247 | get_bh(bh: *bhp); |
| 248 | prev->bh = *bhp; |
| 249 | prev->blkoff = blkoff; |
| 250 | spin_unlock(lock); |
| 251 | } |
| 252 | return ret; |
| 253 | } |
| 254 | |
| 255 | /** |
| 256 | * nilfs_palloc_delete_block - delete a block on the persistent allocator file |
| 257 | * @inode: inode of metadata file using this allocator |
| 258 | * @blkoff: block offset |
| 259 | * @prev: nilfs_bh_assoc struct of the last used buffer |
| 260 | * @lock: spin lock protecting @prev |
| 261 | * |
| 262 | * Return: 0 on success, or one of the following negative error codes on |
| 263 | * failure: |
| 264 | * * %-EIO - I/O error (including metadata corruption). |
| 265 | * * %-ENOENT - Non-existent block. |
| 266 | * * %-ENOMEM - Insufficient memory available. |
| 267 | */ |
| 268 | static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff, |
| 269 | struct nilfs_bh_assoc *prev, |
| 270 | spinlock_t *lock) |
| 271 | { |
| 272 | spin_lock(lock); |
| 273 | if (prev->bh && blkoff == prev->blkoff) { |
| 274 | brelse(bh: prev->bh); |
| 275 | prev->bh = NULL; |
| 276 | } |
| 277 | spin_unlock(lock); |
| 278 | return nilfs_mdt_delete_block(inode, blkoff); |
| 279 | } |
| 280 | |
| 281 | /** |
| 282 | * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block |
| 283 | * @inode: inode of metadata file using this allocator |
| 284 | * @group: group number |
| 285 | * @create: create flag |
| 286 | * @bhp: pointer to store the resultant buffer head |
| 287 | * |
| 288 | * Return: 0 on success, or a negative error code on failure. |
| 289 | */ |
| 290 | static int nilfs_palloc_get_desc_block(struct inode *inode, |
| 291 | unsigned long group, |
| 292 | int create, struct buffer_head **bhp) |
| 293 | { |
| 294 | struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; |
| 295 | |
| 296 | return nilfs_palloc_get_block(inode, |
| 297 | blkoff: nilfs_palloc_desc_blkoff(inode, group), |
| 298 | create, init_block: nilfs_palloc_desc_block_init, |
| 299 | bhp, prev: &cache->prev_desc, lock: &cache->lock); |
| 300 | } |
| 301 | |
| 302 | /** |
| 303 | * nilfs_palloc_get_bitmap_block - get buffer head of a bitmap block |
| 304 | * @inode: inode of metadata file using this allocator |
| 305 | * @group: group number |
| 306 | * @create: create flag |
| 307 | * @bhp: pointer to store the resultant buffer head |
| 308 | * |
| 309 | * Return: 0 on success, or a negative error code on failure. |
| 310 | */ |
| 311 | static int nilfs_palloc_get_bitmap_block(struct inode *inode, |
| 312 | unsigned long group, |
| 313 | int create, struct buffer_head **bhp) |
| 314 | { |
| 315 | struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; |
| 316 | |
| 317 | return nilfs_palloc_get_block(inode, |
| 318 | blkoff: nilfs_palloc_bitmap_blkoff(inode, group), |
| 319 | create, NULL, bhp, |
| 320 | prev: &cache->prev_bitmap, lock: &cache->lock); |
| 321 | } |
| 322 | |
| 323 | /** |
| 324 | * nilfs_palloc_delete_bitmap_block - delete a bitmap block |
| 325 | * @inode: inode of metadata file using this allocator |
| 326 | * @group: group number |
| 327 | * |
| 328 | * Return: 0 on success, or a negative error code on failure. |
| 329 | */ |
| 330 | static int nilfs_palloc_delete_bitmap_block(struct inode *inode, |
| 331 | unsigned long group) |
| 332 | { |
| 333 | struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; |
| 334 | |
| 335 | return nilfs_palloc_delete_block(inode, |
| 336 | blkoff: nilfs_palloc_bitmap_blkoff(inode, |
| 337 | group), |
| 338 | prev: &cache->prev_bitmap, lock: &cache->lock); |
| 339 | } |
| 340 | |
| 341 | /** |
| 342 | * nilfs_palloc_get_entry_block - get buffer head of an entry block |
| 343 | * @inode: inode of metadata file using this allocator |
| 344 | * @nr: serial number of the entry (e.g. inode number) |
| 345 | * @create: create flag |
| 346 | * @bhp: pointer to store the resultant buffer head |
| 347 | * |
| 348 | * Return: 0 on success, or a negative error code on failure. |
| 349 | */ |
| 350 | int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, |
| 351 | int create, struct buffer_head **bhp) |
| 352 | { |
| 353 | struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; |
| 354 | |
| 355 | return nilfs_palloc_get_block(inode, |
| 356 | blkoff: nilfs_palloc_entry_blkoff(inode, nr), |
| 357 | create, NULL, bhp, |
| 358 | prev: &cache->prev_entry, lock: &cache->lock); |
| 359 | } |
| 360 | |
| 361 | /** |
| 362 | * nilfs_palloc_delete_entry_block - delete an entry block |
| 363 | * @inode: inode of metadata file using this allocator |
| 364 | * @nr: serial number of the entry |
| 365 | * |
| 366 | * Return: 0 on success, or a negative error code on failure. |
| 367 | */ |
| 368 | static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr) |
| 369 | { |
| 370 | struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; |
| 371 | |
| 372 | return nilfs_palloc_delete_block(inode, |
| 373 | blkoff: nilfs_palloc_entry_blkoff(inode, nr), |
| 374 | prev: &cache->prev_entry, lock: &cache->lock); |
| 375 | } |
| 376 | |
| 377 | /** |
| 378 | * nilfs_palloc_group_desc_offset - calculate the byte offset of a group |
| 379 | * descriptor in the folio containing it |
| 380 | * @inode: inode of metadata file using this allocator |
| 381 | * @group: group number |
| 382 | * @bh: buffer head of the group descriptor block |
| 383 | * |
| 384 | * Return: Byte offset in the folio of the group descriptor for @group. |
| 385 | */ |
| 386 | static size_t nilfs_palloc_group_desc_offset(const struct inode *inode, |
| 387 | unsigned long group, |
| 388 | const struct buffer_head *bh) |
| 389 | { |
| 390 | return offset_in_folio(bh->b_folio, bh->b_data) + |
| 391 | sizeof(struct nilfs_palloc_group_desc) * |
| 392 | (group % nilfs_palloc_groups_per_desc_block(inode)); |
| 393 | } |
| 394 | |
| 395 | /** |
| 396 | * nilfs_palloc_bitmap_offset - calculate the byte offset of a bitmap block |
| 397 | * in the folio containing it |
| 398 | * @bh: buffer head of the bitmap block |
| 399 | * |
| 400 | * Return: Byte offset in the folio of the bitmap block for @bh. |
| 401 | */ |
| 402 | static size_t nilfs_palloc_bitmap_offset(const struct buffer_head *bh) |
| 403 | { |
| 404 | return offset_in_folio(bh->b_folio, bh->b_data); |
| 405 | } |
| 406 | |
| 407 | /** |
| 408 | * nilfs_palloc_entry_offset - calculate the byte offset of an entry in the |
| 409 | * folio containing it |
| 410 | * @inode: inode of metadata file using this allocator |
| 411 | * @nr: serial number of the entry (e.g. inode number) |
| 412 | * @bh: buffer head of the entry block |
| 413 | * |
| 414 | * Return: Byte offset in the folio of the entry @nr. |
| 415 | */ |
| 416 | size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr, |
| 417 | const struct buffer_head *bh) |
| 418 | { |
| 419 | unsigned long entry_index_in_group, entry_index_in_block; |
| 420 | |
| 421 | nilfs_palloc_group(inode, nr, offset: &entry_index_in_group); |
| 422 | entry_index_in_block = entry_index_in_group % |
| 423 | NILFS_MDT(inode)->mi_entries_per_block; |
| 424 | |
| 425 | return offset_in_folio(bh->b_folio, bh->b_data) + |
| 426 | entry_index_in_block * NILFS_MDT(inode)->mi_entry_size; |
| 427 | } |
| 428 | |
| 429 | /** |
| 430 | * nilfs_palloc_find_available_slot - find available slot in a group |
| 431 | * @bitmap: bitmap of the group |
| 432 | * @target: offset number of an entry in the group (start point) |
| 433 | * @bsize: size in bits |
| 434 | * @lock: spin lock protecting @bitmap |
| 435 | * @wrap: whether to wrap around |
| 436 | * |
| 437 | * Return: Offset number within the group of the found free entry, or |
| 438 | * %-ENOSPC if not found. |
| 439 | */ |
| 440 | static int nilfs_palloc_find_available_slot(unsigned char *bitmap, |
| 441 | unsigned long target, |
| 442 | unsigned int bsize, |
| 443 | spinlock_t *lock, bool wrap) |
| 444 | { |
| 445 | int pos, end = bsize; |
| 446 | |
| 447 | if (likely(target < bsize)) { |
| 448 | pos = target; |
| 449 | do { |
| 450 | pos = nilfs_find_next_zero_bit(addr: bitmap, size: end, offset: pos); |
| 451 | if (pos >= end) |
| 452 | break; |
| 453 | if (!nilfs_set_bit_atomic(lock, pos, bitmap)) |
| 454 | return pos; |
| 455 | } while (++pos < end); |
| 456 | |
| 457 | end = target; |
| 458 | } |
| 459 | if (!wrap) |
| 460 | return -ENOSPC; |
| 461 | |
| 462 | /* wrap around */ |
| 463 | for (pos = 0; pos < end; pos++) { |
| 464 | pos = nilfs_find_next_zero_bit(addr: bitmap, size: end, offset: pos); |
| 465 | if (pos >= end) |
| 466 | break; |
| 467 | if (!nilfs_set_bit_atomic(lock, pos, bitmap)) |
| 468 | return pos; |
| 469 | } |
| 470 | |
| 471 | return -ENOSPC; |
| 472 | } |
| 473 | |
| 474 | /** |
| 475 | * nilfs_palloc_rest_groups_in_desc_block - get the remaining number of groups |
| 476 | * in a group descriptor block |
| 477 | * @inode: inode of metadata file using this allocator |
| 478 | * @curr: current group number |
| 479 | * @max: maximum number of groups |
| 480 | * |
| 481 | * Return: Number of remaining descriptors (= groups) managed by the descriptor |
| 482 | * block. |
| 483 | */ |
| 484 | static unsigned long |
| 485 | nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, |
| 486 | unsigned long curr, unsigned long max) |
| 487 | { |
| 488 | return min_t(unsigned long, |
| 489 | nilfs_palloc_groups_per_desc_block(inode) - |
| 490 | curr % nilfs_palloc_groups_per_desc_block(inode), |
| 491 | max - curr + 1); |
| 492 | } |
| 493 | |
| 494 | /** |
| 495 | * nilfs_palloc_count_desc_blocks - count descriptor blocks number |
| 496 | * @inode: inode of metadata file using this allocator |
| 497 | * @desc_blocks: descriptor blocks number [out] |
| 498 | * |
| 499 | * Return: 0 on success, or a negative error code on failure. |
| 500 | */ |
| 501 | static int nilfs_palloc_count_desc_blocks(struct inode *inode, |
| 502 | unsigned long *desc_blocks) |
| 503 | { |
| 504 | __u64 blknum; |
| 505 | int ret; |
| 506 | |
| 507 | ret = nilfs_bmap_last_key(bmap: NILFS_I(inode)->i_bmap, keyp: &blknum); |
| 508 | if (likely(!ret)) |
| 509 | *desc_blocks = DIV_ROUND_UP( |
| 510 | (unsigned long)blknum, |
| 511 | NILFS_MDT(inode)->mi_blocks_per_desc_block); |
| 512 | return ret; |
| 513 | } |
| 514 | |
/**
 * nilfs_palloc_mdt_file_can_grow - check potential opportunity for
 *                                  MDT file growing
 * @inode: inode of metadata file using this allocator
 * @desc_blocks: known current descriptor blocks count
 *
 * Return: true if a group can be added in the metadata file, false if not.
 */
static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode,
						  unsigned long desc_blocks)
{
	/* Number of groups covered by the current descriptor blocks */
	const unsigned long described_groups =
		nilfs_palloc_groups_per_desc_block(inode) * desc_blocks;

	return described_groups < nilfs_palloc_groups_count(inode);
}
| 529 | |
| 530 | /** |
| 531 | * nilfs_palloc_count_max_entries - count max number of entries that can be |
| 532 | * described by descriptor blocks count |
| 533 | * @inode: inode of metadata file using this allocator |
| 534 | * @nused: current number of used entries |
| 535 | * @nmaxp: max number of entries [out] |
| 536 | * |
| 537 | * Return: 0 on success, or one of the following negative error codes on |
| 538 | * failure: |
| 539 | * * %-EIO - I/O error (including metadata corruption). |
| 540 | * * %-ENOMEM - Insufficient memory available. |
| 541 | * * %-ERANGE - Number of entries in use is out of range. |
| 542 | */ |
| 543 | int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) |
| 544 | { |
| 545 | unsigned long desc_blocks = 0; |
| 546 | u64 entries_per_desc_block, nmax; |
| 547 | int err; |
| 548 | |
| 549 | err = nilfs_palloc_count_desc_blocks(inode, desc_blocks: &desc_blocks); |
| 550 | if (unlikely(err)) |
| 551 | return err; |
| 552 | |
| 553 | entries_per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) * |
| 554 | nilfs_palloc_groups_per_desc_block(inode); |
| 555 | nmax = entries_per_desc_block * desc_blocks; |
| 556 | |
| 557 | if (nused == nmax && |
| 558 | nilfs_palloc_mdt_file_can_grow(inode, desc_blocks)) |
| 559 | nmax += entries_per_desc_block; |
| 560 | |
| 561 | if (nused > nmax) |
| 562 | return -ERANGE; |
| 563 | |
| 564 | *nmaxp = nmax; |
| 565 | return 0; |
| 566 | } |
| 567 | |
| 568 | /** |
| 569 | * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object |
| 570 | * @inode: inode of metadata file using this allocator |
| 571 | * @req: nilfs_palloc_req structure exchanged for the allocation |
| 572 | * @wrap: whether to wrap around |
| 573 | * |
| 574 | * Return: 0 on success, or one of the following negative error codes on |
| 575 | * failure: |
| 576 | * * %-EIO - I/O error (including metadata corruption). |
| 577 | * * %-ENOMEM - Insufficient memory available. |
| 578 | * * %-ENOSPC - Entries exhausted (No entries available for allocation). |
| 579 | * * %-EROFS - Read only filesystem |
| 580 | */ |
| 581 | int nilfs_palloc_prepare_alloc_entry(struct inode *inode, |
| 582 | struct nilfs_palloc_req *req, bool wrap) |
| 583 | { |
| 584 | struct buffer_head *desc_bh, *bitmap_bh; |
| 585 | struct nilfs_palloc_group_desc *desc; |
| 586 | unsigned char *bitmap; |
| 587 | size_t doff, boff; |
| 588 | unsigned long group, maxgroup, ngroups; |
| 589 | unsigned long group_offset, maxgroup_offset; |
| 590 | unsigned long n, entries_per_group; |
| 591 | unsigned long i, j; |
| 592 | spinlock_t *lock; |
| 593 | int pos, ret; |
| 594 | |
| 595 | ngroups = nilfs_palloc_groups_count(inode); |
| 596 | maxgroup = ngroups - 1; |
| 597 | group = nilfs_palloc_group(inode, nr: req->pr_entry_nr, offset: &group_offset); |
| 598 | entries_per_group = nilfs_palloc_entries_per_group(inode); |
| 599 | |
| 600 | for (i = 0; i < ngroups; i += n) { |
| 601 | if (group >= ngroups && wrap) { |
| 602 | /* wrap around */ |
| 603 | group = 0; |
| 604 | maxgroup = nilfs_palloc_group(inode, nr: req->pr_entry_nr, |
| 605 | offset: &maxgroup_offset) - 1; |
| 606 | } |
| 607 | ret = nilfs_palloc_get_desc_block(inode, group, create: 1, bhp: &desc_bh); |
| 608 | if (ret < 0) |
| 609 | return ret; |
| 610 | |
| 611 | doff = nilfs_palloc_group_desc_offset(inode, group, bh: desc_bh); |
| 612 | desc = kmap_local_folio(folio: desc_bh->b_folio, offset: doff); |
| 613 | n = nilfs_palloc_rest_groups_in_desc_block(inode, curr: group, |
| 614 | max: maxgroup); |
| 615 | for (j = 0; j < n; j++, group++, group_offset = 0) { |
| 616 | lock = nilfs_mdt_bgl_lock(inode, block_group: group); |
| 617 | if (nilfs_palloc_group_desc_nfrees(desc: &desc[j], lock) == 0) |
| 618 | continue; |
| 619 | |
| 620 | kunmap_local(desc); |
| 621 | ret = nilfs_palloc_get_bitmap_block(inode, group, create: 1, |
| 622 | bhp: &bitmap_bh); |
| 623 | if (unlikely(ret < 0)) { |
| 624 | brelse(bh: desc_bh); |
| 625 | return ret; |
| 626 | } |
| 627 | |
| 628 | /* |
| 629 | * Re-kmap the folio containing the first (and |
| 630 | * subsequent) group descriptors. |
| 631 | */ |
| 632 | desc = kmap_local_folio(folio: desc_bh->b_folio, offset: doff); |
| 633 | |
| 634 | boff = nilfs_palloc_bitmap_offset(bh: bitmap_bh); |
| 635 | bitmap = kmap_local_folio(folio: bitmap_bh->b_folio, offset: boff); |
| 636 | pos = nilfs_palloc_find_available_slot( |
| 637 | bitmap, target: group_offset, bsize: entries_per_group, lock, |
| 638 | wrap); |
| 639 | /* |
| 640 | * Since the search for a free slot in the second and |
| 641 | * subsequent bitmap blocks always starts from the |
| 642 | * beginning, the wrap flag only has an effect on the |
| 643 | * first search. |
| 644 | */ |
| 645 | kunmap_local(bitmap); |
| 646 | if (pos >= 0) |
| 647 | goto found; |
| 648 | |
| 649 | brelse(bh: bitmap_bh); |
| 650 | } |
| 651 | |
| 652 | kunmap_local(desc); |
| 653 | brelse(bh: desc_bh); |
| 654 | } |
| 655 | |
| 656 | /* no entries left */ |
| 657 | return -ENOSPC; |
| 658 | |
| 659 | found: |
| 660 | /* found a free entry */ |
| 661 | nilfs_palloc_group_desc_add_entries(desc: &desc[j], lock, n: -1); |
| 662 | req->pr_entry_nr = entries_per_group * group + pos; |
| 663 | kunmap_local(desc); |
| 664 | |
| 665 | req->pr_desc_bh = desc_bh; |
| 666 | req->pr_bitmap_bh = bitmap_bh; |
| 667 | return 0; |
| 668 | } |
| 669 | |
| 670 | /** |
| 671 | * nilfs_palloc_commit_alloc_entry - finish allocation of a persistent object |
| 672 | * @inode: inode of metadata file using this allocator |
| 673 | * @req: nilfs_palloc_req structure exchanged for the allocation |
| 674 | */ |
| 675 | void nilfs_palloc_commit_alloc_entry(struct inode *inode, |
| 676 | struct nilfs_palloc_req *req) |
| 677 | { |
| 678 | mark_buffer_dirty(bh: req->pr_bitmap_bh); |
| 679 | mark_buffer_dirty(bh: req->pr_desc_bh); |
| 680 | nilfs_mdt_mark_dirty(inode); |
| 681 | |
| 682 | brelse(bh: req->pr_bitmap_bh); |
| 683 | brelse(bh: req->pr_desc_bh); |
| 684 | } |
| 685 | |
| 686 | /** |
| 687 | * nilfs_palloc_commit_free_entry - finish deallocating a persistent object |
| 688 | * @inode: inode of metadata file using this allocator |
| 689 | * @req: nilfs_palloc_req structure exchanged for the removal |
| 690 | */ |
| 691 | void nilfs_palloc_commit_free_entry(struct inode *inode, |
| 692 | struct nilfs_palloc_req *req) |
| 693 | { |
| 694 | unsigned long group, group_offset; |
| 695 | size_t doff, boff; |
| 696 | struct nilfs_palloc_group_desc *desc; |
| 697 | unsigned char *bitmap; |
| 698 | spinlock_t *lock; |
| 699 | |
| 700 | group = nilfs_palloc_group(inode, nr: req->pr_entry_nr, offset: &group_offset); |
| 701 | doff = nilfs_palloc_group_desc_offset(inode, group, bh: req->pr_desc_bh); |
| 702 | desc = kmap_local_folio(folio: req->pr_desc_bh->b_folio, offset: doff); |
| 703 | |
| 704 | boff = nilfs_palloc_bitmap_offset(bh: req->pr_bitmap_bh); |
| 705 | bitmap = kmap_local_folio(folio: req->pr_bitmap_bh->b_folio, offset: boff); |
| 706 | lock = nilfs_mdt_bgl_lock(inode, block_group: group); |
| 707 | |
| 708 | if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) |
| 709 | nilfs_warn(inode->i_sb, |
| 710 | "%s (ino=%lu): entry number %llu already freed" , |
| 711 | __func__, inode->i_ino, |
| 712 | (unsigned long long)req->pr_entry_nr); |
| 713 | else |
| 714 | nilfs_palloc_group_desc_add_entries(desc, lock, n: 1); |
| 715 | |
| 716 | kunmap_local(bitmap); |
| 717 | kunmap_local(desc); |
| 718 | |
| 719 | mark_buffer_dirty(bh: req->pr_desc_bh); |
| 720 | mark_buffer_dirty(bh: req->pr_bitmap_bh); |
| 721 | nilfs_mdt_mark_dirty(inode); |
| 722 | |
| 723 | brelse(bh: req->pr_bitmap_bh); |
| 724 | brelse(bh: req->pr_desc_bh); |
| 725 | } |
| 726 | |
| 727 | /** |
| 728 | * nilfs_palloc_abort_alloc_entry - cancel allocation of a persistent object |
| 729 | * @inode: inode of metadata file using this allocator |
| 730 | * @req: nilfs_palloc_req structure exchanged for the allocation |
| 731 | */ |
| 732 | void nilfs_palloc_abort_alloc_entry(struct inode *inode, |
| 733 | struct nilfs_palloc_req *req) |
| 734 | { |
| 735 | struct nilfs_palloc_group_desc *desc; |
| 736 | size_t doff, boff; |
| 737 | unsigned char *bitmap; |
| 738 | unsigned long group, group_offset; |
| 739 | spinlock_t *lock; |
| 740 | |
| 741 | group = nilfs_palloc_group(inode, nr: req->pr_entry_nr, offset: &group_offset); |
| 742 | doff = nilfs_palloc_group_desc_offset(inode, group, bh: req->pr_desc_bh); |
| 743 | desc = kmap_local_folio(folio: req->pr_desc_bh->b_folio, offset: doff); |
| 744 | |
| 745 | boff = nilfs_palloc_bitmap_offset(bh: req->pr_bitmap_bh); |
| 746 | bitmap = kmap_local_folio(folio: req->pr_bitmap_bh->b_folio, offset: boff); |
| 747 | lock = nilfs_mdt_bgl_lock(inode, block_group: group); |
| 748 | |
| 749 | if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) |
| 750 | nilfs_warn(inode->i_sb, |
| 751 | "%s (ino=%lu): entry number %llu already freed" , |
| 752 | __func__, inode->i_ino, |
| 753 | (unsigned long long)req->pr_entry_nr); |
| 754 | else |
| 755 | nilfs_palloc_group_desc_add_entries(desc, lock, n: 1); |
| 756 | |
| 757 | kunmap_local(bitmap); |
| 758 | kunmap_local(desc); |
| 759 | |
| 760 | brelse(bh: req->pr_bitmap_bh); |
| 761 | brelse(bh: req->pr_desc_bh); |
| 762 | |
| 763 | req->pr_entry_nr = 0; |
| 764 | req->pr_bitmap_bh = NULL; |
| 765 | req->pr_desc_bh = NULL; |
| 766 | } |
| 767 | |
| 768 | /** |
| 769 | * nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object |
| 770 | * @inode: inode of metadata file using this allocator |
| 771 | * @req: nilfs_palloc_req structure exchanged for the removal |
| 772 | * |
| 773 | * Return: 0 on success, or a negative error code on failure. |
| 774 | */ |
| 775 | int nilfs_palloc_prepare_free_entry(struct inode *inode, |
| 776 | struct nilfs_palloc_req *req) |
| 777 | { |
| 778 | struct buffer_head *desc_bh, *bitmap_bh; |
| 779 | unsigned long group, group_offset; |
| 780 | int ret; |
| 781 | |
| 782 | group = nilfs_palloc_group(inode, nr: req->pr_entry_nr, offset: &group_offset); |
| 783 | ret = nilfs_palloc_get_desc_block(inode, group, create: 1, bhp: &desc_bh); |
| 784 | if (ret < 0) |
| 785 | return ret; |
| 786 | ret = nilfs_palloc_get_bitmap_block(inode, group, create: 1, bhp: &bitmap_bh); |
| 787 | if (ret < 0) { |
| 788 | brelse(bh: desc_bh); |
| 789 | return ret; |
| 790 | } |
| 791 | |
| 792 | req->pr_desc_bh = desc_bh; |
| 793 | req->pr_bitmap_bh = bitmap_bh; |
| 794 | return 0; |
| 795 | } |
| 796 | |
| 797 | /** |
| 798 | * nilfs_palloc_abort_free_entry - cancel deallocating a persistent object |
| 799 | * @inode: inode of metadata file using this allocator |
| 800 | * @req: nilfs_palloc_req structure exchanged for the removal |
| 801 | */ |
| 802 | void nilfs_palloc_abort_free_entry(struct inode *inode, |
| 803 | struct nilfs_palloc_req *req) |
| 804 | { |
| 805 | brelse(bh: req->pr_bitmap_bh); |
| 806 | brelse(bh: req->pr_desc_bh); |
| 807 | |
| 808 | req->pr_entry_nr = 0; |
| 809 | req->pr_bitmap_bh = NULL; |
| 810 | req->pr_desc_bh = NULL; |
| 811 | } |
| 812 | |
| 813 | /** |
| 814 | * nilfs_palloc_freev - deallocate a set of persistent objects |
| 815 | * @inode: inode of metadata file using this allocator |
| 816 | * @entry_nrs: array of entry numbers to be deallocated |
| 817 | * @nitems: number of entries stored in @entry_nrs |
| 818 | * |
| 819 | * Return: 0 on success, or a negative error code on failure. |
| 820 | */ |
| 821 | int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) |
| 822 | { |
| 823 | struct buffer_head *desc_bh, *bitmap_bh; |
| 824 | struct nilfs_palloc_group_desc *desc; |
| 825 | unsigned char *bitmap; |
| 826 | size_t doff, boff; |
| 827 | unsigned long group, group_offset; |
| 828 | __u64 group_min_nr, last_nrs[8]; |
| 829 | const unsigned long epg = nilfs_palloc_entries_per_group(inode); |
| 830 | const unsigned int epb = NILFS_MDT(inode)->mi_entries_per_block; |
| 831 | unsigned int entry_start, end, pos; |
| 832 | spinlock_t *lock; |
| 833 | int i, j, k, ret; |
| 834 | u32 nfree; |
| 835 | |
| 836 | for (i = 0; i < nitems; i = j) { |
| 837 | int change_group = false; |
| 838 | int nempties = 0, n = 0; |
| 839 | |
| 840 | group = nilfs_palloc_group(inode, nr: entry_nrs[i], offset: &group_offset); |
| 841 | ret = nilfs_palloc_get_desc_block(inode, group, create: 0, bhp: &desc_bh); |
| 842 | if (ret < 0) |
| 843 | return ret; |
| 844 | ret = nilfs_palloc_get_bitmap_block(inode, group, create: 0, |
| 845 | bhp: &bitmap_bh); |
| 846 | if (ret < 0) { |
| 847 | brelse(bh: desc_bh); |
| 848 | return ret; |
| 849 | } |
| 850 | |
| 851 | /* Get the first entry number of the group */ |
| 852 | group_min_nr = (__u64)group * epg; |
| 853 | |
| 854 | boff = nilfs_palloc_bitmap_offset(bh: bitmap_bh); |
| 855 | bitmap = kmap_local_folio(folio: bitmap_bh->b_folio, offset: boff); |
| 856 | lock = nilfs_mdt_bgl_lock(inode, block_group: group); |
| 857 | |
| 858 | j = i; |
| 859 | entry_start = rounddown(group_offset, epb); |
| 860 | do { |
| 861 | if (!nilfs_clear_bit_atomic(lock, group_offset, |
| 862 | bitmap)) { |
| 863 | nilfs_warn(inode->i_sb, |
| 864 | "%s (ino=%lu): entry number %llu already freed" , |
| 865 | __func__, inode->i_ino, |
| 866 | (unsigned long long)entry_nrs[j]); |
| 867 | } else { |
| 868 | n++; |
| 869 | } |
| 870 | |
| 871 | j++; |
| 872 | if (j >= nitems || entry_nrs[j] < group_min_nr || |
| 873 | entry_nrs[j] >= group_min_nr + epg) { |
| 874 | change_group = true; |
| 875 | } else { |
| 876 | group_offset = entry_nrs[j] - group_min_nr; |
| 877 | if (group_offset >= entry_start && |
| 878 | group_offset < entry_start + epb) { |
| 879 | /* This entry is in the same block */ |
| 880 | continue; |
| 881 | } |
| 882 | } |
| 883 | |
| 884 | /* Test if the entry block is empty or not */ |
| 885 | end = entry_start + epb; |
| 886 | pos = nilfs_find_next_bit(addr: bitmap, size: end, offset: entry_start); |
| 887 | if (pos >= end) { |
| 888 | last_nrs[nempties++] = entry_nrs[j - 1]; |
| 889 | if (nempties >= ARRAY_SIZE(last_nrs)) |
| 890 | break; |
| 891 | } |
| 892 | |
| 893 | if (change_group) |
| 894 | break; |
| 895 | |
| 896 | /* Go on to the next entry block */ |
| 897 | entry_start = rounddown(group_offset, epb); |
| 898 | } while (true); |
| 899 | |
| 900 | kunmap_local(bitmap); |
| 901 | mark_buffer_dirty(bh: bitmap_bh); |
| 902 | brelse(bh: bitmap_bh); |
| 903 | |
| 904 | for (k = 0; k < nempties; k++) { |
| 905 | ret = nilfs_palloc_delete_entry_block(inode, |
| 906 | nr: last_nrs[k]); |
| 907 | if (ret && ret != -ENOENT) |
| 908 | nilfs_warn(inode->i_sb, |
| 909 | "error %d deleting block that object (entry=%llu, ino=%lu) belongs to" , |
| 910 | ret, (unsigned long long)last_nrs[k], |
| 911 | inode->i_ino); |
| 912 | } |
| 913 | |
| 914 | doff = nilfs_palloc_group_desc_offset(inode, group, bh: desc_bh); |
| 915 | desc = kmap_local_folio(folio: desc_bh->b_folio, offset: doff); |
| 916 | nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n); |
| 917 | kunmap_local(desc); |
| 918 | mark_buffer_dirty(bh: desc_bh); |
| 919 | nilfs_mdt_mark_dirty(inode); |
| 920 | brelse(bh: desc_bh); |
| 921 | |
| 922 | if (nfree == nilfs_palloc_entries_per_group(inode)) { |
| 923 | ret = nilfs_palloc_delete_bitmap_block(inode, group); |
| 924 | if (ret && ret != -ENOENT) |
| 925 | nilfs_warn(inode->i_sb, |
| 926 | "error %d deleting bitmap block of group=%lu, ino=%lu" , |
| 927 | ret, group, inode->i_ino); |
| 928 | } |
| 929 | } |
| 930 | return 0; |
| 931 | } |
| 932 | |
| 933 | void nilfs_palloc_setup_cache(struct inode *inode, |
| 934 | struct nilfs_palloc_cache *cache) |
| 935 | { |
| 936 | NILFS_MDT(inode)->mi_palloc_cache = cache; |
| 937 | spin_lock_init(&cache->lock); |
| 938 | } |
| 939 | |
| 940 | void nilfs_palloc_clear_cache(struct inode *inode) |
| 941 | { |
| 942 | struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; |
| 943 | |
| 944 | spin_lock(lock: &cache->lock); |
| 945 | brelse(bh: cache->prev_desc.bh); |
| 946 | brelse(bh: cache->prev_bitmap.bh); |
| 947 | brelse(bh: cache->prev_entry.bh); |
| 948 | cache->prev_desc.bh = NULL; |
| 949 | cache->prev_bitmap.bh = NULL; |
| 950 | cache->prev_entry.bh = NULL; |
| 951 | spin_unlock(lock: &cache->lock); |
| 952 | } |
| 953 | |
| 954 | void nilfs_palloc_destroy_cache(struct inode *inode) |
| 955 | { |
| 956 | nilfs_palloc_clear_cache(inode); |
| 957 | NILFS_MDT(inode)->mi_palloc_cache = NULL; |
| 958 | } |
| 959 | |