| 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | |
| 3 | /* |
| 4 | * zsmalloc memory allocator |
| 5 | * |
| 6 | * Copyright (C) 2011 Nitin Gupta |
| 7 | * Copyright (C) 2012, 2013 Minchan Kim |
| 8 | * |
| 9 | * This code is released using a dual license strategy: BSD/GPL |
| 10 | * You can choose the license that better fits your requirements. |
| 11 | * |
| 12 | * Released under the terms of 3-clause BSD License |
| 13 | * Released under the terms of GNU General Public License Version 2.0 |
| 14 | */ |
| 15 | |
| 16 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 17 | |
| 18 | /* |
| 19 | * lock ordering: |
| 20 | * page_lock |
| 21 | * pool->lock |
| 22 | * class->lock |
| 23 | * zspage->lock |
| 24 | */ |
| 25 | |
| 26 | #include <linux/module.h> |
| 27 | #include <linux/kernel.h> |
| 28 | #include <linux/sched.h> |
| 29 | #include <linux/errno.h> |
| 30 | #include <linux/highmem.h> |
| 31 | #include <linux/string.h> |
| 32 | #include <linux/slab.h> |
| 33 | #include <linux/spinlock.h> |
| 34 | #include <linux/sprintf.h> |
| 35 | #include <linux/shrinker.h> |
| 36 | #include <linux/types.h> |
| 37 | #include <linux/debugfs.h> |
| 38 | #include <linux/zsmalloc.h> |
| 39 | #include <linux/fs.h> |
| 40 | #include <linux/workqueue.h> |
| 41 | #include "zpdesc.h" |
| 42 | |
| 43 | #define ZSPAGE_MAGIC 0x58 |
| 44 | |
| 45 | /* |
| 46 | * This must be power of 2 and greater than or equal to sizeof(link_free). |
| 47 | * These two conditions ensure that any 'struct link_free' itself doesn't |
| 48 | * span more than 1 page which avoids complex case of mapping 2 pages simply |
| 49 | * to restore link_free pointer values. |
| 50 | */ |
| 51 | #define ZS_ALIGN 8 |
| 52 | |
| 53 | #define ZS_HANDLE_SIZE (sizeof(unsigned long)) |
| 54 | |
| 55 | /* |
| 56 | * Object location (<PFN>, <obj_idx>) is encoded as |
| 57 | * a single (unsigned long) handle value. |
| 58 | * |
| 59 | * Note that object index <obj_idx> starts from 0. |
| 60 | * |
| 61 | * This is made more complicated by various memory models and PAE. |
| 62 | */ |
| 63 | |
| 64 | #ifndef MAX_POSSIBLE_PHYSMEM_BITS |
| 65 | #ifdef MAX_PHYSMEM_BITS |
| 66 | #define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS |
| 67 | #else |
| 68 | /* |
| 69 | * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just |
| 70 | * be PAGE_SHIFT |
| 71 | */ |
| 72 | #define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG |
| 73 | #endif |
| 74 | #endif |
| 75 | |
| 76 | #define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT) |
| 77 | |
| 78 | /* |
| 79 | * The head of an allocated object stores OBJ_ALLOCATED_TAG |
| 80 | * to identify whether the object is allocated or not. |
| 81 | * It's okay to use the least significant bit for this status because |
| 82 | * the header keeps the handle, which is a 4-byte-aligned address, so we |
| 83 | * have room for at least two bits. |
| 84 | */ |
| 85 | #define OBJ_ALLOCATED_TAG 1 |
| 86 | |
| 87 | #define OBJ_TAG_BITS 1 |
| 88 | #define OBJ_TAG_MASK OBJ_ALLOCATED_TAG |
| 89 | |
| 90 | #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS) |
| 91 | #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) |
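/*
 * Editorial sketch (not from the original source): with the macros above,
 * an encoded object value packs the page frame number into the high bits
 * and the object index into the low bits:
 *
 *	obj = (pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK);
 *
 * For a hypothetical 64-bit build with MAX_POSSIBLE_PHYSMEM_BITS == 46 and
 * PAGE_SHIFT == 12, _PFN_BITS is 34 and OBJ_INDEX_BITS is 30, so pfn 0x1000
 * with obj_idx 5 encodes as (0x1000 << 30) | 5.  See location_to_obj() and
 * obj_to_location() below.
 */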
| 92 | |
| 93 | #define HUGE_BITS 1 |
| 94 | #define FULLNESS_BITS 4 |
| 95 | #define CLASS_BITS 8 |
| 96 | #define MAGIC_VAL_BITS 8 |
| 97 | |
| 98 | #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL)) |
| 99 | |
| 100 | /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ |
| 101 | #define ZS_MIN_ALLOC_SIZE \ |
| 102 | MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) |
| 103 | /* each chunk includes extra space to keep handle */ |
| 104 | #define ZS_MAX_ALLOC_SIZE PAGE_SIZE |
| 105 | |
| 106 | /* |
| 107 | * On systems with 4K page size, this gives 255 size classes! There is a |
| 108 | * trade-off here: |
| 109 | * - A large number of size classes is potentially wasteful as free pages are |
| 110 | * spread across these classes |
| 111 | * - A small number of size classes causes large internal fragmentation |
| 112 | * - Probably it's better to use specific size classes (empirically |
| 113 | * determined). NOTE: all those class sizes must be set as multiple of |
| 114 | * ZS_ALIGN to make sure link_free itself never has to span 2 pages. |
| 115 | * |
| 116 | * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN |
| 117 | * (reason above) |
| 118 | */ |
| 119 | #define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> CLASS_BITS) |
| 120 | #define ZS_SIZE_CLASSES (DIV_ROUND_UP(ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE, \ |
| 121 | ZS_SIZE_CLASS_DELTA) + 1) |
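/*
 * Worked example (editorial, assuming 4K pages, CLASS_BITS == 8 and
 * ZS_MIN_ALLOC_SIZE resolving to 32): ZS_SIZE_CLASS_DELTA = 4096 >> 8 = 16,
 * so class sizes step by 16 bytes from 32 up to PAGE_SIZE, giving
 * DIV_ROUND_UP(4096 - 32, 16) + 1 = 255 size classes, which matches the
 * "255 size classes" figure in the comment above.
 */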
| 122 | |
| 123 | /* |
| 124 | * Pages are distinguished by the ratio of used memory (that is the ratio |
| 125 | * of ->inuse objects to all objects that page can store). For example, |
| 126 | * INUSE_RATIO_10 means that the ratio of used objects is > 0% and <= 10%. |
| 127 | * |
| 128 | * The number of fullness groups is not random. It allows us to keep |
| 129 | * difference between the least busy page in the group (minimum permitted |
| 130 | * number of ->inuse objects) and the most busy page (maximum permitted |
| 131 | * number of ->inuse objects) at a reasonable value. |
| 132 | */ |
| 133 | enum fullness_group { |
| 134 | ZS_INUSE_RATIO_0, |
| 135 | ZS_INUSE_RATIO_10, |
| 136 | /* NOTE: 8 more fullness groups here */ |
| 137 | ZS_INUSE_RATIO_99 = 10, |
| 138 | ZS_INUSE_RATIO_100, |
| 139 | NR_FULLNESS_GROUPS, |
| 140 | }; |
| 141 | |
| 142 | enum class_stat_type { |
| 143 | /* NOTE: stats for 12 fullness groups here: from inuse 0 to 100 */ |
| 144 | ZS_OBJS_ALLOCATED = NR_FULLNESS_GROUPS, |
| 145 | ZS_OBJS_INUSE, |
| 146 | NR_CLASS_STAT_TYPES, |
| 147 | }; |
| 148 | |
| 149 | struct zs_size_stat { |
| 150 | unsigned long objs[NR_CLASS_STAT_TYPES]; |
| 151 | }; |
| 152 | |
| 153 | #ifdef CONFIG_ZSMALLOC_STAT |
| 154 | static struct dentry *zs_stat_root; |
| 155 | #endif |
| 156 | |
| 157 | static size_t huge_class_size; |
| 158 | |
| 159 | struct size_class { |
| 160 | spinlock_t lock; |
| 161 | struct list_head fullness_list[NR_FULLNESS_GROUPS]; |
| 162 | /* |
| 163 | * Size of objects stored in this class. Must be multiple |
| 164 | * of ZS_ALIGN. |
| 165 | */ |
| 166 | int size; |
| 167 | int objs_per_zspage; |
| 168 | /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ |
| 169 | int pages_per_zspage; |
| 170 | |
| 171 | unsigned int index; |
| 172 | struct zs_size_stat stats; |
| 173 | }; |
| 174 | |
| 175 | /* |
| 176 | * Placed within free objects to form a singly linked list. |
| 177 | * For every zspage, zspage->freeobj gives head of this list. |
| 178 | * |
| 179 | * This must be power of 2 and less than or equal to ZS_ALIGN |
| 180 | */ |
| 181 | struct link_free { |
| 182 | union { |
| 183 | /* |
| 184 | * Free object index; |
| 185 | * It's valid for non-allocated object |
| 186 | */ |
| 187 | unsigned long next; |
| 188 | /* |
| 189 | * Handle of allocated object. |
| 190 | */ |
| 191 | unsigned long handle; |
| 192 | }; |
| 193 | }; |
| 194 | |
| 195 | struct zs_pool { |
| 196 | const char *name; |
| 197 | |
| 198 | struct size_class *size_class[ZS_SIZE_CLASSES]; |
| 199 | struct kmem_cache *handle_cachep; |
| 200 | struct kmem_cache *zspage_cachep; |
| 201 | |
| 202 | atomic_long_t pages_allocated; |
| 203 | |
| 204 | struct zs_pool_stats stats; |
| 205 | |
| 206 | /* Compact classes */ |
| 207 | struct shrinker *shrinker; |
| 208 | |
| 209 | #ifdef CONFIG_ZSMALLOC_STAT |
| 210 | struct dentry *stat_dentry; |
| 211 | #endif |
| 212 | #ifdef CONFIG_COMPACTION |
| 213 | struct work_struct free_work; |
| 214 | #endif |
| 215 | /* protect zspage migration/compaction */ |
| 216 | rwlock_t lock; |
| 217 | atomic_t compaction_in_progress; |
| 218 | }; |
| 219 | |
| 220 | static inline void zpdesc_set_first(struct zpdesc *zpdesc) |
| 221 | { |
| 222 | SetPagePrivate(zpdesc_page(zpdesc)); |
| 223 | } |
| 224 | |
| 225 | static inline void zpdesc_inc_zone_page_state(struct zpdesc *zpdesc) |
| 226 | { |
| 227 | inc_zone_page_state(zpdesc_page(zpdesc), NR_ZSPAGES); |
| 228 | } |
| 229 | |
| 230 | static inline void zpdesc_dec_zone_page_state(struct zpdesc *zpdesc) |
| 231 | { |
| 232 | dec_zone_page_state(zpdesc_page(zpdesc), NR_ZSPAGES); |
| 233 | } |
| 234 | |
| 235 | static inline struct zpdesc *alloc_zpdesc(gfp_t gfp, const int nid) |
| 236 | { |
| 237 | struct page *page = alloc_pages_node(nid, gfp, 0); |
| 238 | |
| 239 | return page_zpdesc(page); |
| 240 | } |
| 241 | |
| 242 | static inline void free_zpdesc(struct zpdesc *zpdesc) |
| 243 | { |
| 244 | struct page *page = zpdesc_page(zpdesc); |
| 245 | |
| 246 | /* PageZsmalloc is sticky until the page is freed to the buddy. */ |
| 247 | __free_page(page); |
| 248 | } |
| 249 | |
| 250 | #define ZS_PAGE_UNLOCKED 0 |
| 251 | #define ZS_PAGE_WRLOCKED -1 |
| 252 | |
| 253 | struct zspage_lock { |
| 254 | spinlock_t lock; |
| 255 | int cnt; |
| 256 | struct lockdep_map dep_map; |
| 257 | }; |
| 258 | |
| 259 | struct zspage { |
| 260 | struct { |
| 261 | unsigned int huge:HUGE_BITS; |
| 262 | unsigned int fullness:FULLNESS_BITS; |
| 263 | unsigned int class:CLASS_BITS + 1; |
| 264 | unsigned int magic:MAGIC_VAL_BITS; |
| 265 | }; |
| 266 | unsigned int inuse; |
| 267 | unsigned int freeobj; |
| 268 | struct zpdesc *first_zpdesc; |
| 269 | struct list_head list; /* fullness list */ |
| 270 | struct zs_pool *pool; |
| 271 | struct zspage_lock zsl; |
| 272 | }; |
| 273 | |
| 274 | static void zspage_lock_init(struct zspage *zspage) |
| 275 | { |
| 276 | static struct lock_class_key __key; |
| 277 | struct zspage_lock *zsl = &zspage->zsl; |
| 278 | |
| 279 | lockdep_init_map(&zsl->dep_map, "zspage->lock", &__key, 0); |
| 280 | spin_lock_init(&zsl->lock); |
| 281 | zsl->cnt = ZS_PAGE_UNLOCKED; |
| 282 | } |
| 283 | |
| 284 | /* |
| 285 | * The zspage lock can be held from atomic contexts, but it needs to remain |
| 286 | * preemptible when held for reading because it remains held outside of those |
| 287 | * atomic contexts, otherwise we unnecessarily lose preemptibility. |
| 288 | * |
| 289 | * To achieve this, the following rules are enforced on readers and writers: |
| 290 | * |
| 291 | * - Writers are blocked by both writers and readers, while readers are only |
| 292 | * blocked by writers (i.e. normal rwlock semantics). |
| 293 | * |
| 294 | * - Writers are always atomic (to allow readers to spin waiting for them). |
| 295 | * |
| 296 | * - Writers always use trylock (as the lock may be held by sleeping readers). |
| 297 | * |
| 298 | * - Readers may spin on the lock (as they can only wait for atomic writers). |
| 299 | * |
| 300 | * - Readers may sleep while holding the lock (as writes only use trylock). |
| 301 | */ |
| 302 | static void zspage_read_lock(struct zspage *zspage) |
| 303 | { |
| 304 | struct zspage_lock *zsl = &zspage->zsl; |
| 305 | |
| 306 | rwsem_acquire_read(&zsl->dep_map, 0, 0, _RET_IP_); |
| 307 | |
| 308 | spin_lock(&zsl->lock); |
| 309 | zsl->cnt++; |
| 310 | spin_unlock(&zsl->lock); |
| 311 | |
| 312 | lock_acquired(&zsl->dep_map, _RET_IP_); |
| 313 | } |
| 314 | |
| 315 | static void zspage_read_unlock(struct zspage *zspage) |
| 316 | { |
| 317 | struct zspage_lock *zsl = &zspage->zsl; |
| 318 | |
| 319 | rwsem_release(&zsl->dep_map, _RET_IP_); |
| 320 | |
| 321 | spin_lock(&zsl->lock); |
| 322 | zsl->cnt--; |
| 323 | spin_unlock(&zsl->lock); |
| 324 | } |
| 325 | |
| 326 | static __must_check bool zspage_write_trylock(struct zspage *zspage) |
| 327 | { |
| 328 | struct zspage_lock *zsl = &zspage->zsl; |
| 329 | |
| 330 | spin_lock(&zsl->lock); |
| 331 | if (zsl->cnt == ZS_PAGE_UNLOCKED) { |
| 332 | zsl->cnt = ZS_PAGE_WRLOCKED; |
| 333 | rwsem_acquire(&zsl->dep_map, 0, 1, _RET_IP_); |
| 334 | lock_acquired(&zsl->dep_map, _RET_IP_); |
| 335 | return true; |
| 336 | } |
| 337 | |
| 338 | spin_unlock(&zsl->lock); |
| 339 | return false; |
| 340 | } |
| 341 | |
| 342 | static void zspage_write_unlock(struct zspage *zspage) |
| 343 | { |
| 344 | struct zspage_lock *zsl = &zspage->zsl; |
| 345 | |
| 346 | rwsem_release(&zsl->dep_map, _RET_IP_); |
| 347 | |
| 348 | zsl->cnt = ZS_PAGE_UNLOCKED; |
| 349 | spin_unlock(&zsl->lock); |
| 350 | } |
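/*
 * Minimal usage sketch (editorial, not part of the original source):
 * readers bracket object access with the read lock and may sleep while
 * holding it, whereas writers (migration) only ever try-lock:
 *
 *	zspage_read_lock(zspage);
 *	...access the object's payload...
 *	zspage_read_unlock(zspage);
 *
 *	if (zspage_write_trylock(zspage)) {
 *		...replace or move pages of the zspage...
 *		zspage_write_unlock(zspage);
 *	}
 */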
| 351 | |
| 352 | /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ |
| 353 | static void SetZsHugePage(struct zspage *zspage) |
| 354 | { |
| 355 | zspage->huge = 1; |
| 356 | } |
| 357 | |
| 358 | static bool ZsHugePage(struct zspage *zspage) |
| 359 | { |
| 360 | return zspage->huge; |
| 361 | } |
| 362 | |
| 363 | #ifdef CONFIG_COMPACTION |
| 364 | static void kick_deferred_free(struct zs_pool *pool); |
| 365 | static void init_deferred_free(struct zs_pool *pool); |
| 366 | static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage); |
| 367 | #else |
| 368 | static void kick_deferred_free(struct zs_pool *pool) {} |
| 369 | static void init_deferred_free(struct zs_pool *pool) {} |
| 370 | static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} |
| 371 | #endif |
| 372 | |
| 373 | static int create_cache(struct zs_pool *pool) |
| 374 | { |
| 375 | char *name; |
| 376 | |
| 377 | name = kasprintf(GFP_KERNEL, "zs_handle-%s", pool->name); |
| 378 | if (!name) |
| 379 | return -ENOMEM; |
| 380 | pool->handle_cachep = kmem_cache_create(name, ZS_HANDLE_SIZE, |
| 381 | 0, 0, NULL); |
| 382 | kfree(name); |
| 383 | if (!pool->handle_cachep) |
| 384 | return -EINVAL; |
| 385 | |
| 386 | name = kasprintf(GFP_KERNEL, "zspage-%s", pool->name); |
| 387 | if (!name) |
| 388 | return -ENOMEM; |
| 389 | pool->zspage_cachep = kmem_cache_create(name, sizeof(struct zspage), |
| 390 | 0, 0, NULL); |
| 391 | kfree(name); |
| 392 | if (!pool->zspage_cachep) { |
| 393 | kmem_cache_destroy(pool->handle_cachep); |
| 394 | pool->handle_cachep = NULL; |
| 395 | return -EINVAL; |
| 396 | } |
| 397 | |
| 398 | return 0; |
| 399 | } |
| 400 | |
| 401 | static void destroy_cache(struct zs_pool *pool) |
| 402 | { |
| 403 | kmem_cache_destroy(pool->handle_cachep); |
| 404 | kmem_cache_destroy(pool->zspage_cachep); |
| 405 | } |
| 406 | |
| 407 | static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) |
| 408 | { |
| 409 | return (unsigned long)kmem_cache_alloc(pool->handle_cachep, |
| 410 | gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); |
| 411 | } |
| 412 | |
| 413 | static void cache_free_handle(struct zs_pool *pool, unsigned long handle) |
| 414 | { |
| 415 | kmem_cache_free(pool->handle_cachep, (void *)handle); |
| 416 | } |
| 417 | |
| 418 | static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags) |
| 419 | { |
| 420 | return kmem_cache_zalloc(pool->zspage_cachep, |
| 421 | flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); |
| 422 | } |
| 423 | |
| 424 | static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) |
| 425 | { |
| 426 | kmem_cache_free(pool->zspage_cachep, zspage); |
| 427 | } |
| 428 | |
| 429 | /* class->lock (which owns the handle) synchronizes races */ |
| 430 | static void record_obj(unsigned long handle, unsigned long obj) |
| 431 | { |
| 432 | *(unsigned long *)handle = obj; |
| 433 | } |
| 434 | |
| 435 | static inline bool __maybe_unused is_first_zpdesc(struct zpdesc *zpdesc) |
| 436 | { |
| 437 | return PagePrivate(zpdesc_page(zpdesc)); |
| 438 | } |
| 439 | |
| 440 | /* Protected by class->lock */ |
| 441 | static inline int get_zspage_inuse(struct zspage *zspage) |
| 442 | { |
| 443 | return zspage->inuse; |
| 444 | } |
| 445 | |
| 446 | static inline void mod_zspage_inuse(struct zspage *zspage, int val) |
| 447 | { |
| 448 | zspage->inuse += val; |
| 449 | } |
| 450 | |
| 451 | static struct zpdesc *get_first_zpdesc(struct zspage *zspage) |
| 452 | { |
| 453 | struct zpdesc *first_zpdesc = zspage->first_zpdesc; |
| 454 | |
| 455 | VM_BUG_ON_PAGE(!is_first_zpdesc(first_zpdesc), zpdesc_page(first_zpdesc)); |
| 456 | return first_zpdesc; |
| 457 | } |
| 458 | |
| 459 | #define FIRST_OBJ_PAGE_TYPE_MASK 0xffffff |
| 460 | |
| 461 | static inline unsigned int get_first_obj_offset(struct zpdesc *zpdesc) |
| 462 | { |
| 463 | VM_WARN_ON_ONCE(!PageZsmalloc(zpdesc_page(zpdesc))); |
| 464 | return zpdesc->first_obj_offset & FIRST_OBJ_PAGE_TYPE_MASK; |
| 465 | } |
| 466 | |
| 467 | static inline void set_first_obj_offset(struct zpdesc *zpdesc, unsigned int offset) |
| 468 | { |
| 469 | /* With 24 bits available, we can support offsets into 16 MiB pages. */ |
| 470 | BUILD_BUG_ON(PAGE_SIZE > SZ_16M); |
| 471 | VM_WARN_ON_ONCE(!PageZsmalloc(zpdesc_page(zpdesc))); |
| 472 | VM_WARN_ON_ONCE(offset & ~FIRST_OBJ_PAGE_TYPE_MASK); |
| 473 | zpdesc->first_obj_offset &= ~FIRST_OBJ_PAGE_TYPE_MASK; |
| 474 | zpdesc->first_obj_offset |= offset & FIRST_OBJ_PAGE_TYPE_MASK; |
| 475 | } |
| 476 | |
| 477 | static inline unsigned int get_freeobj(struct zspage *zspage) |
| 478 | { |
| 479 | return zspage->freeobj; |
| 480 | } |
| 481 | |
| 482 | static inline void set_freeobj(struct zspage *zspage, unsigned int obj) |
| 483 | { |
| 484 | zspage->freeobj = obj; |
| 485 | } |
| 486 | |
| 487 | static struct size_class *zspage_class(struct zs_pool *pool, |
| 488 | struct zspage *zspage) |
| 489 | { |
| 490 | return pool->size_class[zspage->class]; |
| 491 | } |
| 492 | |
| 493 | /* |
| 494 | * zsmalloc divides the pool into various size classes where each |
| 495 | * class maintains a list of zspages where each zspage is divided |
| 496 | * into equal sized chunks. Each allocation falls into one of these |
| 497 | * classes depending on its size. This function returns index of the |
| 498 | * size class which has chunk size big enough to hold the given size. |
| 499 | */ |
| 500 | static int get_size_class_index(int size) |
| 501 | { |
| 502 | int idx = 0; |
| 503 | |
| 504 | if (likely(size > ZS_MIN_ALLOC_SIZE)) |
| 505 | idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE, |
| 506 | ZS_SIZE_CLASS_DELTA); |
| 507 | |
| 508 | return min_t(int, ZS_SIZE_CLASSES - 1, idx); |
| 509 | } |
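/*
 * Worked example (editorial, assuming 4K pages, ZS_MIN_ALLOC_SIZE == 32 and
 * ZS_SIZE_CLASS_DELTA == 16): a request for 100 bytes gives
 * DIV_ROUND_UP(100 - 32, 16) = 5, i.e. the class whose chunk size is
 * 32 + 5 * 16 = 112 bytes, the smallest class able to hold the request.
 */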
| 510 | |
| 511 | static inline void class_stat_add(struct size_class *class, int type, |
| 512 | unsigned long cnt) |
| 513 | { |
| 514 | class->stats.objs[type] += cnt; |
| 515 | } |
| 516 | |
| 517 | static inline void class_stat_sub(struct size_class *class, int type, |
| 518 | unsigned long cnt) |
| 519 | { |
| 520 | class->stats.objs[type] -= cnt; |
| 521 | } |
| 522 | |
| 523 | static inline unsigned long class_stat_read(struct size_class *class, int type) |
| 524 | { |
| 525 | return class->stats.objs[type]; |
| 526 | } |
| 527 | |
| 528 | #ifdef CONFIG_ZSMALLOC_STAT |
| 529 | |
| 530 | static void __init zs_stat_init(void) |
| 531 | { |
| 532 | if (!debugfs_initialized()) { |
| 533 | pr_warn("debugfs not available, stat dir not created\n" ); |
| 534 | return; |
| 535 | } |
| 536 | |
| 537 | zs_stat_root = debugfs_create_dir("zsmalloc", NULL); |
| 538 | } |
| 539 | |
| 540 | static void __exit zs_stat_exit(void) |
| 541 | { |
| 542 | debugfs_remove_recursive(zs_stat_root); |
| 543 | } |
| 544 | |
| 545 | static unsigned long zs_can_compact(struct size_class *class); |
| 546 | |
| 547 | static int zs_stats_size_show(struct seq_file *s, void *v) |
| 548 | { |
| 549 | int i, fg; |
| 550 | struct zs_pool *pool = s->private; |
| 551 | struct size_class *class; |
| 552 | int objs_per_zspage; |
| 553 | unsigned long obj_allocated, obj_used, pages_used, freeable; |
| 554 | unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0; |
| 555 | unsigned long total_freeable = 0; |
| 556 | unsigned long inuse_totals[NR_FULLNESS_GROUPS] = {0, }; |
| 557 | |
| 558 | seq_printf(s, " %5s %5s %9s %9s %9s %9s %9s %9s %9s %9s %9s %9s %9s %13s %10s %10s %16s %8s\n", |
| 559 | "class", "size", "10%", "20%", "30%", "40%", |
| 560 | "50%", "60%", "70%", "80%", "90%", "99%", "100%", |
| 561 | "obj_allocated", "obj_used", "pages_used", |
| 562 | "pages_per_zspage", "freeable"); |
| 563 | |
| 564 | for (i = 0; i < ZS_SIZE_CLASSES; i++) { |
| 565 | |
| 566 | class = pool->size_class[i]; |
| 567 | |
| 568 | if (class->index != i) |
| 569 | continue; |
| 570 | |
| 571 | spin_lock(&class->lock); |
| 572 | |
| 573 | seq_printf(s, " %5u %5u ", i, class->size); |
| 574 | for (fg = ZS_INUSE_RATIO_10; fg < NR_FULLNESS_GROUPS; fg++) { |
| 575 | inuse_totals[fg] += class_stat_read(class, fg); |
| 576 | seq_printf(s, "%9lu ", class_stat_read(class, fg)); |
| 577 | } |
| 578 | |
| 579 | obj_allocated = class_stat_read(class, ZS_OBJS_ALLOCATED); |
| 580 | obj_used = class_stat_read(class, ZS_OBJS_INUSE); |
| 581 | freeable = zs_can_compact(class); |
| 582 | spin_unlock(&class->lock); |
| 583 | |
| 584 | objs_per_zspage = class->objs_per_zspage; |
| 585 | pages_used = obj_allocated / objs_per_zspage * |
| 586 | class->pages_per_zspage; |
| 587 | |
| 588 | seq_printf(s, "%13lu %10lu %10lu %16d %8lu\n", |
| 589 | obj_allocated, obj_used, pages_used, |
| 590 | class->pages_per_zspage, freeable); |
| 591 | |
| 592 | total_objs += obj_allocated; |
| 593 | total_used_objs += obj_used; |
| 594 | total_pages += pages_used; |
| 595 | total_freeable += freeable; |
| 596 | } |
| 597 | |
| 598 | seq_puts(s, "\n"); |
| 599 | seq_printf(s, " %5s %5s ", "Total", ""); |
| 600 | |
| 601 | for (fg = ZS_INUSE_RATIO_10; fg < NR_FULLNESS_GROUPS; fg++) |
| 602 | seq_printf(s, "%9lu ", inuse_totals[fg]); |
| 603 | |
| 604 | seq_printf(s, "%13lu %10lu %10lu %16s %8lu\n", |
| 605 | total_objs, total_used_objs, total_pages, "", |
| 606 | total_freeable); |
| 607 | |
| 608 | return 0; |
| 609 | } |
| 610 | DEFINE_SHOW_ATTRIBUTE(zs_stats_size); |
| 611 | |
| 612 | static void zs_pool_stat_create(struct zs_pool *pool, const char *name) |
| 613 | { |
| 614 | if (!zs_stat_root) { |
| 615 | pr_warn("no root stat dir, not creating <%s> stat dir\n" , name); |
| 616 | return; |
| 617 | } |
| 618 | |
| 619 | pool->stat_dentry = debugfs_create_dir(name, zs_stat_root); |
| 620 | |
| 621 | debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool, |
| 622 | &zs_stats_size_fops); |
| 623 | } |
| 624 | |
| 625 | static void zs_pool_stat_destroy(struct zs_pool *pool) |
| 626 | { |
| 627 | debugfs_remove_recursive(pool->stat_dentry); |
| 628 | } |
| 629 | |
| 630 | #else /* CONFIG_ZSMALLOC_STAT */ |
| 631 | static void __init zs_stat_init(void) |
| 632 | { |
| 633 | } |
| 634 | |
| 635 | static void __exit zs_stat_exit(void) |
| 636 | { |
| 637 | } |
| 638 | |
| 639 | static inline void zs_pool_stat_create(struct zs_pool *pool, const char *name) |
| 640 | { |
| 641 | } |
| 642 | |
| 643 | static inline void zs_pool_stat_destroy(struct zs_pool *pool) |
| 644 | { |
| 645 | } |
| 646 | #endif |
| 647 | |
| 648 | |
| 649 | /* |
| 650 | * For each size class, zspages are divided into different groups |
| 651 | * depending on their usage ratio. This function returns fullness |
| 652 | * status of the given page. |
| 653 | */ |
| 654 | static int get_fullness_group(struct size_class *class, struct zspage *zspage) |
| 655 | { |
| 656 | int inuse, objs_per_zspage, ratio; |
| 657 | |
| 658 | inuse = get_zspage_inuse(zspage); |
| 659 | objs_per_zspage = class->objs_per_zspage; |
| 660 | |
| 661 | if (inuse == 0) |
| 662 | return ZS_INUSE_RATIO_0; |
| 663 | if (inuse == objs_per_zspage) |
| 664 | return ZS_INUSE_RATIO_100; |
| 665 | |
| 666 | ratio = 100 * inuse / objs_per_zspage; |
| 667 | /* |
| 668 | * Take integer division into consideration: a page with one inuse |
| 669 | * object out of 127 possible, will end up having 0 usage ratio, |
| 670 | * which is wrong as it belongs in ZS_INUSE_RATIO_10 fullness group. |
| 671 | */ |
| 672 | return ratio / 10 + 1; |
| 673 | } |
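/*
 * Worked example (editorial): with objs_per_zspage == 127 and inuse == 1,
 * the integer ratio 100 * 1 / 127 is 0, so the function returns
 * 0 / 10 + 1 == ZS_INUSE_RATIO_10 -- a barely used zspage is grouped as
 * "up to 10% used" rather than "empty", as the comment above explains.
 */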
| 674 | |
| 675 | /* |
| 676 | * Each size class maintains various freelists and zspages are assigned |
| 677 | * to one of these freelists based on the number of live objects they |
| 678 | * have. This function inserts the given zspage into the freelist |
| 679 | * identified by <class, fullness_group>. |
| 680 | */ |
| 681 | static void insert_zspage(struct size_class *class, |
| 682 | struct zspage *zspage, |
| 683 | int fullness) |
| 684 | { |
| 685 | class_stat_add(class, fullness, 1); |
| 686 | list_add(&zspage->list, &class->fullness_list[fullness]); |
| 687 | zspage->fullness = fullness; |
| 688 | } |
| 689 | |
| 690 | /* |
| 691 | * This function removes the given zspage from the freelist identified |
| 692 | * by <class, fullness_group>. |
| 693 | */ |
| 694 | static void remove_zspage(struct size_class *class, struct zspage *zspage) |
| 695 | { |
| 696 | int fullness = zspage->fullness; |
| 697 | |
| 698 | VM_BUG_ON(list_empty(&class->fullness_list[fullness])); |
| 699 | |
| 700 | list_del_init(&zspage->list); |
| 701 | class_stat_sub(class, fullness, 1); |
| 702 | } |
| 703 | |
| 704 | /* |
| 705 | * Each size class maintains zspages in different fullness groups depending |
| 706 | * on the number of live objects they contain. When allocating or freeing |
| 707 | * objects, the fullness status of the page can change, for instance, from |
| 708 | * INUSE_RATIO_80 to INUSE_RATIO_70 when freeing an object. This function |
| 709 | * checks if such a status change has occurred for the given page and |
| 710 | * accordingly moves the page from the list of the old fullness group to that |
| 711 | * of the new fullness group. |
| 712 | */ |
| 713 | static int fix_fullness_group(struct size_class *class, struct zspage *zspage) |
| 714 | { |
| 715 | int newfg; |
| 716 | |
| 717 | newfg = get_fullness_group(class, zspage); |
| 718 | if (newfg == zspage->fullness) |
| 719 | goto out; |
| 720 | |
| 721 | remove_zspage(class, zspage); |
| 722 | insert_zspage(class, zspage, newfg); |
| 723 | out: |
| 724 | return newfg; |
| 725 | } |
| 726 | |
| 727 | static struct zspage *get_zspage(struct zpdesc *zpdesc) |
| 728 | { |
| 729 | struct zspage *zspage = zpdesc->zspage; |
| 730 | |
| 731 | BUG_ON(zspage->magic != ZSPAGE_MAGIC); |
| 732 | return zspage; |
| 733 | } |
| 734 | |
| 735 | static struct zpdesc *get_next_zpdesc(struct zpdesc *zpdesc) |
| 736 | { |
| 737 | struct zspage *zspage = get_zspage(zpdesc); |
| 738 | |
| 739 | if (unlikely(ZsHugePage(zspage))) |
| 740 | return NULL; |
| 741 | |
| 742 | return zpdesc->next; |
| 743 | } |
| 744 | |
| 745 | /** |
| 746 | * obj_to_location - get (<zpdesc>, <obj_idx>) from encoded object value |
| 747 | * @obj: the encoded object value |
| 748 | * @zpdesc: zpdesc object resides in zspage |
| 749 | * @obj_idx: object index |
| 750 | */ |
| 751 | static void obj_to_location(unsigned long obj, struct zpdesc **zpdesc, |
| 752 | unsigned int *obj_idx) |
| 753 | { |
| 754 | *zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS); |
| 755 | *obj_idx = (obj & OBJ_INDEX_MASK); |
| 756 | } |
| 757 | |
| 758 | static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc) |
| 759 | { |
| 760 | *zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS); |
| 761 | } |
| 762 | |
| 763 | /** |
| 764 | * location_to_obj - get obj value encoded from (<zpdesc>, <obj_idx>) |
| 765 | * @zpdesc: zpdesc object resides in zspage |
| 766 | * @obj_idx: object index |
| 767 | */ |
| 768 | static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx) |
| 769 | { |
| 770 | unsigned long obj; |
| 771 | |
| 772 | obj = zpdesc_pfn(zpdesc) << OBJ_INDEX_BITS; |
| 773 | obj |= obj_idx & OBJ_INDEX_MASK; |
| 774 | |
| 775 | return obj; |
| 776 | } |
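/*
 * Editorial note: location_to_obj() and obj_to_location() are inverses of
 * each other.  For example, location_to_obj(zpdesc, 5) for a zpdesc whose
 * PFN is 0x1000 yields (0x1000 << OBJ_INDEX_BITS) | 5, and
 * obj_to_location() recovers the same zpdesc (via the PFN) and obj_idx 5.
 */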
| 777 | |
| 778 | static unsigned long handle_to_obj(unsigned long handle) |
| 779 | { |
| 780 | return *(unsigned long *)handle; |
| 781 | } |
| 782 | |
| 783 | static inline bool obj_allocated(struct zpdesc *zpdesc, void *obj, |
| 784 | unsigned long *phandle) |
| 785 | { |
| 786 | unsigned long handle; |
| 787 | struct zspage *zspage = get_zspage(zpdesc); |
| 788 | |
| 789 | if (unlikely(ZsHugePage(zspage))) { |
| 790 | VM_BUG_ON_PAGE(!is_first_zpdesc(zpdesc), zpdesc_page(zpdesc)); |
| 791 | handle = zpdesc->handle; |
| 792 | } else |
| 793 | handle = *(unsigned long *)obj; |
| 794 | |
| 795 | if (!(handle & OBJ_ALLOCATED_TAG)) |
| 796 | return false; |
| 797 | |
| 798 | /* Clear all tags before returning the handle */ |
| 799 | *phandle = handle & ~OBJ_TAG_MASK; |
| 800 | return true; |
| 801 | } |
| 802 | |
| 803 | static void reset_zpdesc(struct zpdesc *zpdesc) |
| 804 | { |
| 805 | struct page *page = zpdesc_page(zpdesc); |
| 806 | |
| 807 | ClearPagePrivate(page); |
| 808 | zpdesc->zspage = NULL; |
| 809 | zpdesc->next = NULL; |
| 810 | /* PageZsmalloc is sticky until the page is freed to the buddy. */ |
| 811 | } |
| 812 | |
| 813 | static int trylock_zspage(struct zspage *zspage) |
| 814 | { |
| 815 | struct zpdesc *cursor, *fail; |
| 816 | |
| 817 | for (cursor = get_first_zpdesc(zspage); cursor != NULL; cursor = |
| 818 | get_next_zpdesc(cursor)) { |
| 819 | if (!zpdesc_trylock(cursor)) { |
| 820 | fail = cursor; |
| 821 | goto unlock; |
| 822 | } |
| 823 | } |
| 824 | |
| 825 | return 1; |
| 826 | unlock: |
| 827 | for (cursor = get_first_zpdesc(zspage); cursor != fail; cursor = |
| 828 | get_next_zpdesc(cursor)) |
| 829 | zpdesc_unlock(cursor); |
| 830 | |
| 831 | return 0; |
| 832 | } |
| 833 | |
| 834 | static void __free_zspage(struct zs_pool *pool, struct size_class *class, |
| 835 | struct zspage *zspage) |
| 836 | { |
| 837 | struct zpdesc *zpdesc, *next; |
| 838 | |
| 839 | assert_spin_locked(&class->lock); |
| 840 | |
| 841 | VM_BUG_ON(get_zspage_inuse(zspage)); |
| 842 | VM_BUG_ON(zspage->fullness != ZS_INUSE_RATIO_0); |
| 843 | |
| 844 | next = zpdesc = get_first_zpdesc(zspage); |
| 845 | do { |
| 846 | VM_BUG_ON_PAGE(!zpdesc_is_locked(zpdesc), zpdesc_page(zpdesc)); |
| 847 | next = get_next_zpdesc(zpdesc); |
| 848 | reset_zpdesc(zpdesc); |
| 849 | zpdesc_unlock(zpdesc); |
| 850 | zpdesc_dec_zone_page_state(zpdesc); |
| 851 | zpdesc_put(zpdesc); |
| 852 | zpdesc = next; |
| 853 | } while (zpdesc != NULL); |
| 854 | |
| 855 | cache_free_zspage(pool, zspage); |
| 856 | |
| 857 | class_stat_sub(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage); |
| 858 | atomic_long_sub(class->pages_per_zspage, &pool->pages_allocated); |
| 859 | } |
| 860 | |
| 861 | static void free_zspage(struct zs_pool *pool, struct size_class *class, |
| 862 | struct zspage *zspage) |
| 863 | { |
| 864 | VM_BUG_ON(get_zspage_inuse(zspage)); |
| 865 | VM_BUG_ON(list_empty(&zspage->list)); |
| 866 | |
| 867 | /* |
| 868 | * Since zs_free cannot sleep, this function cannot call |
| 869 | * lock_page. The page locks that trylock_zspage acquired will be |
| 870 | * released by __free_zspage. |
| 871 | */ |
| 872 | if (!trylock_zspage(zspage)) { |
| 873 | kick_deferred_free(pool); |
| 874 | return; |
| 875 | } |
| 876 | |
| 877 | remove_zspage(class, zspage); |
| 878 | __free_zspage(pool, class, zspage); |
| 879 | } |
| 880 | |
| 881 | /* Initialize a newly allocated zspage */ |
| 882 | static void init_zspage(struct size_class *class, struct zspage *zspage) |
| 883 | { |
| 884 | unsigned int freeobj = 1; |
| 885 | unsigned long off = 0; |
| 886 | struct zpdesc *zpdesc = get_first_zpdesc(zspage); |
| 887 | |
| 888 | while (zpdesc) { |
| 889 | struct zpdesc *next_zpdesc; |
| 890 | struct link_free *link; |
| 891 | void *vaddr; |
| 892 | |
| 893 | set_first_obj_offset(zpdesc, off); |
| 894 | |
| 895 | vaddr = kmap_local_zpdesc(zpdesc); |
| 896 | link = (struct link_free *)vaddr + off / sizeof(*link); |
| 897 | |
| 898 | while ((off += class->size) < PAGE_SIZE) { |
| 899 | link->next = freeobj++ << OBJ_TAG_BITS; |
| 900 | link += class->size / sizeof(*link); |
| 901 | } |
| 902 | |
| 903 | /* |
| 904 | * We now come to the last (full or partial) object on this |
| 905 | * page, which must point to the first object on the next |
| 906 | * page (if present) |
| 907 | */ |
| 908 | next_zpdesc = get_next_zpdesc(zpdesc); |
| 909 | if (next_zpdesc) { |
| 910 | link->next = freeobj++ << OBJ_TAG_BITS; |
| 911 | } else { |
| 912 | /* |
| 913 | * Reset OBJ_TAG_BITS bit to last link to tell |
| 914 | * whether it's allocated object or not. |
| 915 | */ |
| 916 | link->next = -1UL << OBJ_TAG_BITS; |
| 917 | } |
| 918 | kunmap_local(vaddr); |
| 919 | zpdesc = next_zpdesc; |
| 920 | off %= PAGE_SIZE; |
| 921 | } |
| 922 | |
| 923 | set_freeobj(zspage, 0); |
| 924 | } |
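/*
 * Editorial sketch of the resulting freelist (assuming several objects fit
 * per page): object 0 stores "next = 1", object 1 stores "next = 2", and so
 * on across page boundaries, each index shifted left by OBJ_TAG_BITS so the
 * allocated-tag bit stays clear.  The last object stores -1UL << OBJ_TAG_BITS
 * as an end marker, and set_freeobj(zspage, 0) makes object 0 the first
 * allocation candidate.
 */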
| 925 | |
| 926 | static void create_page_chain(struct size_class *class, struct zspage *zspage, |
| 927 | struct zpdesc *zpdescs[]) |
| 928 | { |
| 929 | int i; |
| 930 | struct zpdesc *zpdesc; |
| 931 | struct zpdesc *prev_zpdesc = NULL; |
| 932 | int nr_zpdescs = class->pages_per_zspage; |
| 933 | |
| 934 | /* |
| 935 | * Allocate individual pages and link them together as: |
| 936 | * 1. all pages are linked together using zpdesc->next |
| 937 | * 2. each sub-page points to the zspage using zpdesc->zspage |
| 938 | * |
| 939 | * we set PG_private to identify the first zpdesc (i.e. no other zpdesc |
| 940 | * has this flag set). |
| 941 | */ |
| 942 | for (i = 0; i < nr_zpdescs; i++) { |
| 943 | zpdesc = zpdescs[i]; |
| 944 | zpdesc->zspage = zspage; |
| 945 | zpdesc->next = NULL; |
| 946 | if (i == 0) { |
| 947 | zspage->first_zpdesc = zpdesc; |
| 948 | zpdesc_set_first(zpdesc); |
| 949 | if (unlikely(class->objs_per_zspage == 1 && |
| 950 | class->pages_per_zspage == 1)) |
| 951 | SetZsHugePage(zspage); |
| 952 | } else { |
| 953 | prev_zpdesc->next = zpdesc; |
| 954 | } |
| 955 | prev_zpdesc = zpdesc; |
| 956 | } |
| 957 | } |
| 958 | |
| 959 | /* |
| 960 | * Allocate a zspage for the given size class |
| 961 | */ |
| 962 | static struct zspage *alloc_zspage(struct zs_pool *pool, |
| 963 | struct size_class *class, |
| 964 | gfp_t gfp, const int nid) |
| 965 | { |
| 966 | int i; |
| 967 | struct zpdesc *zpdescs[ZS_MAX_PAGES_PER_ZSPAGE]; |
| 968 | struct zspage *zspage = cache_alloc_zspage(pool, gfp); |
| 969 | |
| 970 | if (!zspage) |
| 971 | return NULL; |
| 972 | |
| 973 | if (!IS_ENABLED(CONFIG_COMPACTION)) |
| 974 | gfp &= ~__GFP_MOVABLE; |
| 975 | |
| 976 | zspage->magic = ZSPAGE_MAGIC; |
| 977 | zspage->pool = pool; |
| 978 | zspage->class = class->index; |
| 979 | zspage_lock_init(zspage); |
| 980 | |
| 981 | for (i = 0; i < class->pages_per_zspage; i++) { |
| 982 | struct zpdesc *zpdesc; |
| 983 | |
| 984 | zpdesc = alloc_zpdesc(gfp, nid); |
| 985 | if (!zpdesc) { |
| 986 | while (--i >= 0) { |
| 987 | zpdesc_dec_zone_page_state(zpdescs[i]); |
| 988 | free_zpdesc(zpdescs[i]); |
| 989 | } |
| 990 | cache_free_zspage(pool, zspage); |
| 991 | return NULL; |
| 992 | } |
| 993 | __zpdesc_set_zsmalloc(zpdesc); |
| 994 | |
| 995 | zpdesc_inc_zone_page_state(zpdesc); |
| 996 | zpdescs[i] = zpdesc; |
| 997 | } |
| 998 | |
| 999 | create_page_chain(class, zspage, zpdescs); |
| 1000 | init_zspage(class, zspage); |
| 1001 | |
| 1002 | return zspage; |
| 1003 | } |
| 1004 | |
| 1005 | static struct zspage *find_get_zspage(struct size_class *class) |
| 1006 | { |
| 1007 | int i; |
| 1008 | struct zspage *zspage; |
| 1009 | |
| 1010 | for (i = ZS_INUSE_RATIO_99; i >= ZS_INUSE_RATIO_0; i--) { |
| 1011 | zspage = list_first_entry_or_null(&class->fullness_list[i], |
| 1012 | struct zspage, list); |
| 1013 | if (zspage) |
| 1014 | break; |
| 1015 | } |
| 1016 | |
| 1017 | return zspage; |
| 1018 | } |
| 1019 | |
| 1020 | static bool can_merge(struct size_class *prev, int pages_per_zspage, |
| 1021 | int objs_per_zspage) |
| 1022 | { |
| 1023 | if (prev->pages_per_zspage == pages_per_zspage && |
| 1024 | prev->objs_per_zspage == objs_per_zspage) |
| 1025 | return true; |
| 1026 | |
| 1027 | return false; |
| 1028 | } |
| 1029 | |
| 1030 | static bool zspage_full(struct size_class *class, struct zspage *zspage) |
| 1031 | { |
| 1032 | return get_zspage_inuse(zspage) == class->objs_per_zspage; |
| 1033 | } |
| 1034 | |
| 1035 | static bool zspage_empty(struct zspage *zspage) |
| 1036 | { |
| 1037 | return get_zspage_inuse(zspage) == 0; |
| 1038 | } |
| 1039 | |
| 1040 | /** |
| 1041 | * zs_lookup_class_index() - Returns index of the zsmalloc &size_class |
| 1042 | * that holds objects of the provided size. |
| 1043 | * @pool: zsmalloc pool to use |
| 1044 | * @size: object size |
| 1045 | * |
| 1046 | * Context: Any context. |
| 1047 | * |
| 1048 | * Return: the index of the zsmalloc &size_class that holds objects of the |
| 1049 | * provided size. |
| 1050 | */ |
| 1051 | unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size) |
| 1052 | { |
| 1053 | struct size_class *class; |
| 1054 | |
| 1055 | class = pool->size_class[get_size_class_index(size)]; |
| 1056 | |
| 1057 | return class->index; |
| 1058 | } |
| 1059 | EXPORT_SYMBOL_GPL(zs_lookup_class_index); |
| 1060 | |
| 1061 | unsigned long zs_get_total_pages(struct zs_pool *pool) |
| 1062 | { |
| 1063 | return atomic_long_read(&pool->pages_allocated); |
| 1064 | } |
| 1065 | EXPORT_SYMBOL_GPL(zs_get_total_pages); |
| 1066 | |
| 1067 | void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle, |
| 1068 | void *local_copy) |
| 1069 | { |
| 1070 | struct zspage *zspage; |
| 1071 | struct zpdesc *zpdesc; |
| 1072 | unsigned long obj, off; |
| 1073 | unsigned int obj_idx; |
| 1074 | struct size_class *class; |
| 1075 | void *addr; |
| 1076 | |
| 1077 | /* Guarantee we can get zspage from handle safely */ |
| 1078 | read_lock(&pool->lock); |
| 1079 | obj = handle_to_obj(handle); |
| 1080 | obj_to_location(obj, &zpdesc, &obj_idx); |
| 1081 | zspage = get_zspage(zpdesc); |
| 1082 | |
| 1083 | /* Make sure migration doesn't move any pages in this zspage */ |
| 1084 | zspage_read_lock(zspage); |
| 1085 | read_unlock(&pool->lock); |
| 1086 | |
| 1087 | class = zspage_class(pool, zspage); |
| 1088 | off = offset_in_page(class->size * obj_idx); |
| 1089 | |
| 1090 | if (off + class->size <= PAGE_SIZE) { |
| 1091 | /* this object is contained entirely within a page */ |
| 1092 | addr = kmap_local_zpdesc(zpdesc); |
| 1093 | addr += off; |
| 1094 | } else { |
| 1095 | size_t sizes[2]; |
| 1096 | |
| 1097 | /* this object spans two pages */ |
| 1098 | sizes[0] = PAGE_SIZE - off; |
| 1099 | sizes[1] = class->size - sizes[0]; |
| 1100 | addr = local_copy; |
| 1101 | |
| 1102 | memcpy_from_page(addr, zpdesc_page(zpdesc), |
| 1103 | off, sizes[0]); |
| 1104 | zpdesc = get_next_zpdesc(zpdesc); |
| 1105 | memcpy_from_page(addr + sizes[0], |
| 1106 | zpdesc_page(zpdesc), |
| 1107 | 0, sizes[1]); |
| 1108 | } |
| 1109 | |
| 1110 | if (!ZsHugePage(zspage)) |
| 1111 | addr += ZS_HANDLE_SIZE; |
| 1112 | |
| 1113 | return addr; |
| 1114 | } |
| 1115 | EXPORT_SYMBOL_GPL(zs_obj_read_begin); |
| 1116 | |
| 1117 | void zs_obj_read_end(struct zs_pool *pool, unsigned long handle, |
| 1118 | void *handle_mem) |
| 1119 | { |
| 1120 | struct zspage *zspage; |
| 1121 | struct zpdesc *zpdesc; |
| 1122 | unsigned long obj, off; |
| 1123 | unsigned int obj_idx; |
| 1124 | struct size_class *class; |
| 1125 | |
| 1126 | obj = handle_to_obj(handle); |
| 1127 | obj_to_location(obj, &zpdesc, &obj_idx); |
| 1128 | zspage = get_zspage(zpdesc); |
| 1129 | class = zspage_class(pool, zspage); |
| 1130 | off = offset_in_page(class->size * obj_idx); |
| 1131 | |
| 1132 | if (off + class->size <= PAGE_SIZE) { |
| 1133 | if (!ZsHugePage(zspage)) |
| 1134 | off += ZS_HANDLE_SIZE; |
| 1135 | handle_mem -= off; |
| 1136 | kunmap_local(handle_mem); |
| 1137 | } |
| 1138 | |
| 1139 | zspage_read_unlock(zspage); |
| 1140 | } |
| 1141 | EXPORT_SYMBOL_GPL(zs_obj_read_end); |
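/*
 * Minimal usage sketch (editorial, not part of the original source):
 * callers pair zs_obj_read_begin() with zs_obj_read_end() and must pass a
 * local buffer large enough for the class size, in case the object spans
 * two pages:
 *
 *	char local_copy[PAGE_SIZE];	/* illustrative sizing */
 *	void *mem;
 *
 *	mem = zs_obj_read_begin(pool, handle, local_copy);
 *	...read the object's payload from mem...
 *	zs_obj_read_end(pool, handle, mem);
 */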
| 1142 | |
| 1143 | void zs_obj_write(struct zs_pool *pool, unsigned long handle, |
| 1144 | void *handle_mem, size_t mem_len) |
| 1145 | { |
| 1146 | struct zspage *zspage; |
| 1147 | struct zpdesc *zpdesc; |
| 1148 | unsigned long obj, off; |
| 1149 | unsigned int obj_idx; |
| 1150 | struct size_class *class; |
| 1151 | |
| 1152 | /* Guarantee we can get zspage from handle safely */ |
| 1153 | read_lock(&pool->lock); |
| 1154 | obj = handle_to_obj(handle); |
| 1155 | obj_to_location(obj, &zpdesc, &obj_idx); |
| 1156 | zspage = get_zspage(zpdesc); |
| 1157 | |
| 1158 | /* Make sure migration doesn't move any pages in this zspage */ |
| 1159 | zspage_read_lock(zspage); |
| 1160 | read_unlock(&pool->lock); |
| 1161 | |
| 1162 | class = zspage_class(pool, zspage); |
| 1163 | off = offset_in_page(class->size * obj_idx); |
| 1164 | |
| 1165 | if (!ZsHugePage(zspage)) |
| 1166 | off += ZS_HANDLE_SIZE; |
| 1167 | |
| 1168 | if (off + mem_len <= PAGE_SIZE) { |
| 1169 | /* this object is contained entirely within a page */ |
| 1170 | void *dst = kmap_local_zpdesc(zpdesc); |
| 1171 | |
| 1172 | memcpy(dst + off, handle_mem, mem_len); |
| 1173 | kunmap_local(dst); |
| 1174 | } else { |
| 1175 | /* this object spans two pages */ |
| 1176 | size_t sizes[2]; |
| 1177 | |
| 1178 | sizes[0] = PAGE_SIZE - off; |
| 1179 | sizes[1] = mem_len - sizes[0]; |
| 1180 | |
| 1181 | memcpy_to_page(zpdesc_page(zpdesc), off, |
| 1182 | handle_mem, sizes[0]); |
| 1183 | zpdesc = get_next_zpdesc(zpdesc); |
| 1184 | memcpy_to_page(zpdesc_page(zpdesc), 0, |
| 1185 | handle_mem + sizes[0], sizes[1]); |
| 1186 | } |
| 1187 | |
| 1188 | zspage_read_unlock(zspage); |
| 1189 | } |
| 1190 | EXPORT_SYMBOL_GPL(zs_obj_write); |
| 1191 | |
| 1192 | /** |
| 1193 | * zs_huge_class_size() - Returns the size (in bytes) of the first huge |
| 1194 | * zsmalloc &size_class. |
| 1195 | * @pool: zsmalloc pool to use |
| 1196 | * |
| 1197 | * The function returns the size of the first huge class - any object of equal |
| 1198 | * or bigger size will be stored in zspage consisting of a single physical |
| 1199 | * page. |
| 1200 | * |
| 1201 | * Context: Any context. |
| 1202 | * |
| 1203 | * Return: the size (in bytes) of the first huge zsmalloc &size_class. |
| 1204 | */ |
| 1205 | size_t zs_huge_class_size(struct zs_pool *pool) |
| 1206 | { |
| 1207 | return huge_class_size; |
| 1208 | } |
| 1209 | EXPORT_SYMBOL_GPL(zs_huge_class_size); |
| 1210 | |
| 1211 | static unsigned long obj_malloc(struct zs_pool *pool, |
| 1212 | struct zspage *zspage, unsigned long handle) |
| 1213 | { |
| 1214 | int i, nr_zpdesc, offset; |
| 1215 | unsigned long obj; |
| 1216 | struct link_free *link; |
| 1217 | struct size_class *class; |
| 1218 | |
| 1219 | struct zpdesc *m_zpdesc; |
| 1220 | unsigned long m_offset; |
| 1221 | void *vaddr; |
| 1222 | |
| 1223 | class = pool->size_class[zspage->class]; |
| 1224 | obj = get_freeobj(zspage); |
| 1225 | |
| 1226 | offset = obj * class->size; |
| 1227 | nr_zpdesc = offset >> PAGE_SHIFT; |
| 1228 | m_offset = offset_in_page(offset); |
| 1229 | m_zpdesc = get_first_zpdesc(zspage); |
| 1230 | |
| 1231 | for (i = 0; i < nr_zpdesc; i++) |
| 1232 | m_zpdesc = get_next_zpdesc(m_zpdesc); |
| 1233 | |
| 1234 | vaddr = kmap_local_zpdesc(m_zpdesc); |
| 1235 | link = (struct link_free *)vaddr + m_offset / sizeof(*link); |
| 1236 | set_freeobj(zspage, link->next >> OBJ_TAG_BITS); |
| 1237 | if (likely(!ZsHugePage(zspage))) |
| 1238 | /* record handle in the header of allocated chunk */ |
| 1239 | link->handle = handle | OBJ_ALLOCATED_TAG; |
| 1240 | else |
| 1241 | zspage->first_zpdesc->handle = handle | OBJ_ALLOCATED_TAG; |
| 1242 | |
| 1243 | kunmap_local(vaddr); |
| 1244 | mod_zspage_inuse(zspage, 1); |
| 1245 | |
| 1246 | obj = location_to_obj(m_zpdesc, obj); |
| 1247 | record_obj(handle, obj); |
| 1248 | |
| 1249 | return obj; |
| 1250 | } |
| 1251 | |
| 1252 | |
| 1253 | /** |
| 1254 | * zs_malloc - Allocate block of given size from pool. |
| 1255 | * @pool: pool to allocate from |
| 1256 | * @size: size of block to allocate |
| 1257 | * @gfp: gfp flags when allocating object |
| 1258 | * @nid: The preferred node id to allocate new zspage (if needed) |
| 1259 | * |
| 1260 | * On success, handle to the allocated object is returned, |
| 1261 | * otherwise an ERR_PTR(). |
| 1262 | * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail. |
| 1263 | */ |
| 1264 | unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp, |
| 1265 | const int nid) |
| 1266 | { |
| 1267 | unsigned long handle; |
| 1268 | struct size_class *class; |
| 1269 | int newfg; |
| 1270 | struct zspage *zspage; |
| 1271 | |
| 1272 | if (unlikely(!size)) |
| 1273 | return (unsigned long)ERR_PTR(-EINVAL); |
| 1274 | |
| 1275 | if (unlikely(size > ZS_MAX_ALLOC_SIZE)) |
| 1276 | return (unsigned long)ERR_PTR(-ENOSPC); |
| 1277 | |
| 1278 | handle = cache_alloc_handle(pool, gfp); |
| 1279 | if (!handle) |
| 1280 | return (unsigned long)ERR_PTR(-ENOMEM); |
| 1281 | |
| 1282 | /* extra space in chunk to keep the handle */ |
| 1283 | size += ZS_HANDLE_SIZE; |
| 1284 | class = pool->size_class[get_size_class_index(size)]; |
| 1285 | |
| 1286 | /* class->lock effectively protects the zpage migration */ |
| 1287 | spin_lock(&class->lock); |
| 1288 | zspage = find_get_zspage(class); |
| 1289 | if (likely(zspage)) { |
| 1290 | obj_malloc(pool, zspage, handle); |
| 1291 | /* Now move the zspage to another fullness group, if required */ |
| 1292 | fix_fullness_group(class, zspage); |
| 1293 | class_stat_add(class, ZS_OBJS_INUSE, 1); |
| 1294 | |
| 1295 | goto out; |
| 1296 | } |
| 1297 | |
| 1298 | spin_unlock(&class->lock); |
| 1299 | |
| 1300 | zspage = alloc_zspage(pool, class, gfp, nid); |
| 1301 | if (!zspage) { |
| 1302 | cache_free_handle(pool, handle); |
| 1303 | return (unsigned long)ERR_PTR(-ENOMEM); |
| 1304 | } |
| 1305 | |
| 1306 | spin_lock(&class->lock); |
| 1307 | obj_malloc(pool, zspage, handle); |
| 1308 | newfg = get_fullness_group(class, zspage); |
| 1309 | insert_zspage(class, zspage, newfg); |
| 1310 | atomic_long_add(class->pages_per_zspage, &pool->pages_allocated); |
| 1311 | class_stat_add(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage); |
| 1312 | class_stat_add(class, ZS_OBJS_INUSE, 1); |
| 1313 | |
| 1314 | /* The zspage is now fully set up, so mark it as movable */ |
| 1315 | SetZsPageMovable(pool, zspage); |
| 1316 | out: |
| 1317 | spin_unlock(&class->lock); |
| 1318 | |
| 1319 | return handle; |
| 1320 | } |
| 1321 | EXPORT_SYMBOL_GPL(zs_malloc); |
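/*
 * Minimal usage sketch (editorial, not part of the original source; the
 * exact error handling used by callers may differ):
 *
 *	unsigned long handle;
 *
 *	handle = zs_malloc(pool, len, GFP_KERNEL, NUMA_NO_NODE);
 *	if (IS_ERR_VALUE(handle))
 *		return PTR_ERR((void *)handle);
 *	zs_obj_write(pool, handle, buf, len);
 *	...
 *	zs_free(pool, handle);
 */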
| 1322 | |
| 1323 | static void obj_free(int class_size, unsigned long obj) |
| 1324 | { |
| 1325 | struct link_free *link; |
| 1326 | struct zspage *zspage; |
| 1327 | struct zpdesc *f_zpdesc; |
| 1328 | unsigned long f_offset; |
| 1329 | unsigned int f_objidx; |
| 1330 | void *vaddr; |
| 1331 | |
| 1332 | |
| 1333 | obj_to_location(obj, &f_zpdesc, &f_objidx); |
| 1334 | f_offset = offset_in_page(class_size * f_objidx); |
| 1335 | zspage = get_zspage(f_zpdesc); |
| 1336 | |
| 1337 | vaddr = kmap_local_zpdesc(f_zpdesc); |
| 1338 | link = (struct link_free *)(vaddr + f_offset); |
| 1339 | |
| 1340 | /* Insert this object in containing zspage's freelist */ |
| 1341 | if (likely(!ZsHugePage(zspage))) |
| 1342 | link->next = get_freeobj(zspage) << OBJ_TAG_BITS; |
| 1343 | else |
| 1344 | f_zpdesc->handle = 0; |
| 1345 | set_freeobj(zspage, f_objidx); |
| 1346 | |
| 1347 | kunmap_local(vaddr); |
| 1348 | mod_zspage_inuse(zspage, -1); |
| 1349 | } |
| 1350 | |
| 1351 | void zs_free(struct zs_pool *pool, unsigned long handle) |
| 1352 | { |
| 1353 | struct zspage *zspage; |
| 1354 | struct zpdesc *f_zpdesc; |
| 1355 | unsigned long obj; |
| 1356 | struct size_class *class; |
| 1357 | int fullness; |
| 1358 | |
| 1359 | if (IS_ERR_OR_NULL((void *)handle)) |
| 1360 | return; |
| 1361 | |
| 1362 | /* |
| 1363 | * The pool->lock protects the race with zpage's migration |
| 1364 | * so it's safe to get the page from handle. |
| 1365 | */ |
| 1366 | read_lock(&pool->lock); |
| 1367 | obj = handle_to_obj(handle); |
| 1368 | obj_to_zpdesc(obj, &f_zpdesc); |
| 1369 | zspage = get_zspage(f_zpdesc); |
| 1370 | class = zspage_class(pool, zspage); |
| 1371 | spin_lock(&class->lock); |
| 1372 | read_unlock(&pool->lock); |
| 1373 | |
| 1374 | class_stat_sub(class, ZS_OBJS_INUSE, 1); |
| 1375 | obj_free(class->size, obj); |
| 1376 | |
| 1377 | fullness = fix_fullness_group(class, zspage); |
| 1378 | if (fullness == ZS_INUSE_RATIO_0) |
| 1379 | free_zspage(pool, class, zspage); |
| 1380 | |
| 1381 | spin_unlock(&class->lock); |
| 1382 | cache_free_handle(pool, handle); |
| 1383 | } |
| 1384 | EXPORT_SYMBOL_GPL(zs_free); |
| 1385 | |
| 1386 | static void zs_object_copy(struct size_class *class, unsigned long dst, |
| 1387 | unsigned long src) |
| 1388 | { |
| 1389 | struct zpdesc *s_zpdesc, *d_zpdesc; |
| 1390 | unsigned int s_objidx, d_objidx; |
| 1391 | unsigned long s_off, d_off; |
| 1392 | void *s_addr, *d_addr; |
| 1393 | int s_size, d_size, size; |
| 1394 | int written = 0; |
| 1395 | |
| 1396 | s_size = d_size = class->size; |
| 1397 | |
| 1398 | obj_to_location(src, &s_zpdesc, &s_objidx); |
| 1399 | obj_to_location(dst, &d_zpdesc, &d_objidx); |
| 1400 | |
| 1401 | s_off = offset_in_page(class->size * s_objidx); |
| 1402 | d_off = offset_in_page(class->size * d_objidx); |
| 1403 | |
| 1404 | if (s_off + class->size > PAGE_SIZE) |
| 1405 | s_size = PAGE_SIZE - s_off; |
| 1406 | |
| 1407 | if (d_off + class->size > PAGE_SIZE) |
| 1408 | d_size = PAGE_SIZE - d_off; |
| 1409 | |
| 1410 | s_addr = kmap_local_zpdesc(s_zpdesc); |
| 1411 | d_addr = kmap_local_zpdesc(d_zpdesc); |
| 1412 | |
| 1413 | while (1) { |
| 1414 | size = min(s_size, d_size); |
| 1415 | memcpy(d_addr + d_off, s_addr + s_off, size); |
| 1416 | written += size; |
| 1417 | |
| 1418 | if (written == class->size) |
| 1419 | break; |
| 1420 | |
| 1421 | s_off += size; |
| 1422 | s_size -= size; |
| 1423 | d_off += size; |
| 1424 | d_size -= size; |
| 1425 | |
| 1426 | /* |
| 1427 | * Calling kunmap_local(d_addr) is necessary. kunmap_local() |
| 1428 | * calls must occur in reverse order of calls to kmap_local_page(). |
| 1429 | * So, to call kunmap_local(s_addr) we should first call |
| 1430 | * kunmap_local(d_addr). For more details see |
| 1431 | * Documentation/mm/highmem.rst. |
| 1432 | */ |
| 1433 | if (s_off >= PAGE_SIZE) { |
| 1434 | kunmap_local(d_addr); |
| 1435 | kunmap_local(s_addr); |
| 1436 | s_zpdesc = get_next_zpdesc(s_zpdesc); |
| 1437 | s_addr = kmap_local_zpdesc(s_zpdesc); |
| 1438 | d_addr = kmap_local_zpdesc(d_zpdesc); |
| 1439 | s_size = class->size - written; |
| 1440 | s_off = 0; |
| 1441 | } |
| 1442 | |
| 1443 | if (d_off >= PAGE_SIZE) { |
| 1444 | kunmap_local(d_addr); |
| 1445 | d_zpdesc = get_next_zpdesc(d_zpdesc); |
| 1446 | d_addr = kmap_local_zpdesc(d_zpdesc); |
| 1447 | d_size = class->size - written; |
| 1448 | d_off = 0; |
| 1449 | } |
| 1450 | } |
| 1451 | |
| 1452 | kunmap_local(d_addr); |
| 1453 | kunmap_local(s_addr); |
| 1454 | } |
| 1455 | |
| 1456 | /* |
| 1457 | * Find an allocated object in the zspage, starting the search at the |
| 1458 | * given object index, and return its handle. |
| 1459 | */ |
| 1460 | static unsigned long find_alloced_obj(struct size_class *class, |
| 1461 | struct zpdesc *zpdesc, int *obj_idx) |
| 1462 | { |
| 1463 | unsigned int offset; |
| 1464 | int index = *obj_idx; |
| 1465 | unsigned long handle = 0; |
| 1466 | void *addr = kmap_local_zpdesc(zpdesc); |
| 1467 | |
| 1468 | offset = get_first_obj_offset(zpdesc); |
| 1469 | offset += class->size * index; |
| 1470 | |
| 1471 | while (offset < PAGE_SIZE) { |
| 1472 | if (obj_allocated(zpdesc, addr + offset, &handle)) |
| 1473 | break; |
| 1474 | |
| 1475 | offset += class->size; |
| 1476 | index++; |
| 1477 | } |
| 1478 | |
| 1479 | kunmap_local(addr); |
| 1480 | |
| 1481 | *obj_idx = index; |
| 1482 | |
| 1483 | return handle; |
| 1484 | } |
| 1485 | |
| 1486 | static void migrate_zspage(struct zs_pool *pool, struct zspage *src_zspage, |
| 1487 | struct zspage *dst_zspage) |
| 1488 | { |
| 1489 | unsigned long used_obj, free_obj; |
| 1490 | unsigned long handle; |
| 1491 | int obj_idx = 0; |
| 1492 | struct zpdesc *s_zpdesc = get_first_zpdesc(src_zspage); |
| 1493 | struct size_class *class = pool->size_class[src_zspage->class]; |
| 1494 | |
| 1495 | while (1) { |
| 1496 | handle = find_alloced_obj(class, s_zpdesc, &obj_idx); |
| 1497 | if (!handle) { |
| 1498 | s_zpdesc = get_next_zpdesc(s_zpdesc); |
| 1499 | if (!s_zpdesc) |
| 1500 | break; |
| 1501 | obj_idx = 0; |
| 1502 | continue; |
| 1503 | } |
| 1504 | |
| 1505 | used_obj = handle_to_obj(handle); |
| 1506 | free_obj = obj_malloc(pool, dst_zspage, handle); |
| 1507 | zs_object_copy(class, free_obj, used_obj); |
| 1508 | obj_idx++; |
| 1509 | obj_free(class->size, used_obj); |
| 1510 | |
| 1511 | /* Stop if there is no more space */ |
| 1512 | if (zspage_full(class, dst_zspage)) |
| 1513 | break; |
| 1514 | |
| 1515 | /* Stop if there are no more objects to migrate */ |
| 1516 | if (zspage_empty(src_zspage)) |
| 1517 | break; |
| 1518 | } |
| 1519 | } |
| 1520 | |
| 1521 | static struct zspage *isolate_src_zspage(struct size_class *class) |
| 1522 | { |
| 1523 | struct zspage *zspage; |
| 1524 | int fg; |
| 1525 | |
| 1526 | for (fg = ZS_INUSE_RATIO_10; fg <= ZS_INUSE_RATIO_99; fg++) { |
| 1527 | zspage = list_first_entry_or_null(&class->fullness_list[fg], |
| 1528 | struct zspage, list); |
| 1529 | if (zspage) { |
| 1530 | remove_zspage(class, zspage); |
| 1531 | return zspage; |
| 1532 | } |
| 1533 | } |
| 1534 | |
| 1535 | return zspage; |
| 1536 | } |
| 1537 | |
| 1538 | static struct zspage *isolate_dst_zspage(struct size_class *class) |
| 1539 | { |
| 1540 | struct zspage *zspage; |
| 1541 | int fg; |
| 1542 | |
| 1543 | for (fg = ZS_INUSE_RATIO_99; fg >= ZS_INUSE_RATIO_10; fg--) { |
| 1544 | zspage = list_first_entry_or_null(&class->fullness_list[fg], |
| 1545 | struct zspage, list); |
| 1546 | if (zspage) { |
| 1547 | remove_zspage(class, zspage); |
| 1548 | return zspage; |
| 1549 | } |
| 1550 | } |
| 1551 | |
| 1552 | return zspage; |
| 1553 | } |
| 1554 | |
| 1555 | /* |
| 1556 | * putback_zspage - add @zspage into right class's fullness list |
| 1557 | * @class: destination class |
| 1558 | * @zspage: target page |
| 1559 | * |
| 1560 | * Return @zspage's fullness status |
| 1561 | */ |
| 1562 | static int putback_zspage(struct size_class *class, struct zspage *zspage) |
| 1563 | { |
| 1564 | int fullness; |
| 1565 | |
| 1566 | fullness = get_fullness_group(class, zspage); |
| 1567 | insert_zspage(class, zspage, fullness); |
| 1568 | |
| 1569 | return fullness; |
| 1570 | } |
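/*
 * Editorial summary: compaction repeatedly takes a sparsely used source
 * zspage (isolate_src_zspage(), lowest fullness first) and a densely used
 * destination (isolate_dst_zspage(), highest fullness first), moves live
 * objects with migrate_zspage() until the destination fills up or the
 * source becomes empty, and then puts both back with putback_zspage(); an
 * emptied source can subsequently be freed.
 */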
| 1571 | |
| 1572 | #ifdef CONFIG_COMPACTION |
| 1573 | /* |
| 1574 | * To prevent zspage destroy during migration, zspage freeing should |
| 1575 | * hold locks of all pages in the zspage. |
| 1576 | */ |
| 1577 | static void lock_zspage(struct zspage *zspage) |
| 1578 | { |
| 1579 | struct zpdesc *curr_zpdesc, *zpdesc; |
| 1580 | |
| 1581 | /* |
| 1582 | * Pages we haven't locked yet can be migrated off the list while we're |
| 1583 | * trying to lock them, so we need to be careful and only attempt to |
| 1584 | * lock each page under zspage_read_lock(). Otherwise, the page we lock |
| 1585 | * may no longer belong to the zspage. This means that we may wait for |
| 1586 | * the wrong page to unlock, so we must take a reference to the page |
| 1587 | * prior to waiting for it to unlock outside zspage_read_lock(). |
| 1588 | */ |
| 1589 | while (1) { |
| 1590 | zspage_read_lock(zspage); |
| 1591 | zpdesc = get_first_zpdesc(zspage); |
| 1592 | if (zpdesc_trylock(zpdesc)) |
| 1593 | break; |
| 1594 | zpdesc_get(zpdesc); |
| 1595 | zspage_read_unlock(zspage); |
| 1596 | zpdesc_wait_locked(zpdesc); |
| 1597 | zpdesc_put(zpdesc); |
| 1598 | } |
| 1599 | |
| 1600 | curr_zpdesc = zpdesc; |
| 1601 | while ((zpdesc = get_next_zpdesc(curr_zpdesc))) { |
| 1602 | if (zpdesc_trylock(zpdesc)) { |
| 1603 | curr_zpdesc = zpdesc; |
| 1604 | } else { |
| 1605 | zpdesc_get(zpdesc); |
| 1606 | zspage_read_unlock(zspage); |
| 1607 | zpdesc_wait_locked(zpdesc); |
| 1608 | zpdesc_put(zpdesc); |
| 1609 | zspage_read_lock(zspage); |
| 1610 | } |
| 1611 | } |
| 1612 | zspage_read_unlock(zspage); |
| 1613 | } |
| 1614 | #endif /* CONFIG_COMPACTION */ |
| 1615 | |
| 1616 | #ifdef CONFIG_COMPACTION |
| 1617 | |
| 1618 | static void replace_sub_page(struct size_class *class, struct zspage *zspage, |
| 1619 | struct zpdesc *newzpdesc, struct zpdesc *oldzpdesc) |
| 1620 | { |
| 1621 | struct zpdesc *zpdesc; |
| 1622 | struct zpdesc *zpdescs[ZS_MAX_PAGES_PER_ZSPAGE] = {NULL, }; |
| 1623 | unsigned int first_obj_offset; |
| 1624 | int idx = 0; |
| 1625 | |
| 1626 | zpdesc = get_first_zpdesc(zspage); |
| 1627 | do { |
| 1628 | if (zpdesc == oldzpdesc) |
| 1629 | zpdescs[idx] = newzpdesc; |
| 1630 | else |
| 1631 | zpdescs[idx] = zpdesc; |
| 1632 | idx++; |
| 1633 | } while ((zpdesc = get_next_zpdesc(zpdesc)) != NULL); |
| 1634 | |
| 1635 | create_page_chain(class, zspage, zpdescs); |
	first_obj_offset = get_first_obj_offset(oldzpdesc);
	set_first_obj_offset(newzpdesc, first_obj_offset);
	if (unlikely(ZsHugePage(zspage)))
		newzpdesc->handle = oldzpdesc->handle;
	__zpdesc_set_movable(newzpdesc);
| 1641 | } |
| 1642 | |
| 1643 | static bool zs_page_isolate(struct page *page, isolate_mode_t mode) |
| 1644 | { |
| 1645 | /* |
| 1646 | * Page is locked so zspage can't be destroyed concurrently |
| 1647 | * (see free_zspage()). But if the page was already destroyed |
| 1648 | * (see reset_zpdesc()), refuse isolation here. |
| 1649 | */ |
| 1650 | return page_zpdesc(page)->zspage; |
| 1651 | } |
| 1652 | |
| 1653 | static int zs_page_migrate(struct page *newpage, struct page *page, |
| 1654 | enum migrate_mode mode) |
| 1655 | { |
| 1656 | struct zs_pool *pool; |
| 1657 | struct size_class *class; |
| 1658 | struct zspage *zspage; |
| 1659 | struct zpdesc *dummy; |
| 1660 | struct zpdesc *newzpdesc = page_zpdesc(newpage); |
| 1661 | struct zpdesc *zpdesc = page_zpdesc(page); |
| 1662 | void *s_addr, *d_addr, *addr; |
| 1663 | unsigned int offset; |
| 1664 | unsigned long handle; |
| 1665 | unsigned long old_obj, new_obj; |
| 1666 | unsigned int obj_idx; |
| 1667 | |
| 1668 | /* |
| 1669 | * TODO: nothing prevents a zspage from getting destroyed while |
| 1670 | * it is isolated for migration, as the page lock is temporarily |
| 1671 | * dropped after zs_page_isolate() succeeded: we should rework that |
| 1672 | * and defer destroying such pages once they are un-isolated (putback) |
| 1673 | * instead. |
| 1674 | */ |
| 1675 | if (!zpdesc->zspage) |
| 1676 | return 0; |
| 1677 | |
| 1678 | /* The page is locked, so this pointer must remain valid */ |
| 1679 | zspage = get_zspage(zpdesc); |
| 1680 | pool = zspage->pool; |
| 1681 | |
| 1682 | /* |
	 * The pool lock, taken for write, protects against the race between
	 * zpage migration and zs_free.
| 1685 | */ |
| 1686 | write_lock(&pool->lock); |
| 1687 | class = zspage_class(pool, zspage); |
| 1688 | |
| 1689 | /* |
| 1690 | * the class lock protects zpage alloc/free in the zspage. |
| 1691 | */ |
	spin_lock(&class->lock);
	/* the zspage write_lock protects zpage access via zs_obj_read/write() */
	if (!zspage_write_trylock(zspage)) {
		spin_unlock(&class->lock);
		write_unlock(&pool->lock);
		return -EINVAL;
	}

	/* We're committed, tell the world that this is a Zsmalloc page. */
	__zpdesc_set_zsmalloc(newzpdesc);
| 1702 | |
| 1703 | offset = get_first_obj_offset(zpdesc); |
| 1704 | s_addr = kmap_local_zpdesc(zpdesc); |
| 1705 | |
| 1706 | /* |
| 1707 | * Here, any user cannot access all objects in the zspage so let's move. |
| 1708 | */ |
| 1709 | d_addr = kmap_local_zpdesc(zpdesc: newzpdesc); |
| 1710 | copy_page(to: d_addr, from: s_addr); |
| 1711 | kunmap_local(d_addr); |
| 1712 | |
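	/*
	 * Re-point every live handle at the same object index in the new
	 * page, so that lookups resolve to the copied location.
	 */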
	for (addr = s_addr + offset; addr < s_addr + PAGE_SIZE;
					addr += class->size) {
		if (obj_allocated(zpdesc, addr, &handle)) {
			old_obj = handle_to_obj(handle);
			obj_to_location(old_obj, &dummy, &obj_idx);
			new_obj = (unsigned long)location_to_obj(newzpdesc, obj_idx);
			record_obj(handle, new_obj);
		}
	}
| 1723 | kunmap_local(s_addr); |
| 1724 | |
	replace_sub_page(class, zspage, newzpdesc, zpdesc);
	/*
	 * Since the data copy is complete and the new zspage structure is
	 * set up, it's okay to release the pool's migration lock.
	 */
	write_unlock(&pool->lock);
	spin_unlock(&class->lock);
	zspage_write_unlock(zspage);

	zpdesc_get(newzpdesc);
	if (zpdesc_zone(newzpdesc) != zpdesc_zone(zpdesc)) {
| 1736 | zpdesc_dec_zone_page_state(zpdesc); |
| 1737 | zpdesc_inc_zone_page_state(zpdesc: newzpdesc); |
| 1738 | } |
| 1739 | |
| 1740 | reset_zpdesc(zpdesc); |
| 1741 | zpdesc_put(zpdesc); |
| 1742 | |
| 1743 | return 0; |
| 1744 | } |
| 1745 | |
| 1746 | static void zs_page_putback(struct page *page) |
| 1747 | { |
| 1748 | } |
| 1749 | |
| 1750 | const struct movable_operations zsmalloc_mops = { |
| 1751 | .isolate_page = zs_page_isolate, |
| 1752 | .migrate_page = zs_page_migrate, |
| 1753 | .putback_page = zs_page_putback, |
| 1754 | }; |
| 1755 | |
| 1756 | /* |
 * Caller should hold the page_lock of all pages in the zspage.
 * In here, we cannot use zspage metadata.
| 1759 | */ |
| 1760 | static void async_free_zspage(struct work_struct *work) |
| 1761 | { |
| 1762 | int i; |
| 1763 | struct size_class *class; |
| 1764 | struct zspage *zspage, *tmp; |
| 1765 | LIST_HEAD(free_pages); |
| 1766 | struct zs_pool *pool = container_of(work, struct zs_pool, |
| 1767 | free_work); |
| 1768 | |
| 1769 | for (i = 0; i < ZS_SIZE_CLASSES; i++) { |
| 1770 | class = pool->size_class[i]; |
| 1771 | if (class->index != i) |
| 1772 | continue; |
| 1773 | |
		spin_lock(&class->lock);
		list_splice_init(&class->fullness_list[ZS_INUSE_RATIO_0],
				 &free_pages);
		spin_unlock(&class->lock);
	}

	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
		list_del(&zspage->list);
		lock_zspage(zspage);

		class = zspage_class(pool, zspage);
		spin_lock(&class->lock);
		class_stat_sub(class, ZS_INUSE_RATIO_0, 1);
		__free_zspage(pool, class, zspage);
		spin_unlock(&class->lock);
| 1789 | } |
| 1790 | }; |
| 1791 | |
| 1792 | static void kick_deferred_free(struct zs_pool *pool) |
| 1793 | { |
	schedule_work(&pool->free_work);
}

static void zs_flush_migration(struct zs_pool *pool)
{
	flush_work(&pool->free_work);
| 1800 | } |
| 1801 | |
| 1802 | static void init_deferred_free(struct zs_pool *pool) |
| 1803 | { |
| 1804 | INIT_WORK(&pool->free_work, async_free_zspage); |
| 1805 | } |
| 1806 | |
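/*
 * Mark every sub-page of a freshly allocated zspage as movable so that
 * the page migration core can later isolate and migrate it.
 */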
| 1807 | static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) |
| 1808 | { |
| 1809 | struct zpdesc *zpdesc = get_first_zpdesc(zspage); |
| 1810 | |
| 1811 | do { |
| 1812 | WARN_ON(!zpdesc_trylock(zpdesc)); |
| 1813 | __zpdesc_set_movable(zpdesc); |
| 1814 | zpdesc_unlock(zpdesc); |
| 1815 | } while ((zpdesc = get_next_zpdesc(zpdesc)) != NULL); |
| 1816 | } |
| 1817 | #else |
| 1818 | static inline void zs_flush_migration(struct zs_pool *pool) { } |
| 1819 | #endif |
| 1820 | |
| 1821 | /* |
| 1822 | * |
| 1823 | * Based on the number of unused allocated objects calculate |
| 1824 | * and return the number of pages that we can free. |
| 1825 | */ |
| 1826 | static unsigned long zs_can_compact(struct size_class *class) |
| 1827 | { |
| 1828 | unsigned long obj_wasted; |
	unsigned long obj_allocated = class_stat_read(class, ZS_OBJS_ALLOCATED);
	unsigned long obj_used = class_stat_read(class, ZS_OBJS_INUSE);
| 1831 | |
| 1832 | if (obj_allocated <= obj_used) |
| 1833 | return 0; |
| 1834 | |
| 1835 | obj_wasted = obj_allocated - obj_used; |
| 1836 | obj_wasted /= class->objs_per_zspage; |
| 1837 | |
| 1838 | return obj_wasted * class->pages_per_zspage; |
| 1839 | } |
| 1840 | |
| 1841 | static unsigned long __zs_compact(struct zs_pool *pool, |
| 1842 | struct size_class *class) |
| 1843 | { |
| 1844 | struct zspage *src_zspage = NULL; |
| 1845 | struct zspage *dst_zspage = NULL; |
| 1846 | unsigned long pages_freed = 0; |
| 1847 | |
| 1848 | /* |
| 1849 | * protect the race between zpage migration and zs_free |
| 1850 | * as well as zpage allocation/free |
| 1851 | */ |
| 1852 | write_lock(&pool->lock); |
	spin_lock(&class->lock);
| 1854 | while (zs_can_compact(class)) { |
| 1855 | int fg; |
| 1856 | |
| 1857 | if (!dst_zspage) { |
| 1858 | dst_zspage = isolate_dst_zspage(class); |
| 1859 | if (!dst_zspage) |
| 1860 | break; |
| 1861 | } |
| 1862 | |
| 1863 | src_zspage = isolate_src_zspage(class); |
| 1864 | if (!src_zspage) |
| 1865 | break; |
| 1866 | |
		if (!zspage_write_trylock(src_zspage))
			break;

		migrate_zspage(pool, src_zspage, dst_zspage);
		zspage_write_unlock(src_zspage);

		fg = putback_zspage(class, src_zspage);
		if (fg == ZS_INUSE_RATIO_0) {
			free_zspage(pool, class, src_zspage);
			pages_freed += class->pages_per_zspage;
		}
		src_zspage = NULL;

		if (get_fullness_group(class, dst_zspage) == ZS_INUSE_RATIO_100
		    || rwlock_is_contended(&pool->lock)) {
			putback_zspage(class, dst_zspage);
			dst_zspage = NULL;

			spin_unlock(&class->lock);
			write_unlock(&pool->lock);
			cond_resched();
			write_lock(&pool->lock);
			spin_lock(&class->lock);
		}
	}

	if (src_zspage)
		putback_zspage(class, src_zspage);

	if (dst_zspage)
		putback_zspage(class, dst_zspage);

	spin_unlock(&class->lock);
| 1900 | write_unlock(&pool->lock); |
| 1901 | |
| 1902 | return pages_freed; |
| 1903 | } |
| 1904 | |
| 1905 | unsigned long zs_compact(struct zs_pool *pool) |
| 1906 | { |
| 1907 | int i; |
| 1908 | struct size_class *class; |
| 1909 | unsigned long pages_freed = 0; |
| 1910 | |
| 1911 | /* |
| 1912 | * Pool compaction is performed under pool->lock so it is basically |
| 1913 | * single-threaded. Having more than one thread in __zs_compact() |
| 1914 | * will increase pool->lock contention, which will impact other |
| 1915 | * zsmalloc operations that need pool->lock. |
| 1916 | */ |
	if (atomic_xchg(&pool->compaction_in_progress, 1))
| 1918 | return 0; |
| 1919 | |
| 1920 | for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) { |
| 1921 | class = pool->size_class[i]; |
| 1922 | if (class->index != i) |
| 1923 | continue; |
| 1924 | pages_freed += __zs_compact(pool, class); |
| 1925 | } |
	atomic_long_add(pages_freed, &pool->stats.pages_compacted);
	atomic_set(&pool->compaction_in_progress, 0);
| 1928 | |
| 1929 | return pages_freed; |
| 1930 | } |
| 1931 | EXPORT_SYMBOL_GPL(zs_compact); |
| 1932 | |
| 1933 | void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats) |
| 1934 | { |
| 1935 | memcpy(stats, &pool->stats, sizeof(struct zs_pool_stats)); |
| 1936 | } |
| 1937 | EXPORT_SYMBOL_GPL(zs_pool_stats); |
| 1938 | |
| 1939 | static unsigned long zs_shrinker_scan(struct shrinker *shrinker, |
| 1940 | struct shrink_control *sc) |
| 1941 | { |
| 1942 | unsigned long pages_freed; |
| 1943 | struct zs_pool *pool = shrinker->private_data; |
| 1944 | |
| 1945 | /* |
| 1946 | * Compact classes and calculate compaction delta. |
| 1947 | * Can run concurrently with a manually triggered |
| 1948 | * (by user) compaction. |
| 1949 | */ |
| 1950 | pages_freed = zs_compact(pool); |
| 1951 | |
| 1952 | return pages_freed ? pages_freed : SHRINK_STOP; |
| 1953 | } |
| 1954 | |
| 1955 | static unsigned long zs_shrinker_count(struct shrinker *shrinker, |
| 1956 | struct shrink_control *sc) |
| 1957 | { |
| 1958 | int i; |
| 1959 | struct size_class *class; |
| 1960 | unsigned long pages_to_free = 0; |
| 1961 | struct zs_pool *pool = shrinker->private_data; |
| 1962 | |
| 1963 | for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) { |
| 1964 | class = pool->size_class[i]; |
| 1965 | if (class->index != i) |
| 1966 | continue; |
| 1967 | |
| 1968 | pages_to_free += zs_can_compact(class); |
| 1969 | } |
| 1970 | |
| 1971 | return pages_to_free; |
| 1972 | } |
| 1973 | |
| 1974 | static void zs_unregister_shrinker(struct zs_pool *pool) |
| 1975 | { |
	shrinker_free(pool->shrinker);
| 1977 | } |
| 1978 | |
| 1979 | static int zs_register_shrinker(struct zs_pool *pool) |
| 1980 | { |
	pool->shrinker = shrinker_alloc(0, "mm-zspool:%s", pool->name);
	if (!pool->shrinker)
		return -ENOMEM;

	pool->shrinker->scan_objects = zs_shrinker_scan;
	pool->shrinker->count_objects = zs_shrinker_count;
	pool->shrinker->batch = 0;
	pool->shrinker->private_data = pool;

	shrinker_register(pool->shrinker);
| 1991 | |
| 1992 | return 0; |
| 1993 | } |
| 1994 | |
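/*
 * Pick the number of pages per zspage that minimizes per-zspage waste
 * for the given class size. As a hypothetical example, assuming 4 KiB
 * pages and the default chain limit of 8: for class_size == 2448 the
 * waste (i * PAGE_SIZE) % class_size is 1648, 848, 48, 1696, ... for
 * i == 1, 2, 3, 4, ..., so a chain of 3 pages is chosen.
 */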
| 1995 | static int calculate_zspage_chain_size(int class_size) |
| 1996 | { |
| 1997 | int i, min_waste = INT_MAX; |
| 1998 | int chain_size = 1; |
| 1999 | |
	if (is_power_of_2(class_size))
| 2001 | return chain_size; |
| 2002 | |
| 2003 | for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) { |
| 2004 | int waste; |
| 2005 | |
| 2006 | waste = (i * PAGE_SIZE) % class_size; |
| 2007 | if (waste < min_waste) { |
| 2008 | min_waste = waste; |
| 2009 | chain_size = i; |
| 2010 | } |
| 2011 | } |
| 2012 | |
| 2013 | return chain_size; |
| 2014 | } |
| 2015 | |
| 2016 | /** |
| 2017 | * zs_create_pool - Creates an allocation pool to work from. |
| 2018 | * @name: pool name to be created |
| 2019 | * |
 * This function must be called before anything else when using
 * the zsmalloc allocator.
| 2022 | * |
| 2023 | * On success, a pointer to the newly created pool is returned, |
| 2024 | * otherwise NULL. |
| 2025 | */ |
| 2026 | struct zs_pool *zs_create_pool(const char *name) |
| 2027 | { |
| 2028 | int i; |
| 2029 | struct zs_pool *pool; |
| 2030 | struct size_class *prev_class = NULL; |
| 2031 | |
| 2032 | pool = kzalloc(sizeof(*pool), GFP_KERNEL); |
| 2033 | if (!pool) |
| 2034 | return NULL; |
| 2035 | |
| 2036 | init_deferred_free(pool); |
| 2037 | rwlock_init(&pool->lock); |
	atomic_set(&pool->compaction_in_progress, 0);

	pool->name = kstrdup(name, GFP_KERNEL);
| 2041 | if (!pool->name) |
| 2042 | goto err; |
| 2043 | |
| 2044 | if (create_cache(pool)) |
| 2045 | goto err; |
| 2046 | |
| 2047 | /* |
| 2048 | * Iterate reversely, because, size of size_class that we want to use |
| 2049 | * for merging should be larger or equal to current size. |
| 2050 | */ |
| 2051 | for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) { |
| 2052 | int size; |
| 2053 | int pages_per_zspage; |
| 2054 | int objs_per_zspage; |
| 2055 | struct size_class *class; |
| 2056 | int fullness; |
| 2057 | |
| 2058 | size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA; |
| 2059 | if (size > ZS_MAX_ALLOC_SIZE) |
| 2060 | size = ZS_MAX_ALLOC_SIZE; |
		pages_per_zspage = calculate_zspage_chain_size(size);
| 2062 | objs_per_zspage = pages_per_zspage * PAGE_SIZE / size; |
| 2063 | |
| 2064 | /* |
| 2065 | * We iterate from biggest down to smallest classes, |
| 2066 | * so huge_class_size holds the size of the first huge |
| 2067 | * class. Any object bigger than or equal to that will |
		 * end up in the huge class.
| 2069 | */ |
| 2070 | if (pages_per_zspage != 1 && objs_per_zspage != 1 && |
| 2071 | !huge_class_size) { |
| 2072 | huge_class_size = size; |
| 2073 | /* |
| 2074 | * The object uses ZS_HANDLE_SIZE bytes to store the |
| 2075 | * handle. We need to subtract it, because zs_malloc() |
			 * unconditionally adds the handle size before it performs
			 * the size class search - so an object may be smaller than
			 * the huge class size, yet still end up in the huge
			 * class because it grows by ZS_HANDLE_SIZE extra bytes
| 2080 | * right before class lookup. |
| 2081 | */ |
| 2082 | huge_class_size -= (ZS_HANDLE_SIZE - 1); |
| 2083 | } |
| 2084 | |
| 2085 | /* |
| 2086 | * size_class is used for normal zsmalloc operation such |
| 2087 | * as alloc/free for that size. Although it is natural that we |
| 2088 | * have one size_class for each size, there is a chance that we |
| 2089 | * can get more memory utilization if we use one size_class for |
| 2090 | * many different sizes whose size_class have same |
| 2091 | * characteristics. So, we makes size_class point to |
| 2092 | * previous size_class if possible. |
| 2093 | */ |
| 2094 | if (prev_class) { |
			if (can_merge(prev_class, pages_per_zspage, objs_per_zspage)) {
| 2096 | pool->size_class[i] = prev_class; |
| 2097 | continue; |
| 2098 | } |
| 2099 | } |
| 2100 | |
| 2101 | class = kzalloc(sizeof(struct size_class), GFP_KERNEL); |
| 2102 | if (!class) |
| 2103 | goto err; |
| 2104 | |
| 2105 | class->size = size; |
| 2106 | class->index = i; |
| 2107 | class->pages_per_zspage = pages_per_zspage; |
| 2108 | class->objs_per_zspage = objs_per_zspage; |
| 2109 | spin_lock_init(&class->lock); |
| 2110 | pool->size_class[i] = class; |
| 2111 | |
| 2112 | fullness = ZS_INUSE_RATIO_0; |
| 2113 | while (fullness < NR_FULLNESS_GROUPS) { |
			INIT_LIST_HEAD(&class->fullness_list[fullness]);
| 2115 | fullness++; |
| 2116 | } |
| 2117 | |
| 2118 | prev_class = class; |
| 2119 | } |
| 2120 | |
| 2121 | /* debug only, don't abort if it fails */ |
| 2122 | zs_pool_stat_create(pool, name); |
| 2123 | |
| 2124 | /* |
| 2125 | * Not critical since shrinker is only used to trigger internal |
| 2126 | * defragmentation of the pool which is pretty optional thing. If |
| 2127 | * registration fails we still can use the pool normally and user can |
| 2128 | * trigger compaction manually. Thus, ignore return code. |
| 2129 | */ |
| 2130 | zs_register_shrinker(pool); |
| 2131 | |
| 2132 | return pool; |
| 2133 | |
| 2134 | err: |
| 2135 | zs_destroy_pool(pool); |
| 2136 | return NULL; |
| 2137 | } |
| 2138 | EXPORT_SYMBOL_GPL(zs_create_pool); |
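
/*
 * Illustrative usage sketch (hypothetical caller; allocations and error
 * handling beyond pool creation are omitted):
 *
 *	struct zs_pool *pool = zs_create_pool("example");
 *
 *	if (!pool)
 *		return -ENOMEM;
 *	...
 *	zs_compact(pool);
 *	zs_destroy_pool(pool);
 */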
| 2139 | |
| 2140 | void zs_destroy_pool(struct zs_pool *pool) |
| 2141 | { |
| 2142 | int i; |
| 2143 | |
| 2144 | zs_unregister_shrinker(pool); |
| 2145 | zs_flush_migration(pool); |
| 2146 | zs_pool_stat_destroy(pool); |
| 2147 | |
| 2148 | for (i = 0; i < ZS_SIZE_CLASSES; i++) { |
| 2149 | int fg; |
| 2150 | struct size_class *class = pool->size_class[i]; |
| 2151 | |
| 2152 | if (!class) |
| 2153 | continue; |
| 2154 | |
| 2155 | if (class->index != i) |
| 2156 | continue; |
| 2157 | |
| 2158 | for (fg = ZS_INUSE_RATIO_0; fg < NR_FULLNESS_GROUPS; fg++) { |
			if (list_empty(&class->fullness_list[fg]))
				continue;

			pr_err("Class-%d fullness group %d is not empty\n",
			       class->size, fg);
		}
		kfree(class);
| 2166 | } |
| 2167 | |
| 2168 | destroy_cache(pool); |
	kfree(pool->name);
	kfree(pool);
| 2171 | } |
| 2172 | EXPORT_SYMBOL_GPL(zs_destroy_pool); |
| 2173 | |
| 2174 | static int __init zs_init(void) |
| 2175 | { |
| 2176 | int rc __maybe_unused; |
| 2177 | |
| 2178 | #ifdef CONFIG_COMPACTION |
	rc = set_movable_ops(&zsmalloc_mops, PGTY_zsmalloc);
| 2180 | if (rc) |
| 2181 | return rc; |
| 2182 | #endif |
| 2183 | zs_stat_init(); |
| 2184 | return 0; |
| 2185 | } |
| 2186 | |
| 2187 | static void __exit zs_exit(void) |
| 2188 | { |
| 2189 | #ifdef CONFIG_COMPACTION |
	set_movable_ops(NULL, PGTY_zsmalloc);
| 2191 | #endif |
| 2192 | zs_stat_exit(); |
| 2193 | } |
| 2194 | |
| 2195 | module_init(zs_init); |
| 2196 | module_exit(zs_exit); |
| 2197 | |
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("zsmalloc memory allocator");
| 2201 | |