/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _NET_PAGE_POOL_TYPES_H
#define _NET_PAGE_POOL_TYPES_H

#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include <net/netmem.h>

#define PP_FLAG_DMA_MAP		BIT(0) /* Should page_pool do the DMA
					* map/unmap
					*/
#define PP_FLAG_DMA_SYNC_DEV	BIT(1) /* If set, all pages that the driver
					* gets from page_pool will be
					* DMA-synced-for-device according to
					* the length provided by the device
					* driver.
					* Please note that DMA-sync-for-CPU is
					* still the device driver's
					* responsibility.
					*/
#define PP_FLAG_SYSTEM_POOL	BIT(2) /* Global system page_pool */

/* Allow unreadable (net_iov backed) netmem in this page_pool. Drivers setting
 * this must be able to support unreadable netmem, where netmem_address() would
 * return NULL. This flag should not be set for header page_pools.
 *
 * If the driver sets PP_FLAG_ALLOW_UNREADABLE_NETMEM, it should also set
 * page_pool_params.slow.queue_idx.
 */
#define PP_FLAG_ALLOW_UNREADABLE_NETMEM	BIT(3)

#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
				 PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM)

/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */
#define PP_DMA_INDEX_LIMIT	XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1)

/*
 * Fast allocation side cache array/stack
 *
 * The cache size and refill watermark are related to the network
 * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
 * ring is usually refilled and the max consumed elements will be 64,
 * which gives a natural max size for the objects needed in the cache.
 *
 * Room is kept for more objects to serve the XDP_DROP use-case:
 * XDP_DROP can recycle objects directly into this array, as it shares
 * the same softirq/NAPI protection. If the cache is already full (or
 * partly full), those XDP_DROP recycles would have to take the slower
 * code path.
 */
#define PP_ALLOC_CACHE_SIZE	128
#define PP_ALLOC_CACHE_REFILL	64
struct pp_alloc_cache {
	u32 count;
	netmem_ref cache[PP_ALLOC_CACHE_SIZE];
};

/**
 * struct page_pool_params - page pool parameters
 * @fast:	params accessed frequently on hotpath
 * @order:	2^order pages on allocation
 * @pool_size:	size of the ptr_ring
 * @nid:	NUMA node id to allocate pages from
 * @dev:	device, for DMA pre-mapping purposes
 * @napi:	NAPI which is the sole consumer of pages, otherwise NULL
 * @dma_dir:	DMA mapping direction
 * @max_len:	max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
 * @offset:	DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
 * @slow:	params with slowpath access only (initialization and Netlink)
 * @netdev:	netdev this pool will serve (leave as NULL if none or multiple)
 * @queue_idx:	queue idx this page_pool is being created for.
 * @flags:	PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL,
 *		PP_FLAG_ALLOW_UNREADABLE_NETMEM.
 */
struct page_pool_params {
	struct_group_tagged(page_pool_params_fast, fast,
		unsigned int	order;
		unsigned int	pool_size;
		int		nid;
		struct device	*dev;
		struct napi_struct *napi;
		enum dma_data_direction dma_dir;
		unsigned int	max_len;
		unsigned int	offset;
	);
	struct_group_tagged(page_pool_params_slow, slow,
		struct net_device *netdev;
		unsigned int	queue_idx;
		unsigned int	flags;
/* private: used by test code only */
		void (*init_callback)(netmem_ref netmem, void *arg);
		void *init_arg;
	);
};
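
/*
 * Example: a minimal, hypothetical sketch of how an RX driver might fill
 * these params and create one pool per RX queue. Names such as pdev, ring
 * and MY_RX_RING_SIZE are illustrative only and not part of this API:
 *
 *	struct page_pool_params pp_params = {
 *		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 *		.order		= 0,
 *		.pool_size	= MY_RX_RING_SIZE,
 *		.nid		= NUMA_NO_NODE,
 *		.dev		= &pdev->dev,
 *		.napi		= &ring->napi,
 *		.dma_dir	= DMA_FROM_DEVICE,
 *		.max_len	= PAGE_SIZE,
 *		.offset		= 0,
 *	};
 *	struct page_pool *pool = page_pool_create(&pp_params);
 *
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *
 * page_pool_create() returns an ERR_PTR() on failure, so check it with
 * IS_ERR() rather than comparing against NULL.
 */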

#ifdef CONFIG_PAGE_POOL_STATS
/**
 * struct page_pool_alloc_stats - allocation statistics
 * @fast:	successful fast path allocations
 * @slow:	slow path order-0 allocations
 * @slow_high_order: slow path high order allocations
 * @empty:	ptr ring is empty, so a slow path allocation was forced
 * @refill:	an allocation which triggered a refill of the cache
 * @waive:	pages obtained from the ptr ring that cannot be added to
 *		the cache due to a NUMA mismatch
 */
struct page_pool_alloc_stats {
	u64 fast;
	u64 slow;
	u64 slow_high_order;
	u64 empty;
	u64 refill;
	u64 waive;
};

/**
 * struct page_pool_recycle_stats - recycling (freeing) statistics
 * @cached:	recycling placed page in the page pool cache
 * @cache_full:	page pool cache was full
 * @ring:	page placed into the ptr ring
 * @ring_full:	page released from page pool because the ptr ring was full
 * @released_refcnt: page released (and not recycled) because refcnt > 1
 */
struct page_pool_recycle_stats {
	u64 cached;
	u64 cache_full;
	u64 ring;
	u64 ring_full;
	u64 released_refcnt;
};

/**
 * struct page_pool_stats - combined page pool use statistics
 * @alloc_stats:	see struct page_pool_alloc_stats
 * @recycle_stats:	see struct page_pool_recycle_stats
 *
 * Wrapper struct for combining page pool stats with different storage
 * requirements.
 */
struct page_pool_stats {
	struct page_pool_alloc_stats alloc_stats;
	struct page_pool_recycle_stats recycle_stats;
};
#endif
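
/*
 * Example (a hedged sketch, not part of this header): when
 * CONFIG_PAGE_POOL_STATS is enabled, a driver can fill this wrapper via
 * page_pool_get_stats(), declared in <net/page_pool/helpers.h>, which adds
 * a pool's counters into the struct passed in:
 *
 *	struct page_pool_stats stats = { };
 *
 *	if (page_pool_get_stats(pool, &stats))
 *		pr_info("fast allocs: %llu, ring recycles: %llu\n",
 *			stats.alloc_stats.fast, stats.recycle_stats.ring);
 *
 * The counters are accumulated into (not overwritten in) the passed struct,
 * so the same struct can be reused across several pools to aggregate
 * per-queue statistics.
 */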

/* The whole frag API block must stay within one cacheline. On 32-bit systems,
 * sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``.
 * On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``.
 * The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that
 * one for simplicity.
 * Having it aligned to a cacheline boundary may be excessive and doesn't
 * bring any benefit.
 */
#define PAGE_POOL_FRAG_GROUP_ALIGN	(4 * sizeof(long))

struct memory_provider_ops;

struct pp_memory_provider_params {
	void *mp_priv;
	const struct memory_provider_ops *mp_ops;
};

struct page_pool {
	struct page_pool_params_fast p;

	int cpuid;
	u32 pages_state_hold_cnt;

	bool has_init_callback:1;	/* slow::init_callback is set */
	bool dma_map:1;			/* Perform DMA mapping */
	bool dma_sync:1;		/* Perform DMA sync for device */
	bool dma_sync_for_cpu:1;	/* Perform DMA sync for cpu */
#ifdef CONFIG_PAGE_POOL_STATS
	bool system:1;			/* This is a global percpu pool */
#endif

	__cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
	long frag_users;
	netmem_ref frag_page;
	unsigned int frag_offset;
	__cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);

	struct delayed_work release_dw;
	void (*disconnect)(void *pool);
	unsigned long defer_start;
	unsigned long defer_warn;

#ifdef CONFIG_PAGE_POOL_STATS
	/* these stats are incremented while in softirq context */
	struct page_pool_alloc_stats alloc_stats;
#endif
	u32 xdp_mem_id;

	/*
	 * Data structure for the allocation side
	 *
	 * A driver's allocation side usually already performs some kind
	 * of resource protection. Piggyback on this protection, and
	 * require the driver to protect the allocation side.
	 *
	 * For NIC drivers this means allocating a page_pool per RX-queue.
	 * The RX-queue is already protected by softirq/BH scheduling and
	 * napi_schedule, and NAPI scheduling guarantees that a single
	 * napi_struct will only be scheduled on a single CPU
	 * (see napi_schedule).
	 */
	struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;

	/* Data structure for storing recycled pages.
	 *
	 * Returning/freeing pages is more complicated synchronization-wise,
	 * because frees can happen on remote CPUs, with no association to
	 * the allocation resource.
	 *
	 * Use ptr_ring, as it separates consumer and producer efficiently,
	 * in a way that doesn't bounce cache-lines.
	 *
	 * TODO: Implement bulk return of pages into this structure.
	 */
	struct ptr_ring ring;

	void *mp_priv;
	const struct memory_provider_ops *mp_ops;

	struct xarray dma_mapped;

#ifdef CONFIG_PAGE_POOL_STATS
	/* recycle stats are per-cpu to avoid locking */
	struct page_pool_recycle_stats __percpu *recycle_stats;
#endif
	atomic_t pages_state_release_cnt;

	/* A page_pool is strictly tied to a single RX-queue being
	 * protected by NAPI, due to the above pp_alloc_cache. This
	 * refcnt's purpose is to simplify drivers' error handling.
	 */
	refcount_t user_cnt;

	u64 destroy_cnt;

	/* Slow/Control-path information follows */
	struct page_pool_params_slow slow;
	/* User-facing fields, protected by page_pools_lock */
	struct {
		struct hlist_node list;
		u64 detach_time;
		u32 id;
	} user;
};

struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
				  unsigned int size, gfp_t gfp);
netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
				       unsigned int *offset, unsigned int size,
				       gfp_t gfp);
struct page_pool *page_pool_create(const struct page_pool_params *params);
struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
					  int cpuid);

struct xdp_mem_info;

#ifdef CONFIG_PAGE_POOL
void page_pool_enable_direct_recycling(struct page_pool *pool,
				       struct napi_struct *napi);
void page_pool_disable_direct_recycling(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
			   const struct xdp_mem_info *mem);
void page_pool_put_netmem_bulk(netmem_ref *data, u32 count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
}

static inline void page_pool_use_xdp_mem(struct page_pool *pool,
					 void (*disconnect)(void *),
					 const struct xdp_mem_info *mem)
{
}

static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count)
{
}
#endif

void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
				  unsigned int dma_sync_size,
				  bool allow_direct);
void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
				unsigned int dma_sync_size,
				bool allow_direct);
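
/*
 * Example: a minimal, hypothetical RX-path sketch; the descriptor and DMA
 * handling is omitted. Drivers normally free pages through the wrappers in
 * <net/page_pool/helpers.h> (e.g. page_pool_put_full_page()), which resolve
 * fragment references and then end up in the page_pool_put_unrefed_*()
 * functions declared above:
 *
 *	struct page *page;
 *
 *	page = page_pool_alloc_pages(pool, GFP_ATOMIC | __GFP_NOWARN);
 *	if (!page)
 *		return -ENOMEM;
 *	// ... post the page to the RX ring; on completion build the skb,
 *	// or on an XDP_DROP-style path return it to the pool:
 *	page_pool_put_full_page(pool, page, true);
 *
 * A dma_sync_size of -1 means "sync the whole mapped area" when
 * PP_FLAG_DMA_SYNC_DEV is set, and allow_direct may only be true from the
 * softirq/NAPI context that owns the pool.
 */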

static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
	return true;
#else
	return false;
#endif
}

/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid);

#endif /* _NET_PAGE_POOL_TYPES_H */