| 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
| 2 | /* |
| 3 | * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES |
| 4 | */ |
| 5 | #ifndef __GENERIC_PT_IOMMU_H |
| 6 | #define __GENERIC_PT_IOMMU_H |
| 7 | |
| 8 | #include <linux/generic_pt/common.h> |
| 9 | #include <linux/iommu.h> |
| 10 | #include <linux/mm_types.h> |
| 11 | |
| 12 | struct iommu_iotlb_gather; |
| 13 | struct pt_iommu_ops; |
| 14 | struct pt_iommu_driver_ops; |
| 15 | struct iommu_dirty_bitmap; |
| 16 | |
| 17 | /** |
| 18 | * DOC: IOMMU Radix Page Table |
| 19 | * |
| 20 | * The IOMMU implementation of the Generic Page Table provides an ops struct |
| 21 | * that is useful to go with an iommu_domain to serve the DMA API, IOMMUFD and |
| 22 | * the generic map/unmap interface. |
| 23 | * |
| 24 | * This interface uses a caller provided locking approach. The caller must have |
| 25 | * a VA range lock concept that prevents concurrent threads from calling ops on |
| 26 | * the same VA. Generally the range lock must be at least as large as a single |
| 27 | * map call. |
| 28 | */ |
| 29 | |
| 30 | /** |
| 31 | * struct pt_iommu - Base structure for IOMMU page tables |
| 32 | * |
| 33 | * The format-specific struct will include this as the first member. |
| 34 | */ |
| 35 | struct pt_iommu { |
| 36 | /** |
| 37 | * @domain: The core IOMMU domain. The driver should use a union to |
| 38 | * overlay this memory with its previously existing domain struct to |
| 39 | * create an alias. |
| 40 | */ |
| 41 | struct iommu_domain domain; |
| 42 | |
| 43 | /** |
| 44 | * @ops: Function pointers to access the API |
| 45 | */ |
| 46 | const struct pt_iommu_ops *ops; |
| 47 | |
| 48 | /** |
| 49 | * @driver_ops: Function pointers provided by the HW driver to help |
| 50 | * manage HW details like caches. |
| 51 | */ |
| 52 | const struct pt_iommu_driver_ops *driver_ops; |
| 53 | |
| 54 | /** |
| 55 | * @nid: Node ID to use for table memory allocations. The IOMMU driver |
| 56 | * may want to set the NID to the device's NID, if there are multiple |
| 57 | * table walkers. |
| 58 | */ |
| 59 | int nid; |
| 60 | |
| 61 | /** |
| 62 | * @iommu_device: Device pointer used for any DMA cache flushing when |
| 63 | * PT_FEAT_DMA_INCOHERENT. This is the iommu device that created the |
| 64 | * page table which must have dma ops that perform cache flushing. |
| 65 | */ |
| 66 | struct device *iommu_device; |
| 67 | }; |
| 68 | |
| 69 | /** |
| 70 | * struct pt_iommu_info - Details about the IOMMU page table |
| 71 | * |
| 72 | * Returned from pt_iommu_ops->get_info() |
| 73 | */ |
| 74 | struct pt_iommu_info { |
| 75 | /** |
| 76 | * @pgsize_bitmap: A bitmask where each set bit indicates |
| 77 | * a page size that can be natively stored in the page table. |
| 78 | */ |
| 79 | u64 pgsize_bitmap; |
| 80 | }; |
| 81 | |
/**
 * struct pt_iommu_ops - Operations provided by the page table implementation
 *
 * Filled in by the format specific init code. Except where noted otherwise
 * (see @set_dirty) callers must follow the locking scheme described in the
 * DOC section at the top of this header.
 */
struct pt_iommu_ops {
	/**
	 * @set_dirty: Make the iova write dirty
	 * @iommu_table: Table to manipulate
	 * @iova: IO virtual address to start
	 *
	 * This is only used by iommufd testing. It makes the iova dirty so that
	 * read_and_clear_dirty() will see it as dirty. Unlike all the other ops
	 * this one is safe to call without holding any locking. It may return
	 * -EAGAIN if there is a race.
	 */
	int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova);

	/**
	 * @get_info: Return the pt_iommu_info structure
	 * @iommu_table: Table to query
	 * @info: Filled in with the result on return
	 *
	 * Return some basic static information about the page table.
	 */
	void (*get_info)(struct pt_iommu *iommu_table,
			 struct pt_iommu_info *info);

	/**
	 * @deinit: Undo a format specific init operation
	 * @iommu_table: Table to destroy
	 *
	 * Release all of the memory. The caller must have already removed the
	 * table from all HW access and all caches.
	 */
	void (*deinit)(struct pt_iommu *iommu_table);
};
| 113 | |
| 114 | /** |
| 115 | * struct pt_iommu_driver_ops - HW IOTLB cache flushing operations |
| 116 | * |
| 117 | * The IOMMU driver should implement these using container_of(iommu_table) to |
| 118 | * get to it's iommu_domain derived structure. All ops can be called in atomic |
| 119 | * contexts as they are buried under DMA API calls. |
| 120 | */ |
| 121 | struct pt_iommu_driver_ops { |
| 122 | /** |
| 123 | * @change_top: Update the top of table pointer |
| 124 | * @iommu_table: Table to operate on |
| 125 | * @top_paddr: New CPU physical address of the top pointer |
| 126 | * @top_level: IOMMU PT level of the new top |
| 127 | * |
| 128 | * Called under the get_top_lock() spinlock. The driver must update all |
| 129 | * HW references to this domain with a new top address and |
| 130 | * configuration. On return mappings placed in the new top must be |
| 131 | * reachable by the HW. |
| 132 | * |
| 133 | * top_level encodes the level in IOMMU PT format, level 0 is the |
| 134 | * smallest page size increasing from there. This has to be translated |
| 135 | * to any HW specific format. During this call the new top will not be |
| 136 | * visible to any other API. |
| 137 | * |
| 138 | * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if |
| 139 | * enabled. |
| 140 | */ |
| 141 | void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr, |
| 142 | unsigned int top_level); |
| 143 | |
| 144 | /** |
| 145 | * @get_top_lock: lock to hold when changing the table top |
| 146 | * @iommu_table: Table to operate on |
| 147 | * |
| 148 | * Return a lock to hold when changing the table top page table from |
| 149 | * being stored in HW. The lock will be held prior to calling |
| 150 | * change_top() and released once the top is fully visible. |
| 151 | * |
| 152 | * Typically this would be a lock that protects the iommu_domain's |
| 153 | * attachment list. |
| 154 | * |
| 155 | * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if |
| 156 | * enabled. |
| 157 | */ |
| 158 | spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table); |
| 159 | }; |
| 160 | |
| 161 | static inline void pt_iommu_deinit(struct pt_iommu *iommu_table) |
| 162 | { |
| 163 | /* |
| 164 | * It is safe to call pt_iommu_deinit() before an init, or if init |
| 165 | * fails. The ops pointer will only become non-NULL if deinit needs to be |
| 166 | * run. |
| 167 | */ |
| 168 | if (iommu_table->ops) |
| 169 | iommu_table->ops->deinit(iommu_table); |
| 170 | } |
| 171 | |
| 172 | /** |
| 173 | * struct pt_iommu_cfg - Common configuration values for all formats |
| 174 | */ |
| 175 | struct pt_iommu_cfg { |
| 176 | /** |
| 177 | * @features: Features required. Only these features will be turned on. |
| 178 | * The feature list should reflect what the IOMMU HW is capable of. |
| 179 | */ |
| 180 | unsigned int features; |
| 181 | /** |
| 182 | * @hw_max_vasz_lg2: Maximum VA the IOMMU HW can support. This will |
| 183 | * imply the top level of the table. |
| 184 | */ |
| 185 | u8 hw_max_vasz_lg2; |
| 186 | /** |
| 187 | * @hw_max_oasz_lg2: Maximum OA the IOMMU HW can support. The format |
| 188 | * might select a lower maximum OA. |
| 189 | */ |
| 190 | u8 hw_max_oasz_lg2; |
| 191 | }; |
| 192 | |
/* Generate the exported function signatures from iommu_pt.h */
#define IOMMU_PROTOTYPES(fmt)                                                 \
	phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
						  dma_addr_t iova);            \
	int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain,            \
				       unsigned long iova, phys_addr_t paddr,  \
				       size_t pgsize, size_t pgcount,          \
				       int prot, gfp_t gfp, size_t *mapped);   \
	size_t pt_iommu_##fmt##_unmap_pages(                                   \
		struct iommu_domain *domain, unsigned long iova,               \
		size_t pgsize, size_t pgcount,                                 \
		struct iommu_iotlb_gather *iotlb_gather);                      \
	int pt_iommu_##fmt##_read_and_clear_dirty(                             \
		struct iommu_domain *domain, unsigned long iova, size_t size,  \
		unsigned long flags, struct iommu_dirty_bitmap *dirty);        \
	int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table,                \
				  const struct pt_iommu_##fmt##_cfg *cfg,      \
				  gfp_t gfp);                                  \
	void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table,            \
				      struct pt_iommu_##fmt##_hw_info *info)
/*
 * Declare the per-format table struct, embedding the common struct pt_iommu
 * as its first member, and emit the exported prototypes for that format.
 */
#define IOMMU_FORMAT(fmt, member)                                             \
	struct pt_iommu_##fmt {                                                \
		struct pt_iommu iommu;                                         \
		struct pt_##fmt member;                                        \
	};                                                                     \
	IOMMU_PROTOTYPES(fmt)
| 219 | |
| 220 | /* |
| 221 | * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the |
| 222 | * iommu_pt |
| 223 | */ |
| 224 | #define IOMMU_PT_DOMAIN_OPS(fmt) \ |
| 225 | .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \ |
| 226 | .map_pages = &pt_iommu_##fmt##_map_pages, \ |
| 227 | .unmap_pages = &pt_iommu_##fmt##_unmap_pages |
| 228 | #define IOMMU_PT_DIRTY_OPS(fmt) \ |
| 229 | .read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty |
| 230 | |
| 231 | /* |
| 232 | * The driver should setup its domain struct like |
| 233 | * union { |
| 234 | * struct iommu_domain domain; |
| 235 | * struct pt_iommu_xxx xx; |
| 236 | * }; |
| 237 | * PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain); |
| 238 | * |
| 239 | * Which creates an alias between driver_domain.domain and |
| 240 | * driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing |
| 241 | * driver_domain.domain users. |
| 242 | */ |
| 243 | #define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb) \ |
| 244 | static_assert(offsetof(s, pt_iommu_memb.domain) == \ |
| 245 | offsetof(s, domain_memb)) |
| 246 | |
/* Configuration for the AMDv1 page table format */
struct pt_iommu_amdv1_cfg {
	struct pt_iommu_cfg common;
	/*
	 * Level the table starts at; in IOMMU PT terms level 0 is the
	 * smallest page size, increasing from there.
	 */
	unsigned int starting_level;
};

/* Values the driver programs into HW for an AMDv1 table */
struct pt_iommu_amdv1_hw_info {
	/* NOTE(review): presumably the host page table root pointer - confirm */
	u64 host_pt_root;
	/* NOTE(review): presumably the DTE paging mode (table height) - confirm */
	u8 mode;
};

IOMMU_FORMAT(amdv1, amdpt);

/* amdv1_mock is used by the iommufd selftest */
#define pt_iommu_amdv1_mock pt_iommu_amdv1
#define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg
struct pt_iommu_amdv1_mock_hw_info;
IOMMU_PROTOTYPES(amdv1_mock);
| 264 | |
/* Configuration for the Intel VT-d second stage page table format */
struct pt_iommu_vtdss_cfg {
	struct pt_iommu_cfg common;
	/* 4 is a 57 bit 5 level table */
	unsigned int top_level;
};

/* Values the driver programs into HW for a VT-d second stage table */
struct pt_iommu_vtdss_hw_info {
	/* NOTE(review): presumably the second stage page table pointer - confirm */
	u64 ssptptr;
	/* NOTE(review): presumably the address width encoding - confirm */
	u8 aw;
};

IOMMU_FORMAT(vtdss, vtdss_pt);
| 277 | |
/* Configuration for the x86_64 page table format */
struct pt_iommu_x86_64_cfg {
	struct pt_iommu_cfg common;
	/* 4 is a 57 bit 5 level table */
	unsigned int top_level;
};

/* Values the driver programs into HW for an x86_64 table */
struct pt_iommu_x86_64_hw_info {
	/* NOTE(review): presumably the GCR3 page table root pointer - confirm */
	u64 gcr3_pt;
	/* Number of table levels */
	u8 levels;
};

IOMMU_FORMAT(x86_64, x86_64_pt);
| 290 | |
| 291 | #undef IOMMU_PROTOTYPES |
| 292 | #undef IOMMU_FORMAT |
| 293 | #endif |
| 294 | |