/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
 * Copyright(c) 2018 Intel Corporation.
 */
#ifndef HFI1_TID_RDMA_H
#define HFI1_TID_RDMA_H

#include <linux/circ_buf.h>
#include "common.h"

/* Convenience helpers for power-of-two circular buffer index math */
#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
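
/*
 * Example (illustrative, not part of the driver API): with a ring of
 * size 8 (the size must be a power of two for the mask to work),
 *
 *	CIRC_NEXT(7, 8) == 0	and	CIRC_PREV(0, 8) == 7
 *
 * i.e. the index wraps around without any conditional branch.
 */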

#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
#define TID_RDMA_SEGMENT_SHIFT 18
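
/*
 * Worked example (illustrative): with the common 4 KiB page size
 * (PAGE_SHIFT == 12), TID_RDMA_MAX_PAGES == BIT(18) >> 12 == 64,
 * i.e. a full 256 KiB segment spans at most 64 pages.
 */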

/*
 * Bit definitions for priv->s_flags.
 * These bit flags overload the bit positions defined for the QP's
 * s_flags. Because these bits are used only in the QP priv's s_flags
 * field, there are no collisions with the QP's own s_flags.
 *
 * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
 * HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
 */
#define HFI1_S_TID_BUSY_SET BIT(0)
/* BIT(1) reserved for RVT_S_BUSY. */
#define HFI1_R_TID_RSC_TIMER BIT(2)
/* BIT(3) reserved for RVT_S_RESP_PENDING. */
/* BIT(4) reserved for RVT_S_ACK_PENDING. */
#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
#define HFI1_R_TID_WAIT_INTERLCK BIT(6)
/* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
/* BIT(16) reserved for RVT_S_SEND_ONE. */
#define HFI1_S_TID_RETRY_TIMER BIT(17)
/* BIT(18) reserved for RVT_S_ECN. */
#define HFI1_R_TID_SW_PSN BIT(19)
/* BIT(26) reserved for HFI1_S_WAIT_HALT. */
/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP. */
/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE. */

/*
 * Unlike regular IB RDMA VERBS, which do not require an entry
 * in the s_ack_queue, TID RDMA WRITE requests do because they
 * generate responses.
 * Therefore, the s_ack_queue needs to be extended by a certain
 * number of entries. The key point is that the queue must be
 * extended without the "user" knowing, so the user does not end
 * up using the extra entries.
 */
#define HFI1_TID_RDMA_WRITE_CNT 8
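
/*
 * A minimal sketch of how the extension can stay invisible (hypothetical
 * code for illustration only; real_size and user_visible_size are
 * made-up names, not this driver's actual implementation): allocate the
 * real queue with the extra entries while reporting only the
 * user-visible depth.
 *
 *	real_size = user_visible_size + HFI1_TID_RDMA_WRITE_CNT;
 *	qp->s_ack_queue = kcalloc(real_size, sizeof(*qp->s_ack_queue),
 *				  GFP_KERNEL);
 *	attr->max_dest_rd_atomic = user_visible_size;
 */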

struct tid_rdma_params {
	struct rcu_head rcu_head;
	u32 qp;
	u32 max_len;
	u16 jkey;
	u8 max_read;
	u8 max_write;
	u8 timeout;
	u8 urg;
	u8 version;
};

struct tid_rdma_qp_params {
	struct work_struct trigger_work;
	struct tid_rdma_params local;
	struct tid_rdma_params __rcu *remote;
};
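
/*
 * Illustrative RCU reader pattern for the negotiated remote parameters
 * (a sketch using the standard RCU API, not a quote of this driver;
 * "priv" is assumed to be a QP private structure embedding a
 * struct tid_rdma_qp_params named tid_rdma):
 *
 *	struct tid_rdma_params *remote;
 *	u32 max_len = 0;
 *
 *	rcu_read_lock();
 *	remote = rcu_dereference(priv->tid_rdma.remote);
 *	if (remote)
 *		max_len = remote->max_len;
 *	rcu_read_unlock();
 */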

/* Track state for each hardware flow */
struct tid_flow_state {
	u32 generation;
	u32 psn;
	u8 index;
	u8 last_index;
};

enum tid_rdma_req_state {
	TID_REQUEST_INACTIVE = 0,
	TID_REQUEST_INIT,
	TID_REQUEST_INIT_RESEND,
	TID_REQUEST_ACTIVE,
	TID_REQUEST_RESEND,
	TID_REQUEST_RESEND_ACTIVE,
	TID_REQUEST_QUEUED,
	TID_REQUEST_SYNC,
	TID_REQUEST_RNR_NAK,
	TID_REQUEST_COMPLETE,
};

struct tid_rdma_request {
	struct rvt_qp *qp;
	struct hfi1_ctxtdata *rcd;
	union {
		struct rvt_swqe *swqe;
		struct rvt_ack_entry *ack;
	} e;

	struct tid_rdma_flow *flows;	/* array of tid flows */
	struct rvt_sge_state ss;	/* SGE state for TID RDMA requests */
	u16 n_flows;		/* size of the flow buffer window */
	u16 setup_head;		/* flow index we are setting up */
	u16 clear_tail;		/* flow index we are clearing */
	u16 flow_idx;		/* flow index most recently set up */
	u16 acked_tail;

	u32 seg_len;
	u32 total_len;
	u32 r_ack_psn;		/* next expected ack PSN */
	u32 r_flow_psn;		/* IB PSN of next segment start */
	u32 r_last_acked;	/* IB PSN of last ACK'ed packet */
	u32 s_next_psn;		/* IB PSN of next segment start for read */

	u32 total_segs;		/* segments required to complete a request */
	u32 cur_seg;		/* index of current segment */
	u32 comp_seg;		/* index of last completed segment */
	u32 ack_seg;		/* index of last ack'ed segment */
	u32 alloc_seg;		/* index of next segment to be allocated */
	u32 isge;		/* index of "current" sge */
	u32 ack_pending;	/* num acks pending for this request */

	enum tid_rdma_req_state state;
};
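
/*
 * The flows[] array above is managed as a circular buffer: setup_head,
 * flow_idx, clear_tail, and acked_tail are advanced with the CIRC_*
 * helpers. A sketch of claiming the next flow slot (illustrative only;
 * assumes the window size is a power of two, as the CIRC_* mask
 * requires):
 *
 *	struct tid_rdma_flow *flow = &req->flows[req->setup_head];
 *	req->setup_head = CIRC_NEXT(req->setup_head, req->n_flows);
 */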

/*
 * When header suppression is used, the PSNs associated with a "flow"
 * are relevant (not the PSNs maintained by verbs). Track the per-flow
 * PSNs here for a TID RDMA segment.
 */
struct flow_state {
	u32 flags;
	u32 resp_ib_psn;	/* The IB PSN of the response for this flow */
	u32 generation;		/* generation of flow */
	u32 spsn;		/* starting PSN in TID space */
	u32 lpsn;		/* last PSN in TID space */
	u32 r_next_psn;		/* next PSN to be received (in TID space) */

	/* For tid rdma read */
	u32 ib_spsn;		/* starting PSN in Verbs space */
	u32 ib_lpsn;		/* last PSN in Verbs space */
};

struct tid_rdma_pageset {
	dma_addr_t addr : 48; /* Only needed for the first page */
	u8 idx : 8;
	u8 count : 7;
	u8 mapped : 1;
};
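
/*
 * Note (informational): the bit-field widths above sum to 64 bits
 * (48 + 8 + 7 + 1), so the structure is intended to pack into a single
 * 64-bit word; the exact layout of bit-fields that mix base types is
 * compiler-dependent.
 */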

/**
 * kern_tid_node - used for managing TIDs in TID groups
 *
 * @grp: the tid_group backing this node
 * @map: grp->map captured prior to programming this TID group in HW
 * @cnt: Only @cnt of available group entries are actually programmed
 */
struct kern_tid_node {
	struct tid_group *grp;
	u8 map;
	u8 cnt;
};

/* Overall info for a TID RDMA segment */
struct tid_rdma_flow {
	/*
	 * While a TID RDMA segment is being transferred, it uses a QP number
	 * from the "KDETH section of QP numbers" (which is different from the
	 * QP number that originated the request). Bits 11-15 of these QP
	 * numbers identify the "TID flow" for the segment.
	 */
	struct flow_state flow_state;
	struct tid_rdma_request *req;
	u32 tid_qpn;
	u32 tid_offset;
	u32 length;
	u32 sent;
	u8 tnode_cnt;
	u8 tidcnt;
	u8 tid_idx;
	u8 idx;
	u8 npagesets;
	u8 npkts;
	u8 pkt;
	u8 resync_npkts;
	struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
	struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
	u32 tid_entry[TID_RDMA_MAX_PAGES];
};
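
/*
 * Example (derived from the comment inside the struct, for illustration
 * only): since bits 11-15 of the KDETH QP number carry the TID flow,
 * the 5-bit flow index can be recovered as
 *
 *	flow_idx = (flow->tid_qpn >> 11) & 0x1f;
 */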

enum tid_rnr_nak_state {
	TID_RNR_NAK_INIT = 0,
	TID_RNR_NAK_SEND,
	TID_RNR_NAK_SENT,
};

bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
void tid_rdma_conn_error(struct rvt_qp *qp);
void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);

int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
			    struct rvt_sge_state *ss, bool *last);
int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req);
void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req);
void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe);

/**
 * trdma_clean_swqe - clean flows for swqe if large send queue
 * @qp: the qp
 * @wqe: the send wqe
 */
static inline void trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
	if (!wqe->priv)
		return;
	__trdma_clean_swqe(qp, wqe);
}

void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp);

int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		      struct ib_qp_init_attr *init_attr);
void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);

void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp);

int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd);

struct cntr_entry;
u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
			    void *context, int vl, int mode, u64 data);

u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
				    struct ib_other_headers *ohdr,
				    u32 *bth1, u32 *bth2, u32 *len);
u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
				 struct ib_other_headers *ohdr, u32 *bth1,
				 u32 *bth2, u32 *len);
void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
				  struct ib_other_headers *ohdr, u32 *bth0,
				  u32 *bth1, u32 *bth2, u32 *len, bool *last);
void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet);
bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
			      struct hfi1_pportdata *ppd,
			      struct hfi1_packet *packet);
void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
			       u32 *bth2);
void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);

void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp,
					   struct rvt_swqe *wqe)
{
	if (wqe->priv &&
	    (wqe->wr.opcode == IB_WR_RDMA_READ ||
	     wqe->wr.opcode == IB_WR_RDMA_WRITE) &&
	    wqe->length >= TID_RDMA_MIN_SEGMENT_SIZE)
		setup_tid_rdma_wqe(qp, wqe);
}
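
/*
 * Usage note (illustrative): a qualifying WQE is one whose QP priv has
 * been set up (wqe->priv != NULL), whose opcode is IB_WR_RDMA_READ or
 * IB_WR_RDMA_WRITE, and whose length is at least one TID RDMA segment
 * (256 KiB). For example, a 1 MiB RDMA READ posted on a TID-enabled QP
 * would be handed to setup_tid_rdma_wqe() for conversion to TID RDMA.
 */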

u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
				  struct ib_other_headers *ohdr,
				  u32 *bth1, u32 *bth2, u32 *len);

void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);

u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
				   struct ib_other_headers *ohdr, u32 *bth1,
				   u32 bth2, u32 *len,
				   struct rvt_sge_state **ss);

void hfi1_del_tid_reap_timer(struct rvt_qp *qp);

void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet);

bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
				struct ib_other_headers *ohdr,
				u32 *bth1, u32 *bth2, u32 *len);

void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet);

u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
				  struct ib_other_headers *ohdr, u16 iflow,
				  u32 *bth1, u32 *bth2);

void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet);

void hfi1_add_tid_retry_timer(struct rvt_qp *qp);
void hfi1_del_tid_retry_timer(struct rvt_qp *qp);

u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
			       struct ib_other_headers *ohdr, u32 *bth1,
			       u32 *bth2, u16 fidx);

void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet);

struct hfi1_pkt_state;
int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

void _hfi1_do_tid_send(struct work_struct *work);

bool hfi1_schedule_tid_send(struct rvt_qp *qp);

bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);

#endif /* HFI1_TID_RDMA_H */