/*
 * Copyright (c) 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: cq.c 4005 2005-11-09 20:17:19Z roland $
 */
#include "mlnx_uvp_doorbell.h"

#if defined(EVENT_TRACING)
#include "mlnx_uvp_cq.tmh"
#endif
enum {
        MTHCA_CQ_DOORBELL = 0x20
};
#define MTHCA_TAVOR_CQ_DB_INC_CI       (1 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT      (2 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL  (3 << 24)
#define MTHCA_TAVOR_CQ_DB_SET_CI       (4 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT (5 << 24)

#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL  (1 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT      (2 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
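/*
 * For both Tavor and Arbel the CQ doorbell command sits above bit 24 of
 * the first doorbell word, and the low 24 bits carry the CQ number (see
 * update_cons_index() and the *_arm_cq() routines below, which OR in
 * cq->cqn).
 */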
enum {
        MTHCA_CQ_ENTRY_OWNER_SW     = 0x00,
        MTHCA_CQ_ENTRY_OWNER_HW     = 0x80,
        MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe
};
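/*
 * The top bit of the owner byte in each CQE tracks who owns the entry:
 * software may consume entries whose owner bit is clear, and hands an
 * entry back to hardware by setting the bit again (see cqe_sw() and
 * set_cqe_hw() below).
 */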
enum {
        SYNDROME_LOCAL_LENGTH_ERR        = 0x01,
        SYNDROME_LOCAL_QP_OP_ERR         = 0x02,
        SYNDROME_LOCAL_EEC_OP_ERR        = 0x03,
        SYNDROME_LOCAL_PROT_ERR          = 0x04,
        SYNDROME_WR_FLUSH_ERR            = 0x05,
        SYNDROME_MW_BIND_ERR             = 0x06,
        SYNDROME_BAD_RESP_ERR            = 0x10,
        SYNDROME_LOCAL_ACCESS_ERR        = 0x11,
        SYNDROME_REMOTE_INVAL_REQ_ERR    = 0x12,
        SYNDROME_REMOTE_ACCESS_ERR       = 0x13,
        SYNDROME_REMOTE_OP_ERR           = 0x14,
        SYNDROME_RETRY_EXC_ERR           = 0x15,
        SYNDROME_RNR_RETRY_EXC_ERR       = 0x16,
        SYNDROME_LOCAL_RDD_VIOL_ERR      = 0x20,
        SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21,
        SYNDROME_REMOTE_ABORTED_ERR      = 0x22,
        SYNDROME_INVAL_EECN_ERR          = 0x23,
        SYNDROME_INVAL_EEC_STATE_ERR     = 0x24
};
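/*
 * These syndrome codes are reported by the HCA in the syndrome field of
 * an error CQE and are translated into IBAL work completion statuses in
 * handle_error_cqe() below.
 */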
/* CQE layout as written by the HCA; multi-byte fields are big-endian. */
struct mthca_cqe {
        uint32_t my_qpn;
        uint32_t my_ee;
        uint32_t rqpn;
        uint16_t sl_g_mlpath;
        uint16_t rlid;
        uint32_t imm_etype_pkey_eec;
        uint32_t byte_cnt;
        uint32_t wqe;
        uint8_t  opcode;
        uint8_t  is_send;
        uint8_t  reserved;
        uint8_t  owner;
};
struct mthca_err_cqe {
        uint32_t my_qpn;
        uint32_t reserved1[3];
        uint8_t  syndrome;
        uint8_t  vendor_err;
        uint16_t db_cnt;
        uint32_t reserved2;
        uint32_t wqe;
        uint8_t  opcode;
        uint8_t  reserved3[2];
        uint8_t  owner;
};
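/*
 * Both layouts occupy one MTHCA_CQ_ENTRY_SIZE (32-byte) slot: an error
 * completion is an ordinary CQE reinterpreted through the view above,
 * selected when the opcode matches the 0xfe error pattern.
 */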
static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
{
        return (struct mthca_cqe *) ((uint8_t *) cq->buf + entry * MTHCA_CQ_ENTRY_SIZE);
}
static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i)
{
        struct mthca_cqe *cqe = get_cqe(cq, i);
        return (MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner) ? NULL : cqe;
}
static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
{
        return cqe_sw(cq, cq->cons_index & cq->ibv_cq.cqe);
}
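/*
 * cq->ibv_cq.cqe is used as a mask here, which relies on the CQ buffer
 * holding a power-of-two number of entries: cons_index just increments
 * forever and the masking makes it wrap around the buffer naturally.
 */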
static inline void set_cqe_hw(struct mthca_cqe *cqe)
{
        cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
}
/*
 * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
 * should be correct before calling update_cons_index().
 */
static inline void update_cons_index(struct mthca_cq *cq, int incr)
{
        uint32_t doorbell[2];

        if (mthca_is_memfree(cq->ibv_cq.context)) {
                *cq->set_ci_db = cl_hton32(cq->cons_index);
                mb();
        } else {
                doorbell[0] = cl_hton32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn);
                doorbell[1] = cl_hton32(incr - 1);

                mthca_write64(doorbell, to_mctx(cq->ibv_cq.context), MTHCA_CQ_DOORBELL);
        }
}
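/*
 * On mem-free (Arbel) HCAs the consumer index lives in a doorbell record
 * in host memory that the HCA reads, so a single store plus a barrier is
 * enough; on Tavor the increment is posted through the UAR with an
 * INC_CI command, and the doorbell format encodes it as (incr - 1).
 */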
static void dump_cqe(uint32_t print_lvl, void *cqe_ptr)
{
        uint32_t *cqe = cqe_ptr;

        (void) cqe;     /* avoid warning if mthca_dbg compiled away... */

        UVP_PRINT(print_lvl, UVP_DBG_CQ, ("CQE content\n"));
        UVP_PRINT(print_lvl, UVP_DBG_CQ, (" [%2x] %08x %08x %08x %08x\n", 0,
                  cl_ntoh32(cqe[0]), cl_ntoh32(cqe[1]), cl_ntoh32(cqe[2]), cl_ntoh32(cqe[3])));
        UVP_PRINT(print_lvl, UVP_DBG_CQ, (" [%2x] %08x %08x %08x %08x\n", 16,
                  cl_ntoh32(cqe[4]), cl_ntoh32(cqe[5]), cl_ntoh32(cqe[6]), cl_ntoh32(cqe[7])));
}
static int handle_error_cqe(struct mthca_cq *cq,
                            struct mthca_qp *qp, int wqe_index, int is_send,
                            struct mthca_err_cqe *cqe,
                            struct _ib_wc *entry, int *free_cqe)
{
        int err;
        int dbd;
        uint32_t new_wqe;

        if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
                UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_CQ, ("local QP operation err "
                          "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n",
                          cl_ntoh32(cqe->my_qpn), cl_ntoh32(cqe->wqe),
                          cq->cqn, cq->cons_index));
                dump_cqe(TRACE_LEVEL_VERBOSE, cqe);
        }

        /*
         * For completions in error, only work request ID, status, vendor error
         * (and freed resource count for RD) have to be set.
         */
        switch (cqe->syndrome) {
        case SYNDROME_LOCAL_LENGTH_ERR:
                entry->status = IB_WCS_LOCAL_LEN_ERR;
                break;
        case SYNDROME_LOCAL_QP_OP_ERR:
                entry->status = IB_WCS_LOCAL_OP_ERR;
                break;
        case SYNDROME_LOCAL_PROT_ERR:
                entry->status = IB_WCS_LOCAL_PROTECTION_ERR;
                break;
        case SYNDROME_WR_FLUSH_ERR:
                entry->status = IB_WCS_WR_FLUSHED_ERR;
                break;
        case SYNDROME_MW_BIND_ERR:
                entry->status = IB_WCS_MEM_WINDOW_BIND_ERR;
                break;
        case SYNDROME_BAD_RESP_ERR:
                entry->status = IB_WCS_BAD_RESP_ERR;
                break;
        case SYNDROME_LOCAL_ACCESS_ERR:
                entry->status = IB_WCS_LOCAL_ACCESS_ERR;
                break;
        case SYNDROME_REMOTE_INVAL_REQ_ERR:
                entry->status = IB_WCS_REM_INVALID_REQ_ERR;
                break;
        case SYNDROME_REMOTE_ACCESS_ERR:
                entry->status = IB_WCS_REM_ACCESS_ERR;
                break;
        case SYNDROME_REMOTE_OP_ERR:
                entry->status = IB_WCS_REM_OP_ERR;
                break;
        case SYNDROME_RETRY_EXC_ERR:
                entry->status = IB_WCS_TIMEOUT_RETRY_ERR;
                break;
        case SYNDROME_RNR_RETRY_EXC_ERR:
                entry->status = IB_WCS_RNR_RETRY_ERR;
                break;
        case SYNDROME_LOCAL_EEC_OP_ERR:
        case SYNDROME_LOCAL_RDD_VIOL_ERR:
        case SYNDROME_REMOTE_INVAL_RD_REQ_ERR:
        case SYNDROME_REMOTE_ABORTED_ERR:
        case SYNDROME_INVAL_EECN_ERR:
        case SYNDROME_INVAL_EEC_STATE_ERR:
        default:
                entry->status = IB_WCS_GENERAL_ERR;
                break;
        }

        entry->vendor_specific = cqe->vendor_err;

        /*
         * Mem-free HCAs always generate one CQE per WQE, even in the
         * error case, so we don't have to check the doorbell count, etc.
         */
        if (mthca_is_memfree(cq->ibv_cq.context))
                return 0;

        err = mthca_free_err_wqe(qp, is_send, wqe_index, &dbd, &new_wqe);
        if (err)
                return err;

        /*
         * If we're at the end of the WQE chain, or we've used up our
         * doorbell count, free the CQE. Otherwise just update it for
         * the next poll operation.
         *
         * This doesn't apply to mem-free HCAs, which never use the
         * doorbell count field. In that case we always free the CQE.
         */
        if (mthca_is_memfree(cq->ibv_cq.context) ||
            !(new_wqe & cl_hton32(0x3f)) || (!cqe->db_cnt && dbd))
                return 0;

        cqe->db_cnt   = cl_hton16(cl_ntoh16(cqe->db_cnt) - dbd);
        cqe->wqe      = new_wqe;
        cqe->syndrome = SYNDROME_WR_FLUSH_ERR;

        *free_cqe = 0;

        return 0;
}
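/*
 * Note the Tavor-only tail above: instead of freeing the CQE outright,
 * the entry is rewritten in place as a flush error for the next WQE in
 * the chain and kept owned by software, so the next poll picks it up
 * again. This is how one hardware CQE can report completions for several
 * doorbell-batched WQEs on non-mem-free HCAs.
 */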
static inline int mthca_poll_one(struct mthca_cq *cq,
                                 struct mthca_qp **cur_qp,
                                 int *freed,
                                 struct _ib_wc *entry)
{
        struct mthca_wq *wq;
        struct mthca_cqe *cqe;
        uint32_t qpn;
        int wqe_index;
        int is_error;
        int is_send;
        int free_cqe = 1;
        int err = 0;

        UVP_ENTER(UVP_DBG_CQ);

        cqe = next_cqe_sw(cq);
        if (!cqe)
                return -EAGAIN;

        /*
         * Make sure we read CQ entry contents after we've checked the
         * ownership bit.
         */
        rmb();

        UVP_PRINT(TRACE_LEVEL_VERBOSE, UVP_DBG_CQ, ("%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
                  cq->cqn, cq->cons_index, cl_ntoh32(cqe->my_qpn),
                  cl_ntoh32(cqe->wqe)));
        dump_cqe(TRACE_LEVEL_VERBOSE, cqe);

        qpn = cl_ntoh32(cqe->my_qpn);

        is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
                MTHCA_ERROR_CQE_OPCODE_MASK;
        is_send  = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;

        if (!*cur_qp || cl_ntoh32(cqe->my_qpn) != (*cur_qp)->ibv_qp.qp_num) {
                /*
                 * We do not have to take the QP table lock here,
                 * because CQs will be locked while QPs are removed
                 * from the table.
                 */
                *cur_qp = mthca_find_qp(to_mctx(cq->ibv_cq.context), cl_ntoh32(cqe->my_qpn));
                if (!*cur_qp) {
                        UVP_PRINT(TRACE_LEVEL_WARNING, UVP_DBG_CQ, ("CQ entry for unknown QP %06x\n",
                                  cl_ntoh32(cqe->my_qpn) & 0xffffff));
                        err = -EINVAL;
                        goto out;
                }
        }

        if (is_send) {
                wq = &(*cur_qp)->sq;
                wqe_index = ((cl_ntoh32(cqe->wqe) - (*cur_qp)->send_wqe_offset) >> wq->wqe_shift);
                entry->wr_id = (*cur_qp)->wrid[wqe_index + (*cur_qp)->rq.max];
        } else if ((*cur_qp)->ibv_qp.srq) {
                struct mthca_srq *srq = to_msrq((*cur_qp)->ibv_qp.srq);
                uint32_t wqe = cl_ntoh32(cqe->wqe);
                wq = NULL;
                wqe_index = wqe >> srq->wqe_shift;
                entry->wr_id = srq->wrid[wqe_index];
                mthca_free_srq_wqe(srq, wqe_index);
        } else {
                wq = &(*cur_qp)->rq;
                wqe_index = cl_ntoh32(cqe->wqe) >> wq->wqe_shift;
                entry->wr_id = (*cur_qp)->wrid[wqe_index];
        }

        if (wq) {
                if ((int) wq->last_comp < wqe_index)
                        wq->tail += wqe_index - wq->last_comp;
                else
                        wq->tail += wqe_index + wq->max - wq->last_comp;

                wq->last_comp = wqe_index;
        }

        if (is_send) {
                entry->recv.ud.recv_opt = 0;
                switch (cqe->opcode) {
                case MTHCA_OPCODE_RDMA_WRITE:
                        entry->wc_type = IB_WC_RDMA_WRITE;
                        break;
                case MTHCA_OPCODE_RDMA_WRITE_IMM:
                        entry->wc_type = IB_WC_RDMA_WRITE;
                        entry->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE;
                        break;
                case MTHCA_OPCODE_SEND:
                        entry->wc_type = IB_WC_SEND;
                        break;
                case MTHCA_OPCODE_SEND_IMM:
                        entry->wc_type = IB_WC_SEND;
                        entry->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE;
                        break;
                case MTHCA_OPCODE_RDMA_READ:
                        entry->wc_type = IB_WC_RDMA_READ;
                        entry->length = cl_ntoh32(cqe->byte_cnt);
                        break;
                case MTHCA_OPCODE_ATOMIC_CS:
                        entry->wc_type = IB_WC_COMPARE_SWAP;
                        entry->length = cl_ntoh32(cqe->byte_cnt);
                        break;
                case MTHCA_OPCODE_ATOMIC_FA:
                        entry->wc_type = IB_WC_FETCH_ADD;
                        entry->length = cl_ntoh32(cqe->byte_cnt);
                        break;
                case MTHCA_OPCODE_BIND_MW:
                        entry->wc_type = IB_WC_MW_BIND;
                        break;
                default:
                        /* assume it's a send completion */
                        entry->wc_type = IB_WC_SEND;
                        break;
                }
        } else {
                entry->length = cl_ntoh32(cqe->byte_cnt);
                switch (cqe->opcode & 0x1f) {
                case IBV_OPCODE_SEND_LAST_WITH_IMMEDIATE:
                case IBV_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
                        entry->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE;
                        entry->recv.ud.immediate_data = cqe->imm_etype_pkey_eec;
                        entry->wc_type = IB_WC_RECV;
                        break;
                case IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
                case IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
                        entry->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE;
                        entry->recv.ud.immediate_data = cqe->imm_etype_pkey_eec;
                        entry->wc_type = IB_WC_RECV_RDMA_WRITE;
                        break;
                default:
                        entry->recv.ud.recv_opt = 0;
                        entry->wc_type = IB_WC_RECV;
                        break;
                }
                entry->recv.ud.remote_lid = cqe->rlid;
                entry->recv.ud.remote_qp = cqe->rqpn & 0xffffff00;
                entry->recv.ud.pkey_index = (uint16_t) (cl_ntoh32(cqe->imm_etype_pkey_eec) >> 16);
                entry->recv.ud.remote_sl = cl_ntoh16(cqe->sl_g_mlpath) >> 12;
                entry->recv.ud.path_bits = cl_ntoh16(cqe->sl_g_mlpath) & 0x7f;
                entry->recv.ud.recv_opt |= cl_ntoh16(cqe->sl_g_mlpath) & 0x80 ?
                        IB_RECV_OPT_GRH_VALID : 0;
        }

        if (is_error) {
                err = handle_error_cqe(cq, *cur_qp, wqe_index, is_send,
                                       (struct mthca_err_cqe *) cqe,
                                       entry, &free_cqe);
        } else
                entry->status = IB_WCS_SUCCESS;

out:
        if (likely(free_cqe)) {
                set_cqe_hw(cqe);
                ++(*freed);
                ++cq->cons_index;
        }

        UVP_EXIT(UVP_DBG_CQ);
        return err;
}
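/*
 * mthca_poll_one() returns 0 when a completion was written to *entry,
 * -EAGAIN when the CQ is empty, and another negative errno on a
 * consistency failure (e.g. a CQE for an unknown QP). *cur_qp caches the
 * last QP looked up so bursts of completions for one QP skip the lookup,
 * and *freed counts CQEs handed back to hardware so the caller can ring
 * the consumer-index doorbell once per poll batch.
 */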
int mthca_poll_cq(struct ibv_cq *ibcq, int num_entries, struct _ib_wc *entry)
{
        struct mthca_cq *cq = to_mcq(ibcq);
        struct mthca_qp *qp = NULL;
        int npolled;
        int err = 0;
        int freed = 0;

        cl_spinlock_acquire(&cq->lock);

        for (npolled = 0; npolled < num_entries; ++npolled) {
                err = mthca_poll_one(cq, &qp, &freed, entry + npolled);
                if (err)
                        break;
        }

        if (freed) {
                wmb();
                update_cons_index(cq, freed);
        }

        cl_spinlock_release(&cq->lock);

        return (err == 0 || err == -EAGAIN) ? npolled : err;
}
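/*
 * A minimal sketch of how a consumer might drive this entry point
 * (identifiers here are illustrative, not part of this file):
 *
 *      struct _ib_wc wc[8];
 *      int i, n = mthca_poll_cq(ibcq, 8, wc);
 *      for (i = 0; i < n; ++i)
 *              if (wc[i].status != IB_WCS_SUCCESS)
 *                      handle_failed_wr(wc[i].wr_id, wc[i].status);
 *
 * A return value of 0 simply means the CQ was empty.
 */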
int mthca_poll_cq_list(
        IN              struct ibv_cq *ibcq,
        IN      OUT     struct _ib_wc** const pp_free_wclist,
                OUT     struct _ib_wc** const pp_done_wclist )
{
        struct mthca_cq *cq = to_mcq(ibcq);
        struct mthca_qp *qp = NULL;
        int err = 0;
        int freed = 0;
        ib_wc_t *wc_p, **next_pp;

        cl_spinlock_acquire(&cq->lock);

        // loop through CQ
        next_pp = pp_done_wclist;
        wc_p = *pp_free_wclist;
        while (wc_p) {
                // poll one CQE
                err = mthca_poll_one(cq, &qp, &freed, wc_p);
                if (err)
                        break;

                // prepare for the next loop
                *next_pp = wc_p;
                next_pp = &wc_p->p_next;
                wc_p = wc_p->p_next;
        }

        // prepare the results
        *pp_free_wclist = wc_p;         /* Set the head of the free list. */
        *next_pp = NULL;                /* Clear the tail of the done list. */

        // update consumer index
        if (freed) {
                wmb();
                update_cons_index(cq, freed);
        }

        cl_spinlock_release(&cq->lock);
        return (err == 0 || err == -EAGAIN) ? 0 : err;
}
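/*
 * This IBAL-style variant consumes work completion structures from a
 * caller-supplied free list and returns the filled ones as a singly
 * linked done list, so the caller need not size a fixed array when it
 * does not know how many completions are pending.
 */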
int mthca_tavor_arm_cq(struct ibv_cq *cq, enum ib_cq_notify notify)
{
        uint32_t doorbell[2];

        doorbell[0] = cl_hton32((notify == IB_CQ_SOLICITED ?
                                 MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
                                 MTHCA_TAVOR_CQ_DB_REQ_NOT) |
                                to_mcq(cq)->cqn);
        doorbell[1] = 0xffffffff;

        mthca_write64(doorbell, to_mctx(cq->context), MTHCA_CQ_DOORBELL);

        return 0;
}
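/*
 * The all-ones second doorbell word requests notification on the next
 * completion regardless of the current consumer index; the Arbel variant
 * below instead passes the consumer index and an arming sequence number
 * explicitly.
 */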
int mthca_arbel_arm_cq(struct ibv_cq *ibvcq, enum ib_cq_notify notify)
{
        struct mthca_cq *cq = to_mcq(ibvcq);
        uint32_t doorbell[2];
        uint32_t sn;
        uint32_t ci;

        sn = *cq->p_u_arm_sn & 3;
        ci = cl_hton32(cq->cons_index);

        doorbell[0] = ci;
        doorbell[1] = cl_hton32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
                                (notify == IB_CQ_SOLICITED ? 1 : 2));

        mthca_write_db_rec(doorbell, cq->arm_db);

        /*
         * Make sure that the doorbell record in host memory is
         * written before ringing the doorbell via PCI MMIO.
         */
        wmb();

        doorbell[0] = cl_hton32((sn << 28) |
                                (notify == IB_CQ_SOLICITED ?
                                 MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
                                 MTHCA_ARBEL_CQ_DB_REQ_NOT) |
                                cq->cqn);
        doorbell[1] = ci;

        mthca_write64(doorbell, to_mctx(ibvcq->context), MTHCA_CQ_DOORBELL);

        return 0;
}
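/*
 * The arming sequence number (sn) is maintained by the event path
 * outside this file and advances each time a completion event fires, so
 * the HCA can ignore an arm request written with a stale sequence
 * number; together with writing the doorbell record before the MMIO
 * doorbell, this closes the race between re-arming and a concurrent
 * completion event.
 */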
static inline int is_recv_cqe(struct mthca_cqe *cqe)
{
        if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
            MTHCA_ERROR_CQE_OPCODE_MASK)
                return !(cqe->opcode & 0x01);
        else
                return !(cqe->is_send & 0x80);
}
void mthca_cq_clean(struct mthca_cq *cq, uint32_t qpn, struct mthca_srq *srq)
{
        struct mthca_cqe *cqe;
        uint32_t prod_index;
        int nfreed = 0;

        cl_spinlock_acquire(&cq->lock);

        /*
         * First we need to find the current producer index, so we
         * know where to start cleaning from. It doesn't matter if HW
         * adds new entries after this loop -- the QP we're worried
         * about is already in RESET, so the new entries won't come
         * from our QP and therefore don't need to be checked.
         */
        for (prod_index = cq->cons_index;
             cqe_sw(cq, prod_index & cq->ibv_cq.cqe);
             ++prod_index)
                if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
                        break;

        /*
         * Now sweep backwards through the CQ, removing CQ entries
         * that match our QP by copying older entries on top of them.
         */
        while ((int) --prod_index - (int) cq->cons_index >= 0) {
                cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
                if (cqe->my_qpn == cl_hton32(qpn)) {
                        if (srq && is_recv_cqe(cqe))
                                mthca_free_srq_wqe(srq,
                                                   cl_ntoh32(cqe->wqe) >> srq->wqe_shift);
                        ++nfreed;
                } else if (nfreed)
                        memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe),
                               cqe, MTHCA_CQ_ENTRY_SIZE);
        }

        if (nfreed) {
                cq->cons_index += nfreed;
                update_cons_index(cq, nfreed);
        }

        cl_spinlock_release(&cq->lock);
}
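/*
 * mthca_cq_clean() is used when a QP is torn down: completions belonging
 * to the dying QP are squeezed out of the CQ by sliding the surviving
 * entries over them, and any receive CQEs that came from an SRQ hand
 * their WQEs back to the SRQ free list.
 */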
void mthca_init_cq_buf(struct mthca_cq *cq, int nent)
{
        int i;

        for (i = 0; i < nent; ++i)
                set_cqe_hw(get_cqe(cq, i));
}