/*
 * Copyright (c) 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "mlnx_uvp_doorbell.h"
#include "mthca_wqe.h"
#include "mlnx_ual_data.h"

#if defined(EVENT_TRACING)
#include "mlnx_uvp_qp.tmh"
#endif

static const uint8_t mthca_opcode[] = {
	MTHCA_OPCODE_RDMA_WRITE,
	MTHCA_OPCODE_RDMA_WRITE_IMM,
	MTHCA_OPCODE_SEND,
	MTHCA_OPCODE_SEND_IMM,
	MTHCA_OPCODE_RDMA_READ,
	MTHCA_OPCODE_ATOMIC_CS,
	MTHCA_OPCODE_ATOMIC_FA
};
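
/*
 * Translate an IBAL work-request type (wr->wr_type) plus its send options
 * into the hardware opcode written into the WQE control segment.  Returns
 * MTHCA_OPCODE_INVALID for anything the send path does not support, so the
 * caller can reject the work request before ringing a doorbell.
 */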
static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)
{
	enum mthca_wr_opcode opcode = MTHCA_OPCODE_INVALID;

	switch (wr->wr_type) {
	case WR_SEND:
		opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;
		break;
	case WR_RDMA_WRITE:
		opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;
		break;
	case WR_RDMA_READ:		opcode = MTHCA_OPCODE_RDMA_READ;	break;
	case WR_COMPARE_SWAP:	opcode = MTHCA_OPCODE_ATOMIC_CS;	break;
	case WR_FETCH_ADD:		opcode = MTHCA_OPCODE_ATOMIC_FA;	break;
	default:				opcode = MTHCA_OPCODE_INVALID;		break;
	}
	return opcode;
}

static void dump_wqe(uint32_t print_lvl, uint32_t *wqe_ptr, struct mthca_qp *qp_ptr)
{
	net32_t *wqe = wqe_ptr;

	(void) wqe;	/* avoid warning if mthca_dbg compiled away... */
	UVP_PRINT(print_lvl, UVP_DBG_QP, ("WQE contents QPN 0x%06x\n", qp_ptr->ibv_qp.qp_num));
	UVP_PRINT(print_lvl, UVP_DBG_QP, ("WQE contents [%02x] %08x %08x %08x %08x\n", 0,
		cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3])));
	UVP_PRINT(print_lvl, UVP_DBG_QP, ("WQE contents [%02x] %08x %08x %08x %08x\n", 4,
		cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7])));
	UVP_PRINT(print_lvl, UVP_DBG_QP, ("WQE contents [%02x] %08x %08x %08x %08x\n", 8,
		cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11])));
	UVP_PRINT(print_lvl, UVP_DBG_QP, ("WQE contents [%02x] %08x %08x %08x %08x\n", 12,
		cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15])));
}

static void *get_recv_wqe(struct mthca_qp *qp, int n)
{
	return qp->buf + (n << qp->rq.wqe_shift);
}
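
/*
 * The QP work-queue buffer holds all receive WQEs first, followed by the
 * send WQEs starting at qp->send_wqe_offset; each queue locates WQE n at
 * (n << wqe_shift), i.e. the WQE stride is a power of two.
 */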
static void *get_send_wqe(struct mthca_qp *qp, int n)
{
	void *wqe_addr = qp->buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);

	UVP_PRINT(TRACE_LEVEL_INFORMATION, UVP_DBG_QP,
		("wqe %p, qp_buf %p, offset %#x, index %d, shift %d\n",
		 wqe_addr, qp->buf, qp->send_wqe_offset, n,
		 qp->sq.wqe_shift));

	return wqe_addr;
}

void mthca_init_qp_indices(struct mthca_qp *qp)
{
	qp->sq.next_ind  = 0;
	qp->sq.last_comp = qp->sq.max - 1;
	qp->sq.head      = 0;
	qp->sq.tail      = 0;
	qp->sq.last      = get_send_wqe(qp, qp->sq.max - 1);

	qp->rq.next_ind  = 0;
	qp->rq.last_comp = qp->rq.max - 1;
	qp->rq.head      = 0;
	qp->rq.tail      = 0;
	qp->rq.last      = get_recv_wqe(qp, qp->rq.max - 1);
}
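
/*
 * Ring-overflow test for a work queue: head - tail is the number of WQEs
 * currently outstanding.  The first check is done without a lock; if the
 * queue looks full, head/tail are re-read under the CQ lock, since the CQ
 * poll path is what advances wq->tail.
 */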
static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)
{
	unsigned cur;

	cur = wq->head - wq->tail;
	if ((int)(cur + nreq) < wq->max)
		return 0;

	cl_spinlock_acquire(&cq->lock);
	cur = wq->head - wq->tail;
	cl_spinlock_release(&cq->lock);

	return (int)(cur + nreq) >= wq->max;
}

int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
			  struct _ib_send_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	uint8_t *wqe;
	uint8_t *prev_wqe;
	int ind;
	int nreq;
	int ret = 0;
	int size;
	int size0 = 0;
	uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
	int i;
	uint32_t op0 = 0;
	enum ib_wr_opcode opcode;

	UVP_ENTER(UVP_DBG_QP);
	cl_spinlock_acquire(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.next_ind;

	if (ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		*bad_wr = wr;
		goto err_busy;
	}

	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {

		if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x full (%u head, %u tail,"
				" %d max, %d nreq)\n", ibqp->qp_num,
				qp->sq.head, qp->sq.tail,
				qp->sq.max, nreq));
			ret = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;

		opcode = conv_ibal_wr_opcode(wr);
		if (opcode == MTHCA_OPCODE_INVALID) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x opcode invalid\n", ibqp->qp_num));
			ret = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		((struct mthca_next_seg *) wqe)->nda_op = 0;
		((struct mthca_next_seg *) wqe)->ee_nds = 0;
		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
			 cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
			 cl_hton32(MTHCA_NEXT_SOLICIT) : 0) |
			cl_hton32(1);
		if (opcode == MTHCA_OPCODE_SEND_IMM ||
		    opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;
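
		/*
		 * "size" counts the WQE in 16-byte chunks: the descriptor
		 * size field read by the hardware (the low 6 bits of
		 * ee_nds) is expressed in these units, which is why every
		 * segment below adds sizeof(segment) / 16.
		 */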

		switch (ibqp->qp_type) {
		case IB_QPT_RELIABLE_CONN:
			switch (opcode) {
			case MTHCA_OPCODE_ATOMIC_CS:
			case MTHCA_OPCODE_ATOMIC_FA:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;

				wqe += sizeof (struct mthca_raddr_seg);

				if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cl_hton64(wr->remote_ops.atomic2);
					((struct mthca_atomic_seg *) wqe)->compare =
						cl_hton64(wr->remote_ops.atomic1);
				} else {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cl_hton64(wr->remote_ops.atomic1);
					((struct mthca_atomic_seg *) wqe)->compare = 0;
				}

				wqe += sizeof (struct mthca_atomic_seg);
				size += (sizeof (struct mthca_raddr_seg) +
					 sizeof (struct mthca_atomic_seg)) / 16;
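
				/*
				 * Atomic WQE layout: a remote-address segment
				 * followed by an atomic segment.  Compare &
				 * swap uses both operands (atomic1 = compare
				 * value, atomic2 = swap value); fetch & add
				 * only needs the add value in atomic1.
				 */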
				break;

			case MTHCA_OPCODE_RDMA_WRITE:
			case MTHCA_OPCODE_RDMA_WRITE_IMM:
			case MTHCA_OPCODE_RDMA_READ:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IB_QPT_UNRELIABLE_CONN:
			switch (opcode) {
			case MTHCA_OPCODE_RDMA_WRITE:
			case MTHCA_OPCODE_RDMA_WRITE_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

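		/*
		 * Tavor mode: the UD segment only carries the lkey and the
		 * address of the address vector (AV), which must stay in
		 * registered memory for the lifetime of the work request;
		 * Arbel/mem-free mode instead copies the whole AV into the
		 * WQE (see mthca_arbel_post_send).
		 */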
		case IB_QPT_UNRELIABLE_DGRM:
		{
			struct mthca_ah *ah = ((struct mthca_ah *) wr->dgrm.ud.h_av);
			((struct mthca_tavor_ud_seg *) wqe)->lkey =
				cl_hton32(ah->key);
			((struct mthca_tavor_ud_seg *) wqe)->av_addr =
				cl_hton64((uint64_t) ah->av);
			((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
			((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;

			wqe += sizeof (struct mthca_tavor_ud_seg);
			size += sizeof (struct mthca_tavor_ud_seg) / 16;
			break;
		}

		default:
			break;
		}

		if ((int) wr->num_ds > qp->sq.max_gs) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x too many gathers\n", ibqp->qp_num));
			ret = -ERANGE;
			*bad_wr = wr;
			goto out;
		}

		if (wr->send_opt & IB_SEND_OPT_INLINE) {
			if (wr->num_ds) {
				struct mthca_inline_seg *seg = (struct mthca_inline_seg *) wqe;
				uint32_t s = 0;

				wqe += sizeof *seg;
				for (i = 0; i < (int) wr->num_ds; ++i) {
					struct _ib_local_ds *sge = &wr->ds_array[i];

					s += sge->length;

					if (s > (uint32_t) qp->max_inline_data) {
						ret = -EINVAL;
						*bad_wr = wr;
						goto out;
					}

					memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr,
					       sge->length);
					wqe += sge->length;
				}

				seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
				size += align(s + sizeof *seg, 16) / 16;
			}
		} else {
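			/*
			 * Non-inline path: one mthca_data_seg (byte count,
			 * lkey, 64-bit address) is written per gather entry,
			 * 16 bytes each.
			 */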
			for (i = 0; i < (int) wr->num_ds; ++i) {
				((struct mthca_data_seg *) wqe)->byte_count =
					cl_hton32(wr->ds_array[i].length);
				((struct mthca_data_seg *) wqe)->lkey =
					cl_hton32(wr->ds_array[i].lkey);
				((struct mthca_data_seg *) wqe)->addr =
					cl_hton64(wr->ds_array[i].vaddr);
				wqe += sizeof (struct mthca_data_seg);
				size += sizeof (struct mthca_data_seg) / 16;
			}
		}

		qp->wrid[ind + qp->rq.max] = wr->wr_id;

		((struct mthca_next_seg *) prev_wqe)->nda_op =
			cl_hton32(((ind << qp->sq.wqe_shift) +
				   qp->send_wqe_offset) | opcode);

		wmb();

		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
				  ((wr->send_opt & IB_SEND_OPT_FENCE) ?
				   MTHCA_NEXT_FENCE : 0));

		if (!size0) {
			size0 = size;
			op0   = opcode;
		}

		dump_wqe(TRACE_LEVEL_VERBOSE, (uint32_t *) qp->sq.last, qp);

		++ind;
		if (unlikely(ind >= qp->sq.max))
			ind -= qp->sq.max;
	}

out:
	if (likely(nreq)) {
		uint32_t doorbell[2];

		doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
			qp->send_wqe_offset) | f0 | op0);
		doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
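
		/*
		 * Tavor send doorbell layout: word 0 holds the byte offset
		 * of the first new WQE within the QP buffer (next_ind
		 * shifted by wqe_shift, plus send_wqe_offset) together with
		 * the fence bit and the opcode of that first WQE; word 1
		 * holds the QP number and the first WQE's size in 16-byte
		 * chunks.
		 */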

		wmb();

		mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
	}

	qp->sq.next_ind = ind;
	qp->sq.head    += nreq;

err_busy:
	cl_spinlock_release(&qp->sq.lock);

	UVP_EXIT(UVP_DBG_QP);
	return ret;
}

int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
			  struct _ib_recv_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	uint32_t doorbell[2];
	int ret = 0;
	int nreq;
	int i;
	int size;
	int size0 = 0;
	int ind;
	uint8_t *wqe;
	uint8_t *prev_wqe;

	UVP_ENTER(UVP_DBG_QP);

	cl_spinlock_acquire(&qp->rq.lock);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.next_ind;
	if (ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		*bad_wr = wr;
		goto err_busy;
	}

	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
		if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
			nreq = 0;

			doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
			doorbell[1] = cl_hton32(ibqp->qp_num << 8); //TODO sleybo: add qpn to qp struct

			/*
			 * Make sure that descriptors are written
			 * before doorbell is rung.
			 */
			wmb();

			mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);

			qp->rq.next_ind = ind;
			qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
			size0 = 0;
		}

		if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("RQ %06x full (%u head, %u tail,"
				" %d max, %d nreq)\n", ibqp->qp_num,
				qp->rq.head, qp->rq.tail,
				qp->rq.max, nreq));
			ret = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);
		prev_wqe = qp->rq.last;
		qp->rq.last = wqe;

		((struct mthca_next_seg *) wqe)->nda_op = 0;
		((struct mthca_next_seg *) wqe)->ee_nds =
			cl_hton32(MTHCA_NEXT_DBD);
		((struct mthca_next_seg *) wqe)->flags =
			cl_hton32(MTHCA_NEXT_CQ_UPDATE);

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		if (unlikely((int) wr->num_ds > qp->rq.max_gs)) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("RQ %06x too many gathers\n", ibqp->qp_num));
			ret = -ERANGE;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < (int) wr->num_ds; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cl_hton32(wr->ds_array[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				cl_hton32(wr->ds_array[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				cl_hton64(wr->ds_array[i].vaddr);
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		qp->wrid[ind] = wr->wr_id;

		((struct mthca_next_seg *) prev_wqe)->nda_op =
			cl_hton32((ind << qp->rq.wqe_shift) | 1);
		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cl_hton32(MTHCA_NEXT_DBD | size);
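
		/*
		 * The two stores above patch the previous WQE: its nda_op
		 * now points at the WQE just built and its ee_nds carries
		 * the DBD bit plus the new WQE's size in 16-byte chunks, so
		 * the HCA can follow the descriptor chain without one
		 * doorbell per WQE.
		 */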

		if (!size0)
			size0 = size;

		++ind;
		if (unlikely(ind >= qp->rq.max))
			ind -= qp->rq.max;
	}

out:
	if (likely(nreq)) {
		doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
		doorbell[1] = cl_hton32((ibqp->qp_num << 8) | (nreq & 255));
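
		/*
		 * Tavor receive doorbell: word 0 is the byte offset of the
		 * first new receive WQE plus its size in 16-byte chunks,
		 * word 1 is the QP number and the count of WQEs being
		 * posted (modulo 256).
		 */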
		/*
		 * Make sure that descriptors are written before
		 * doorbell is rung.
		 */
		wmb();

		mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
	}

	qp->rq.next_ind = ind;
	qp->rq.head    += nreq;

err_busy:
	cl_spinlock_release(&qp->rq.lock);

	UVP_EXIT(UVP_DBG_QP);
	return ret;
}

int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
			  struct _ib_send_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	uint32_t doorbell[2];
	uint8_t *wqe;
	uint8_t *prev_wqe;
	int ind;
	int nreq;
	int ret = 0;
	int size;
	int size0 = 0;
	uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
	int i;
	uint32_t op0 = 0;
	enum ib_wr_opcode opcode;

	UVP_ENTER(UVP_DBG_QP);

	cl_spinlock_acquire(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head & (qp->sq.max - 1);
	if (ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		*bad_wr = wr;
		goto err_busy;
	}

	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
		if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
			nreq = 0;

			doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
				((qp->sq.head & 0xffff) << 8) | f0 | op0);
			doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
			qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;

			f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;

			/*
			 * Make sure that descriptors are written before
			 * doorbell record.
			 */
			wmb();
			*qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
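
			/*
			 * qp->sq.db is the send doorbell record that lives
			 * in host memory shared with the HCA (Arbel
			 * mem-free mode); it is kept equal to the low 16
			 * bits of the SQ head counter so the hardware can
			 * track how many WQEs have been posted.
			 */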
			/*
			 * Make sure doorbell record is written before we
			 * write MMIO send doorbell.
			 */
			wmb();
			mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);

			size0 = 0;
		}

		if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x full (%u head, %u tail,"
				" %d max, %d nreq)\n", ibqp->qp_num,
				qp->sq.head, qp->sq.tail,
				qp->sq.max, nreq));
			ret = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;

		opcode = conv_ibal_wr_opcode(wr);

		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
			 cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
			 cl_hton32(MTHCA_NEXT_SOLICIT) : 0) |
			cl_hton32(1);
		if (opcode == MTHCA_OPCODE_SEND_IMM ||
		    opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		switch (ibqp->qp_type) {
		case IB_QPT_RELIABLE_CONN:
			switch (opcode) {
			case MTHCA_OPCODE_ATOMIC_CS:
			case MTHCA_OPCODE_ATOMIC_FA:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;

				wqe += sizeof (struct mthca_raddr_seg);

				if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cl_hton64(wr->remote_ops.atomic2);
					((struct mthca_atomic_seg *) wqe)->compare =
						cl_hton64(wr->remote_ops.atomic1);
				} else {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cl_hton64(wr->remote_ops.atomic1);
					((struct mthca_atomic_seg *) wqe)->compare = 0;
				}

				wqe += sizeof (struct mthca_atomic_seg);
				size += (sizeof (struct mthca_raddr_seg) +
					 sizeof (struct mthca_atomic_seg)) / 16;
				break;

			case MTHCA_OPCODE_RDMA_READ:
			case MTHCA_OPCODE_RDMA_WRITE:
			case MTHCA_OPCODE_RDMA_WRITE_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IB_QPT_UNRELIABLE_CONN:
			switch (opcode) {
			case MTHCA_OPCODE_RDMA_WRITE:
			case MTHCA_OPCODE_RDMA_WRITE_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IB_QPT_UNRELIABLE_DGRM:
		{
			struct mthca_ah *ah = ((struct mthca_ah *) wr->dgrm.ud.h_av);
			memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
			       ah->av, sizeof (struct mthca_av));
			((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
			((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;

			wqe += sizeof (struct mthca_arbel_ud_seg);
			size += sizeof (struct mthca_arbel_ud_seg) / 16;
			break;
		}

		default:
			break;
		}

		if ((int) wr->num_ds > qp->sq.max_gs) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x full too many gathers\n", ibqp->qp_num));
			ret = -ERANGE;
			*bad_wr = wr;
			goto out;
		}

		if (wr->send_opt & IB_SEND_OPT_INLINE) {
			if (wr->num_ds) {
				struct mthca_inline_seg *seg = (struct mthca_inline_seg *) wqe;
				uint32_t s = 0;

				wqe += sizeof *seg;
				for (i = 0; i < (int) wr->num_ds; ++i) {
					struct _ib_local_ds *sge = &wr->ds_array[i];

					s += sge->length;

					if (s > (uint32_t) qp->max_inline_data) {
						ret = -EINVAL;
						*bad_wr = wr;
						goto out;
					}

					memcpy(wqe, (void *) (uintptr_t) sge->vaddr,
					       sge->length);
					wqe += sge->length;
				}

				seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
				size += align(s + sizeof *seg, 16) / 16;
			}
		} else {
			for (i = 0; i < (int) wr->num_ds; ++i) {
				((struct mthca_data_seg *) wqe)->byte_count =
					cl_hton32(wr->ds_array[i].length);
				((struct mthca_data_seg *) wqe)->lkey =
					cl_hton32(wr->ds_array[i].lkey);
				((struct mthca_data_seg *) wqe)->addr =
					cl_hton64(wr->ds_array[i].vaddr);
				wqe += sizeof (struct mthca_data_seg);
				size += sizeof (struct mthca_data_seg) / 16;
			}
			//TODO do this also in kernel
			// size += wr->num_ds * (sizeof *seg / 16);
		}

		qp->wrid[ind + qp->rq.max] = wr->wr_id;

		if (opcode == MTHCA_OPCODE_INVALID) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x opcode invalid\n", ibqp->qp_num));
			ret = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		((struct mthca_next_seg *) prev_wqe)->nda_op =
			cl_hton32(((ind << qp->sq.wqe_shift) +
				   qp->send_wqe_offset) |
				  opcode);
		wmb();
		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cl_hton32(MTHCA_NEXT_DBD | size |
				  ((wr->send_opt & IB_SEND_OPT_FENCE) ?
				   MTHCA_NEXT_FENCE : 0));

		if (!size0) {
			size0 = size;
			op0   = opcode;
		}

		++ind;
		if (unlikely(ind >= qp->sq.max))
			ind -= qp->sq.max;
	}

out:
	if (likely(nreq)) {
		doorbell[0] = cl_hton32((nreq << 24) |
			((qp->sq.head & 0xffff) << 8) | f0 | op0);
		doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
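
		/*
		 * Arbel send doorbell: word 0 packs the number of WQEs being
		 * posted (bits 31:24), the current SQ head counter
		 * (bits 23:8), the fence flag and the opcode of the first
		 * WQE; word 1 packs the QP number and the first WQE's size
		 * in 16-byte chunks.
		 */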

		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();
		*qp->sq.db = cl_hton32(qp->sq.head & 0xffff);

		/*
		 * Make sure doorbell record is written before we
		 * write MMIO send doorbell.
		 */
		wmb();
		mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
	}

err_busy:
	cl_spinlock_release(&qp->sq.lock);

	UVP_EXIT(UVP_DBG_QP);
	return ret;
}

int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
			  struct _ib_recv_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	int ret = 0;
	int nreq;
	int ind;
	int i;
	uint8_t *wqe;

	UVP_ENTER(UVP_DBG_QP);

	cl_spinlock_acquire(&qp->rq.lock);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.head & (qp->rq.max - 1);
	if (ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		*bad_wr = wr;
		goto err_busy;
	}

	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
		if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) { //TODO sleybo: check the cq
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("RQ %06x full (%u head, %u tail,"
				" %d max, %d nreq)\n", ibqp->qp_num,
				qp->rq.head, qp->rq.tail,
				qp->rq.max, nreq));
			ret = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);

		((struct mthca_next_seg *) wqe)->flags = 0;

		wqe += sizeof (struct mthca_next_seg);

		if (unlikely((int) wr->num_ds > qp->rq.max_gs)) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("RQ %06x too many scatter entries\n", ibqp->qp_num));
			ret = -ERANGE;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < (int) wr->num_ds; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cl_hton32(wr->ds_array[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				cl_hton32(wr->ds_array[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				cl_hton64(wr->ds_array[i].vaddr);
			wqe += sizeof (struct mthca_data_seg);
		}

		if (i < qp->rq.max_gs) {
			((struct mthca_data_seg *) wqe)->byte_count = 0;
			((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);
			((struct mthca_data_seg *) wqe)->addr = 0;
		}

		qp->wrid[ind] = wr->wr_id;

		++ind;
		if (unlikely(ind >= qp->rq.max))
			ind -= qp->rq.max;
	}

out:
	if (likely(nreq)) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();
		*qp->rq.db = cl_hton32(qp->rq.head & 0xffff);
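
		/*
		 * Unlike the Tavor path, no MMIO doorbell is needed for
		 * receives in mem-free mode: the HCA reads the RQ doorbell
		 * record (qp->rq.db) from host memory to learn about newly
		 * posted WQEs.
		 */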
	}

err_busy:
	cl_spinlock_release(&qp->rq.lock);

	UVP_EXIT(UVP_DBG_QP);
	return ret;
}

int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
		       ib_qp_type_t type, struct mthca_qp *qp)
{
	int size;
	int max_sq_sge;

	qp->rq.max_gs = cap->max_recv_sge;
	qp->sq.max_gs = cap->max_send_sge;
	max_sq_sge = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
			   sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);
	if (max_sq_sge < (int) cap->max_send_sge)
		max_sq_sge = cap->max_send_sge;

	qp->wrid = cl_malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));
	if (!qp->wrid)
		return -1;

	size = sizeof (struct mthca_next_seg) +
		qp->rq.max_gs * sizeof (struct mthca_data_seg);

	for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
	     qp->rq.wqe_shift++)
		; /* nothing */

	size = max_sq_sge * sizeof (struct mthca_data_seg);
	switch (type) {
	case IB_QPT_UNRELIABLE_DGRM:
		size += mthca_is_memfree(pd->context) ?
			sizeof (struct mthca_arbel_ud_seg) :
			sizeof (struct mthca_tavor_ud_seg);
		break;

	case IB_QPT_UNRELIABLE_CONN:
		size += sizeof (struct mthca_raddr_seg);
		break;

	case IB_QPT_RELIABLE_CONN:
		size += sizeof (struct mthca_raddr_seg);
		/*
		 * An atomic op will require an atomic segment, a
		 * remote address segment and one scatter entry.
		 */
		if (size < (sizeof (struct mthca_atomic_seg) +
			    sizeof (struct mthca_raddr_seg) +
			    sizeof (struct mthca_data_seg)))
			size = (sizeof (struct mthca_atomic_seg) +
				sizeof (struct mthca_raddr_seg) +
				sizeof (struct mthca_data_seg));
		break;

	default:
		break;
	}

	/* Make sure that we have enough space for a bind request */
	if (size < sizeof (struct mthca_bind_seg))
		size = sizeof (struct mthca_bind_seg);

	size += sizeof (struct mthca_next_seg);

	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
	     qp->sq.wqe_shift++)
		; /* nothing */

	qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,
				    1 << qp->sq.wqe_shift);

	qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);
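
	/*
	 * The single QP buffer is laid out as all RQ WQEs followed by all
	 * SQ WQEs: wqe_shift rounds each queue's WQE size up to a power of
	 * two (at least 64 bytes), and send_wqe_offset aligns the start of
	 * the SQ region to the SQ WQE stride.
	 */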

	if (posix_memalign(&qp->buf, g_page_size,
			   align(qp->buf_size, g_page_size))) {
		cl_free(qp->wrid);
		return -1;
	}

	memset(qp->buf, 0, qp->buf_size);

	if (mthca_is_memfree(pd->context)) {
		struct mthca_next_seg *next;
		struct mthca_data_seg *scatter;
		int i;
		uint32_t sz;

		sz = cl_hton32((sizeof (struct mthca_next_seg) +
				qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);

		for (i = 0; i < qp->rq.max; ++i) {
			next = get_recv_wqe(qp, i);
			next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<
						 qp->rq.wqe_shift);
			next->ee_nds = sz;

			for (scatter = (void *) (next + 1);
			     (void *) scatter < (void *) ((char *) next + (1 << qp->rq.wqe_shift));
			     ++scatter)
				scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);
		}

		for (i = 0; i < qp->sq.max; ++i) {
			next = get_send_wqe(qp, i);
			next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<
						  qp->sq.wqe_shift) +
						 qp->send_wqe_offset);
		}
	}

	qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
	qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);

	return 0;
}

struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (ctx->qp_table[tind].refcnt)
		return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
	else
		return NULL;
}
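
/*
 * The userspace QPN-to-QP lookup table is two-level: the top-level index
 * (tind) comes from the high bits of the QPN, and each chunk of
 * (qp_table_mask + 1) entries is allocated lazily by mthca_store_qp and
 * freed again when its refcnt drops to zero in mthca_clear_qp.
 */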
int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
	int ret = 0;

	WaitForSingleObject( ctx->qp_table_mutex, INFINITE );

	if (!ctx->qp_table[tind].refcnt) {
		ctx->qp_table[tind].table = cl_malloc(
			(ctx->qp_table_mask + 1) * sizeof (struct mthca_qp *));
		if (!ctx->qp_table[tind].table) {
			ret = -1;
			goto out;
		}
	}

	++ctx->qp_table[tind].refcnt;
	ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;

out:
	ReleaseMutex( ctx->qp_table_mutex );
	return ret;
}

void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	WaitForSingleObject( ctx->qp_table_mutex, INFINITE );

	if (!--ctx->qp_table[tind].refcnt)
		cl_free(ctx->qp_table[tind].table);
	else
		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;

	ReleaseMutex( ctx->qp_table_mutex );
}
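
/*
 * Used by the completion-queue error handling: for a WQE that completed
 * in error, report whether its DBD (doorbell-count) bit was set (*dbd)
 * and compute the nda/size word describing the next WQE in the chain
 * (*new_wqe), or 0 if there is none.
 */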
int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
		       int index, int *dbd, uint32_t *new_wqe)
{
	struct mthca_next_seg *next;

	/*
	 * For SRQs, all WQEs generate a CQE, so we're always at the
	 * end of the doorbell chain.
	 */
	if (qp->ibv_qp.srq) {
		*new_wqe = 0;
		return 0;
	}

	if (is_send)
		next = get_send_wqe(qp, index);
	else
		next = get_recv_wqe(qp, index);

	*dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));
	if (next->ee_nds & cl_hton32(0x3f))
		*new_wqe = (next->nda_op & cl_hton32(~0x3f)) |
			(next->ee_nds & cl_hton32(0x3f));
	else
		*new_wqe = 0;

	return 0;
}