/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: qp.c 4214 2005-11-29 17:43:08Z roland $
 */

#include <mt_l2w.h>
#include "mlnx_uvp.h"
#include "mlnx_uvp_doorbell.h"
#include "mlnx_uvp_wqe.h"
#include "mlnx_ual_data.h"

#if defined(EVENT_TRACING)
#include "mlnx_uvp_qp.tmh"
#endif

static const uint8_t mthca_opcode[] = {
        MTHCA_OPCODE_RDMA_WRITE,
        MTHCA_OPCODE_RDMA_WRITE_IMM,
        MTHCA_OPCODE_SEND,
        MTHCA_OPCODE_SEND_IMM,
        MTHCA_OPCODE_RDMA_READ,
        MTHCA_OPCODE_ATOMIC_CS,
        MTHCA_OPCODE_ATOMIC_FA
};

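/*
 * Map an IBAL work request type (plus its send options) to the
 * corresponding Tavor/Arbel hardware opcode.  Unknown types yield
 * MTHCA_OPCODE_INVALID so the posting routines can reject the WR.
 */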
static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)
{
        enum mthca_wr_opcode opcode = MTHCA_OPCODE_INVALID;

        switch (wr->wr_type) {
                case WR_SEND:
                        opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;
                        break;
                case WR_RDMA_WRITE:
                        opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;
                        break;
                case WR_RDMA_READ:      opcode = MTHCA_OPCODE_RDMA_READ; break;
                case WR_COMPARE_SWAP:   opcode = MTHCA_OPCODE_ATOMIC_CS; break;
                case WR_FETCH_ADD:      opcode = MTHCA_OPCODE_ATOMIC_FA; break;
                default:                opcode = MTHCA_OPCODE_INVALID; break;
        }
        return opcode;
}


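/* Dump the first 16 dwords of a WQE to the trace log for debugging. */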
static void dump_wqe(uint32_t print_lvl, uint32_t *wqe_ptr, struct mthca_qp *qp_ptr)
{
        net32_t *wqe = wqe_ptr;

        (void) wqe;     /* avoid warning if mthca_dbg compiled away... */
        UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents  QPN 0x%06x \n",qp_ptr->ibv_qp.qp_num));
        UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0
                , cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3])));
        UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4
                , cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7])));
        UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8
                , cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11])));
        UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12
                , cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15])));

}

static void *get_recv_wqe(struct mthca_qp *qp, int n)
{
        return qp->buf + (n << qp->rq.wqe_shift);
}

static void *get_send_wqe(struct mthca_qp *qp, int n)
{
        void *wqe_addr = qp->buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);
        UVP_PRINT(TRACE_LEVEL_INFORMATION,UVP_DBG_QP,
                ("wqe %p, qp_buf %p, offset %#x,  index %d, shift %d \n",
                 wqe_addr, qp->buf, qp->send_wqe_offset, n,
                qp->sq.wqe_shift));

        return wqe_addr;
}

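/*
 * Reset the producer/consumer indices of both work queues and point
 * 'last' at the final WQE so that the first post has a previous WQE
 * to chain from.
 */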
void mthca_init_qp_indices(struct mthca_qp *qp)
{
        qp->sq.next_ind  = 0;
        qp->sq.last_comp = qp->sq.max - 1;
        qp->sq.head      = 0;
        qp->sq.tail      = 0;
        qp->sq.last      = get_send_wqe(qp, qp->sq.max - 1);

        qp->rq.next_ind  = 0;
        qp->rq.last_comp = qp->rq.max - 1;
        qp->rq.head      = 0;
        qp->rq.tail      = 0;
        qp->rq.last      = get_recv_wqe(qp, qp->rq.max - 1);
}

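/*
 * Check whether posting nreq more WQEs would overflow the work queue.
 * The first head/tail snapshot is taken without a lock; only if it
 * looks full do we re-read under the CQ lock, since the tail is
 * advanced as completions are polled.
 */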
static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)
{
        unsigned cur;

        cur = wq->head - wq->tail;
        if ((int)(cur + nreq) < wq->max)
                return 0;

        cl_spinlock_acquire(&cq->lock);
        cur = wq->head - wq->tail;
        cl_spinlock_release(&cq->lock);

        return (int)(cur + nreq) >= wq->max;
}


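/*
 * Post a list of send work requests to a Tavor (non-memfree) QP.  Each
 * new WQE is linked into the previous one via its next segment, and a
 * single send doorbell covering the whole chain is rung at the end.
 */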
int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
                          struct _ib_send_wr **bad_wr)
{
        struct mthca_qp *qp = to_mqp(ibqp);
        uint8_t *wqe;
        uint8_t *prev_wqe;
        int ret = 0;
        int nreq;
        int i;
        int size;
        int size0 = 0;
        uint32_t f0 = 0;
        int ind;
        int op0 = 0;
        enum ib_wr_opcode opcode;

        UVP_ENTER(UVP_DBG_QP);
        cl_spinlock_acquire(&qp->sq.lock);

        /* XXX check that state is OK to post send */

        ind = qp->sq.next_ind;

        if(ibqp->state == IBV_QPS_RESET) {
                ret = -EBUSY;
                if (bad_wr)
                        *bad_wr = wr;
                goto err_busy;
        }
        for (nreq = 0; wr; ++nreq, wr = wr->p_next) {

                if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
                        UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x full (%u head, %u tail,"
                                        " %d max, %d nreq)\n", ibqp->qp_num,
                                        qp->sq.head, qp->sq.tail,
                                        qp->sq.max, nreq));
                        ret = -ENOMEM;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }

                wqe = get_send_wqe(qp, ind);
                prev_wqe = qp->sq.last;
                qp->sq.last = wqe;
                opcode = conv_ibal_wr_opcode(wr);
                if (opcode == MTHCA_OPCODE_INVALID) {
                        UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
                        ret = -EINVAL;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }


                ((struct mthca_next_seg *) wqe)->nda_op = 0;
                ((struct mthca_next_seg *) wqe)->ee_nds = 0;
                ((struct mthca_next_seg *) wqe)->flags =
                        ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
                         cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
                        ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
                         cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
                        cl_hton32(1);
                if (opcode == MTHCA_OPCODE_SEND_IMM ||
                    opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
                        ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;

                wqe += sizeof (struct mthca_next_seg);
                size = sizeof (struct mthca_next_seg) / 16;


                switch (ibqp->qp_type) {
                case IB_QPT_RELIABLE_CONN:
                        switch (opcode) {
                        case MTHCA_OPCODE_ATOMIC_CS:
                        case MTHCA_OPCODE_ATOMIC_FA:
                                ((struct mthca_raddr_seg *) wqe)->raddr =
                                        cl_hton64(wr->remote_ops.vaddr);
                                ((struct mthca_raddr_seg *) wqe)->rkey =
                                        wr->remote_ops.rkey;
                                ((struct mthca_raddr_seg *) wqe)->reserved = 0;

                                wqe += sizeof (struct mthca_raddr_seg);

                                if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
                                        ((struct mthca_atomic_seg *) wqe)->swap_add =
                                                cl_hton64(wr->remote_ops.atomic2);
                                        ((struct mthca_atomic_seg *) wqe)->compare =
                                                cl_hton64(wr->remote_ops.atomic1);
                                } else {
                                        ((struct mthca_atomic_seg *) wqe)->swap_add =
                                                cl_hton64(wr->remote_ops.atomic1);
                                        ((struct mthca_atomic_seg *) wqe)->compare = 0;
                                }

                                wqe += sizeof (struct mthca_atomic_seg);
                                size += (sizeof (struct mthca_raddr_seg) +
                                         sizeof (struct mthca_atomic_seg)) / 16;
                                break;

                        case MTHCA_OPCODE_RDMA_WRITE:
                        case MTHCA_OPCODE_RDMA_WRITE_IMM:
                        case MTHCA_OPCODE_RDMA_READ:
                                ((struct mthca_raddr_seg *) wqe)->raddr =
                                        cl_hton64(wr->remote_ops.vaddr);
                                ((struct mthca_raddr_seg *) wqe)->rkey =
                                        wr->remote_ops.rkey;
                                ((struct mthca_raddr_seg *) wqe)->reserved = 0;
                                wqe += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;

                        default:
                                /* No extra segments required for sends */
                                break;
                        }

                        break;

                case IB_QPT_UNRELIABLE_CONN:
                        switch (opcode) {
                        case MTHCA_OPCODE_RDMA_WRITE:
                        case MTHCA_OPCODE_RDMA_WRITE_IMM:
                                ((struct mthca_raddr_seg *) wqe)->raddr =
                                        cl_hton64(wr->remote_ops.vaddr);
                                ((struct mthca_raddr_seg *) wqe)->rkey =
                                        wr->remote_ops.rkey;
                                ((struct mthca_raddr_seg *) wqe)->reserved = 0;
                                wqe += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;

                        default:
                                /* No extra segments required for sends */
                                break;
                        }

                        break;

                case IB_QPT_UNRELIABLE_DGRM:
                        {
                                struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);
                                ((struct mthca_tavor_ud_seg *) wqe)->lkey =
                                        cl_hton32(ah->key);
                                ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
                                        cl_hton64((uint64_t)ah->av);
                                ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
                                ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;

                                wqe += sizeof (struct mthca_tavor_ud_seg);
                                size += sizeof (struct mthca_tavor_ud_seg) / 16;
                                break;
                        }

                default:
                        break;
                }

                if ((int)wr->num_ds > qp->sq.max_gs) {
                        UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num));
                        ret = -ERANGE;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }
//TODO sleybo:
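                /*
                 * Inline sends copy the payload directly into the WQE
                 * after a single inline segment header; otherwise one
                 * gather (data) segment is built per SGE.
                 */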
                if (wr->send_opt & IB_SEND_OPT_INLINE) {
                        if (wr->num_ds) {
                                struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
                                int s = 0;

                                wqe += sizeof *seg;
                                for (i = 0; i < (int)wr->num_ds; ++i) {
                                        struct _ib_local_ds *sge = &wr->ds_array[i];

                                        s += sge->length;

                                        if (s > qp->max_inline_data) {
                                                ret = -1;
                                                if (bad_wr)
                                                        *bad_wr = wr;
                                                goto out;
                                        }

                                        memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr,
                                               sge->length);
                                        wqe += sge->length;
                                }

                                seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
                                size += align(s + sizeof *seg, 16) / 16;
                        }
                } else {
                        for (i = 0; i < (int)wr->num_ds; ++i) {
                                ((struct mthca_data_seg *) wqe)->byte_count =
                                        cl_hton32(wr->ds_array[i].length);
                                ((struct mthca_data_seg *) wqe)->lkey =
                                        cl_hton32(wr->ds_array[i].lkey);
                                ((struct mthca_data_seg *) wqe)->addr =
                                        cl_hton64(wr->ds_array[i].vaddr);
                                wqe += sizeof (struct mthca_data_seg);
                                size += sizeof (struct mthca_data_seg) / 16;
                        }
                }

                qp->wrid[ind + qp->rq.max] = wr->wr_id;

                ((struct mthca_next_seg *) prev_wqe)->nda_op =
                        cl_hton32(((ind << qp->sq.wqe_shift) +
                        qp->send_wqe_offset) | opcode);

                wmb();

                ((struct mthca_next_seg *) prev_wqe)->ee_nds =
                        cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
                        ((wr->send_opt & IB_SEND_OPT_FENCE) ?
                         MTHCA_NEXT_FENCE : 0));

                if (!size0) {
                        size0 = size;
                        op0   = opcode;
                }

                dump_wqe( TRACE_LEVEL_VERBOSE, (uint32_t*)qp->sq.last,qp);

                ++ind;
                if (unlikely(ind >= qp->sq.max))
                        ind -= qp->sq.max;

        }

out:
        if (likely(nreq)) {
                uint32_t doorbell[2];

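                /*
                 * doorbell[0] points the HCA at the first new WQE and its
                 * opcode; doorbell[1] carries the QP number and the first
                 * WQE's size in 16-byte chunks.
                 */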
                doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
                                     qp->send_wqe_offset) | f0 | op0);
                doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);

                wmb();

                mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
        }

        qp->sq.next_ind = ind;
        qp->sq.head    += nreq;

err_busy:
        cl_spinlock_release(&qp->sq.lock);

        UVP_EXIT(UVP_DBG_QP);
        return ret;
}


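/*
 * Post a list of receive work requests to a Tavor QP.  The receive
 * queue has no doorbell record, so a doorbell is rung after every
 * MTHCA_TAVOR_MAX_WQES_PER_RECV_DB WQEs and once more for the
 * remainder at the end.
 */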
int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
                          struct _ib_recv_wr **bad_wr)
{
        struct mthca_qp *qp = to_mqp(ibqp);
        uint32_t doorbell[2];
        int ret = 0;
        int nreq;
        int i;
        int size;
        int size0 = 0;
        int ind;
        uint8_t *wqe;
        uint8_t *prev_wqe;

        UVP_ENTER(UVP_DBG_QP);

        cl_spinlock_acquire(&qp->rq.lock);

        /* XXX check that state is OK to post receive */

        ind = qp->rq.next_ind;
        if(ibqp->state == IBV_QPS_RESET) {
                ret = -EBUSY;
                if (bad_wr)
                        *bad_wr = wr;
                goto err_busy;
        }

        for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
                if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
                        nreq = 0;

                        doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
                        doorbell[1] = cl_hton32(ibqp->qp_num << 8); //TODO sleybo: add qpn to qp struct

                        /*
                         * Make sure that descriptors are written
                         * before doorbell is rung.
                         */
                        mb();

                        mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);

                        qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
                        size0 = 0;
                }

                if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
                        UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("RQ %06x full (%u head, %u tail,"
                                        " %d max, %d nreq)\n", ibqp->qp_num,
                                        qp->rq.head, qp->rq.tail,
                                        qp->rq.max, nreq));
                        ret = -ENOMEM;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }

                wqe = get_recv_wqe(qp, ind);
                prev_wqe = qp->rq.last;
                qp->rq.last = wqe;

                ((struct mthca_next_seg *) wqe)->nda_op = 0;
                ((struct mthca_next_seg *) wqe)->ee_nds =
                        cl_hton32(MTHCA_NEXT_DBD);
                ((struct mthca_next_seg *) wqe)->flags =
                        cl_hton32(MTHCA_NEXT_CQ_UPDATE);

                wqe += sizeof (struct mthca_next_seg);
                size = sizeof (struct mthca_next_seg) / 16;

                if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
                        UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("RQ %06x too many gathers\n",ibqp->qp_num));
                        ret = -ERANGE;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }

                for (i = 0; i < (int)wr->num_ds; ++i) {
                        ((struct mthca_data_seg *) wqe)->byte_count =
                                cl_hton32(wr->ds_array[i].length);
                        ((struct mthca_data_seg *) wqe)->lkey =
                                cl_hton32(wr->ds_array[i].lkey);
                        ((struct mthca_data_seg *) wqe)->addr =
                                cl_hton64(wr->ds_array[i].vaddr);
                        wqe += sizeof (struct mthca_data_seg);
                        size += sizeof (struct mthca_data_seg) / 16;
                }

                qp->wrid[ind] = wr->wr_id;

                ((struct mthca_next_seg *) prev_wqe)->nda_op =
                        cl_hton32((ind << qp->rq.wqe_shift) | 1);
                ((struct mthca_next_seg *) prev_wqe)->ee_nds =
                        cl_hton32(MTHCA_NEXT_DBD | size);

                if (!size0)
                        size0 = size;

                ++ind;
                if (unlikely(ind >= qp->rq.max))
                        ind -= qp->rq.max;
        }

out:
        if (likely(nreq)) {
                doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
                doorbell[1] = cl_hton32((ibqp->qp_num << 8) | (nreq & 255));

                /*
                 * Make sure that descriptors are written before
                 * doorbell is rung.
                 */
                mb();

                mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
        }

        qp->rq.next_ind = ind;
        qp->rq.head    += nreq;

err_busy:
        cl_spinlock_release(&qp->rq.lock);
        UVP_EXIT(UVP_DBG_QP);
        return ret;
}

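/*
 * Post a list of send work requests to an Arbel (memfree) QP.  Unlike
 * Tavor, the new send queue head is first published through the
 * doorbell record in memory and only then through the MMIO doorbell,
 * and one doorbell covers at most MTHCA_ARBEL_MAX_WQES_PER_SEND_DB
 * WQEs.
 */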
int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
                          struct _ib_send_wr **bad_wr)
{
        struct mthca_qp *qp = to_mqp(ibqp);
        uint32_t doorbell[2];
        uint8_t *wqe;
        uint8_t *prev_wqe;
        int ret = 0;
        int nreq;
        int i;
        int size;
        int size0 = 0;
        uint32_t f0 = 0;
        int ind;
        uint8_t op0 = 0;
        enum ib_wr_opcode opcode;

        UVP_ENTER(UVP_DBG_QP);

        cl_spinlock_acquire(&qp->sq.lock);

        /* XXX check that state is OK to post send */

        ind = qp->sq.head & (qp->sq.max - 1);
        if(ibqp->state == IBV_QPS_RESET) {
                ret = -EBUSY;
                if (bad_wr)
                        *bad_wr = wr;
                goto err_busy;
        }
        for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
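                /*
                 * A single doorbell can cover at most
                 * MTHCA_ARBEL_MAX_WQES_PER_SEND_DB WQEs, so flush the
                 * batch posted so far and restart the count.
                 */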
                if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
                        nreq = 0;

                        doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
                                            ((qp->sq.head & 0xffff) << 8) | f0 | op0);
                        doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
                        qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
                        size0 = 0;

                        /*
                         * Make sure that descriptors are written before
                         * doorbell record.
                         */
                        wmb();
                        *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);

                        /*
                         * Make sure doorbell record is written before we
                         * write MMIO send doorbell.
                         */
                        wmb();
                        mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);

                }

                if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
                        UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("SQ %06x full (%u head, %u tail,"
                                        " %d max, %d nreq)\n", ibqp->qp_num,
                                        qp->sq.head, qp->sq.tail,
                                        qp->sq.max, nreq));
                        ret = -ENOMEM;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }

                wqe = get_send_wqe(qp, ind);
                prev_wqe = qp->sq.last;
                qp->sq.last = wqe;
                opcode = conv_ibal_wr_opcode(wr);

                ((struct mthca_next_seg *) wqe)->flags =
                        ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
                         cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
                        ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
                         cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
                        cl_hton32(1);
                if (opcode == MTHCA_OPCODE_SEND_IMM ||
                        opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
                        ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;

                wqe += sizeof (struct mthca_next_seg);
                size = sizeof (struct mthca_next_seg) / 16;

                switch (ibqp->qp_type) {
                case IB_QPT_RELIABLE_CONN:
                        switch (opcode) {
                        case MTHCA_OPCODE_ATOMIC_CS:
                        case MTHCA_OPCODE_ATOMIC_FA:
                                ((struct mthca_raddr_seg *) wqe)->raddr =
                                        cl_hton64(wr->remote_ops.vaddr);
                                ((struct mthca_raddr_seg *) wqe)->rkey =
                                        wr->remote_ops.rkey;
                                ((struct mthca_raddr_seg *) wqe)->reserved = 0;

                                wqe += sizeof (struct mthca_raddr_seg);

                                if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
                                        ((struct mthca_atomic_seg *) wqe)->swap_add =
                                                cl_hton64(wr->remote_ops.atomic2);
                                        ((struct mthca_atomic_seg *) wqe)->compare =
                                                cl_hton64(wr->remote_ops.atomic1);
                                } else {
                                        ((struct mthca_atomic_seg *) wqe)->swap_add =
                                                cl_hton64(wr->remote_ops.atomic1);
                                        ((struct mthca_atomic_seg *) wqe)->compare = 0;
                                }

                                wqe += sizeof (struct mthca_atomic_seg);
                                size += (sizeof (struct mthca_raddr_seg) +
                                         sizeof (struct mthca_atomic_seg)) / 16;
                                break;

                        case MTHCA_OPCODE_RDMA_READ:
                        case MTHCA_OPCODE_RDMA_WRITE:
                        case MTHCA_OPCODE_RDMA_WRITE_IMM:
                                ((struct mthca_raddr_seg *) wqe)->raddr =
                                        cl_hton64(wr->remote_ops.vaddr);
                                ((struct mthca_raddr_seg *) wqe)->rkey =
                                        wr->remote_ops.rkey;
                                ((struct mthca_raddr_seg *) wqe)->reserved = 0;
                                wqe += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;

                        default:
                                /* No extra segments required for sends */
                                break;
                        }

                        break;

                case IB_QPT_UNRELIABLE_CONN:
                        switch (opcode) {
                        case MTHCA_OPCODE_RDMA_WRITE:
                        case MTHCA_OPCODE_RDMA_WRITE_IMM:
                                ((struct mthca_raddr_seg *) wqe)->raddr =
                                        cl_hton64(wr->remote_ops.vaddr);
                                ((struct mthca_raddr_seg *) wqe)->rkey =
                                        wr->remote_ops.rkey;
                                ((struct mthca_raddr_seg *) wqe)->reserved = 0;
                                wqe += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;

                        default:
                                /* No extra segments required for sends */
                                break;
                        }

                        break;

                case IB_QPT_UNRELIABLE_DGRM:
                        {
                                struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);
                                memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
                                       ah->av, sizeof (struct mthca_av));
                                ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
                                ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;


                                wqe += sizeof (struct mthca_arbel_ud_seg);
                                size += sizeof (struct mthca_arbel_ud_seg) / 16;
                                break;
                        }

                default:
                        break;
                }

                if ((int)wr->num_ds > qp->sq.max_gs) {
                        UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x full too many gathers\n",ibqp->qp_num));
                        ret = -ERANGE;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }

                if (wr->send_opt & IB_SEND_OPT_INLINE) {
                        if (wr->num_ds) {
                                struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
                                int s = 0;

                                wqe += sizeof *seg;
                                for (i = 0; i < (int)wr->num_ds; ++i) {
                                        struct _ib_local_ds *sge = &wr->ds_array[i];

                                        s += sge->length;

                                        if (s > qp->max_inline_data) {
                                                ret = -1;
                                                if (bad_wr)
                                                        *bad_wr = wr;
                                                goto out;
                                        }

                                        memcpy(wqe, (void *) (uintptr_t) sge->vaddr,
                                               sge->length);
                                        wqe += sge->length;
                                }

                                seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
                                size += align(s + sizeof *seg, 16) / 16;
                        }
                } else {

                        for (i = 0; i < (int)wr->num_ds; ++i) {
                                ((struct mthca_data_seg *) wqe)->byte_count =
                                        cl_hton32(wr->ds_array[i].length);
                                ((struct mthca_data_seg *) wqe)->lkey =
                                        cl_hton32(wr->ds_array[i].lkey);
                                ((struct mthca_data_seg *) wqe)->addr =
                                        cl_hton64(wr->ds_array[i].vaddr);
                                wqe += sizeof (struct mthca_data_seg);
                                size += sizeof (struct mthca_data_seg) / 16;
                        }
//TODO do this also in kernel
//                      size += wr->num_ds * (sizeof *seg / 16);
                }

                qp->wrid[ind + qp->rq.max] = wr->wr_id;

                if (opcode == MTHCA_OPCODE_INVALID) {
                        UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
                        ret = -EINVAL;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }

                ((struct mthca_next_seg *) prev_wqe)->nda_op =
                        cl_hton32(((ind << qp->sq.wqe_shift) +
                               qp->send_wqe_offset) |
                              opcode);
                wmb();
                ((struct mthca_next_seg *) prev_wqe)->ee_nds =
                        cl_hton32(MTHCA_NEXT_DBD | size |
                          ((wr->send_opt & IB_SEND_OPT_FENCE) ?
                                                   MTHCA_NEXT_FENCE : 0));

                if (!size0) {
                        size0 = size;
                        op0   = opcode;
                }

                ++ind;
                if (unlikely(ind >= qp->sq.max))
                        ind -= qp->sq.max;
        }

out:
        if (likely(nreq)) {
                doorbell[0] = cl_hton32((nreq << 24) |
                                    ((qp->sq.head & 0xffff) << 8) | f0 | op0);
                doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);

                qp->sq.head += nreq;

                /*
                 * Make sure that descriptors are written before
                 * doorbell record.
                 */
                wmb();
                *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);

                /*
                 * Make sure doorbell record is written before we
                 * write MMIO send doorbell.
                 */
                wmb();
                mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
        }

err_busy:
        cl_spinlock_release(&qp->sq.lock);

        UVP_EXIT(UVP_DBG_QP);

        return ret;
}

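/*
 * Post a list of receive work requests to an Arbel QP.  No MMIO
 * doorbell is needed here: updating the receive doorbell record with
 * the new head index is enough for the HCA to see the WQEs.
 */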
int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
                          struct _ib_recv_wr **bad_wr)
{
        struct mthca_qp *qp = to_mqp(ibqp);
        int ret = 0;
        int nreq;
        int ind;
        int i;
        uint8_t *wqe;

        UVP_ENTER(UVP_DBG_QP);

        cl_spinlock_acquire(&qp->rq.lock);

        /* XXX check that state is OK to post receive */

        ind = qp->rq.head & (qp->rq.max - 1);
        if(ibqp->state == IBV_QPS_RESET) {
                ret = -EBUSY;
                if (bad_wr)
                        *bad_wr = wr;
                goto err_busy;
        }
        for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
                if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) { //TODO sleybo: check the cq
                        UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full (%u head, %u tail,"
                                        " %d max, %d nreq)\n", ibqp->qp_num,
                                        qp->rq.head, qp->rq.tail,
                                        qp->rq.max, nreq));
                        ret = -ENOMEM;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }

                wqe = get_recv_wqe(qp, ind);

                ((struct mthca_next_seg *) wqe)->flags = 0;

                wqe += sizeof (struct mthca_next_seg);

                if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
                        UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full too many scatter\n",ibqp->qp_num));
                        ret = -ERANGE;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }

                for (i = 0; i < (int)wr->num_ds; ++i) {
                        ((struct mthca_data_seg *) wqe)->byte_count =
                                cl_hton32(wr->ds_array[i].length);
                        ((struct mthca_data_seg *) wqe)->lkey =
                                cl_hton32(wr->ds_array[i].lkey);
                        ((struct mthca_data_seg *) wqe)->addr =
                                cl_hton64(wr->ds_array[i].vaddr);
                        wqe += sizeof (struct mthca_data_seg);
                }

                if (i < qp->rq.max_gs) {
                        ((struct mthca_data_seg *) wqe)->byte_count = 0;
                        ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);
                        ((struct mthca_data_seg *) wqe)->addr = 0;
                }

                qp->wrid[ind] = wr->wr_id;

                ++ind;
                if (unlikely(ind >= qp->rq.max))
                        ind -= qp->rq.max;
        }
out:
        if (likely(nreq)) {
                qp->rq.head += nreq;

                /*
                 * Make sure that descriptors are written before
                 * doorbell record.
                 */
                mb();
                *qp->rq.db = cl_hton32(qp->rq.head & 0xffff);
        }

err_busy:
        cl_spinlock_release(&qp->rq.lock);

        UVP_EXIT(UVP_DBG_QP);

        return ret;
}

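/*
 * Size and allocate the WQE buffer for a new QP: compute the per-WQE
 * size of each queue, round it up to a power of two (wqe_shift), place
 * the send queue after the receive queue at send_wqe_offset, and, for
 * memfree HCAs, pre-link the WQE chains and invalidate the scatter
 * entries.
 */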
int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
                       ib_qp_type_t type, struct mthca_qp *qp)
{
        int size;
        int max_sq_sge;

        qp->rq.max_gs    = cap->max_recv_sge;
        qp->sq.max_gs    = cap->max_send_sge;
        max_sq_sge       = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
                                 sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);
        if (max_sq_sge < (int)cap->max_send_sge)
                max_sq_sge = cap->max_send_sge;

        qp->wrid = cl_malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));
        if (!qp->wrid)
                return -1;

        size = sizeof (struct mthca_next_seg) +
                qp->rq.max_gs * sizeof (struct mthca_data_seg);

        for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
             qp->rq.wqe_shift++)
                ; /* nothing */

        size = max_sq_sge * sizeof (struct mthca_data_seg);
        switch (type) {
        case IB_QPT_UNRELIABLE_DGRM:
                size += mthca_is_memfree(pd->context) ?
                        sizeof (struct mthca_arbel_ud_seg) :
                        sizeof (struct mthca_tavor_ud_seg);
                break;

        case IB_QPT_UNRELIABLE_CONN:
                size += sizeof (struct mthca_raddr_seg);
                break;

        case IB_QPT_RELIABLE_CONN:
                size += sizeof (struct mthca_raddr_seg);
                /*
                 * An atomic op will require an atomic segment, a
                 * remote address segment and one scatter entry.
                 */
                if (size < (sizeof (struct mthca_atomic_seg) +
                            sizeof (struct mthca_raddr_seg) +
                            sizeof (struct mthca_data_seg)))
                        size = (sizeof (struct mthca_atomic_seg) +
                                sizeof (struct mthca_raddr_seg) +
                                sizeof (struct mthca_data_seg));
                break;

        default:
                break;
        }

        /* Make sure that we have enough space for a bind request */
        if (size < sizeof (struct mthca_bind_seg))
                size = sizeof (struct mthca_bind_seg);

        size += sizeof (struct mthca_next_seg);

        for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
             qp->sq.wqe_shift++)
                ; /* nothing */

        qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,
                                    1 << qp->sq.wqe_shift);

        qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);

        if (posix_memalign(&qp->buf, g_page_size,
                           align(qp->buf_size, g_page_size))) {
                cl_free(qp->wrid);
                return -1;
        }

        memset(qp->buf, 0, qp->buf_size);

        if (mthca_is_memfree(pd->context)) {
                struct mthca_next_seg *next;
                struct mthca_data_seg *scatter;
                int i;
                uint32_t sz;

                sz = cl_hton32((sizeof (struct mthca_next_seg) +
                            qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);

                for (i = 0; i < qp->rq.max; ++i) {
                        next = get_recv_wqe(qp, i);
                        next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<
                                             qp->rq.wqe_shift);
                        next->ee_nds = sz;

                        for (scatter = (void *) (next + 1);
                             (void *) scatter < (void *) ((char *)next + (1 << qp->rq.wqe_shift));
                             ++scatter)
                                scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);
                }

                for (i = 0; i < qp->sq.max; ++i) {
                        next = get_send_wqe(qp, i);
                        next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<
                                              qp->sq.wqe_shift) +
                                             qp->send_wqe_offset);
                }
        }

        qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
        qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);

        return 0;
}

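/*
 * The per-context QP table is a two-level lookup: qp_table_shift picks
 * the top-level slot for a QPN, and each populated slot holds a
 * refcounted array of QP pointers indexed through qp_table_mask.
 */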
struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)
{
        int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

        if (ctx->qp_table[tind].refcnt)
                return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
        else
                return NULL;
}

int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)
{
        int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
        int ret = 0;

        WaitForSingleObject( ctx->qp_table_mutex, INFINITE );

        if (!ctx->qp_table[tind].refcnt) {
                ctx->qp_table[tind].table = cl_malloc(
                        (ctx->qp_table_mask + 1) * sizeof (struct mthca_qp *));
                if (!ctx->qp_table[tind].table) {
                        ret = -1;
                        goto out;
                }
        }
        ++ctx->qp_table[tind].refcnt;
        ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;

out:
        ReleaseMutex( ctx->qp_table_mutex );
        return ret;
}

void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)
{
        int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

        WaitForSingleObject( ctx->qp_table_mutex, INFINITE );

        if (!--ctx->qp_table[tind].refcnt)
                cl_free(ctx->qp_table[tind].table);
        else
                ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;

        ReleaseMutex( ctx->qp_table_mutex );
}

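/*
 * For a WQE that completed in error, report whether its DBD bit was
 * set and where the next WQE in the chain starts (0 if there is none),
 * presumably so the CQ error-handling path can continue the chain.
 */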
int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
                       int index, int *dbd, uint32_t *new_wqe)
{
        struct mthca_next_seg *next;

        /*
         * For SRQs, all WQEs generate a CQE, so we're always at the
         * end of the doorbell chain.
         */
        if (qp->ibv_qp.srq) {
                *new_wqe = 0;
                return 0;
        }

        if (is_send)
                next = get_send_wqe(qp, index);
        else
                next = get_recv_wqe(qp, index);

        *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));
        if (next->ee_nds & cl_hton32(0x3f))
                *new_wqe = (next->nda_op & cl_hton32(~0x3f)) |
                        (next->ee_nds & cl_hton32(0x3f));
        else
                *new_wqe = 0;

        return 0;
}