hw/mthca/user/mlnx_uvp_qp.c
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  *
33  * $Id: qp.c 4214 2005-11-29 17:43:08Z roland $
34  */
35
36 #include <mt_l2w.h>
37 #include "mlnx_uvp.h"
38 #include "mlnx_uvp_doorbell.h"
39 #include "mlnx_uvp_wqe.h"
40 #include "mlnx_ual_data.h"
41
42 #if defined(EVENT_TRACING)
43 #include "mlnx_uvp_qp.tmh"
44 #endif
45
46 static const uint8_t mthca_opcode[] = {
47         MTHCA_OPCODE_RDMA_WRITE,
48         MTHCA_OPCODE_RDMA_WRITE_IMM,
49         MTHCA_OPCODE_SEND,
50         MTHCA_OPCODE_SEND_IMM,
51         MTHCA_OPCODE_RDMA_READ,
52         MTHCA_OPCODE_ATOMIC_CS,
53         MTHCA_OPCODE_ATOMIC_FA
54 };
55
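/*
 * Map an IBAL work request type (plus its IB_SEND_OPT_IMMEDIATE flag)
 * to the corresponding mthca WQE opcode.
 */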
56 static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)
57 {
58         enum mthca_wr_opcode opcode = MTHCA_OPCODE_INVALID;
59
60         switch (wr->wr_type) {
61                 case WR_SEND: 
62                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;
63                         break;
64                 case WR_RDMA_WRITE:     
65                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;
66                         break;
67                 case WR_RDMA_READ:              opcode = MTHCA_OPCODE_RDMA_READ; break;
68                 case WR_COMPARE_SWAP: opcode = MTHCA_OPCODE_ATOMIC_CS; break;
69                 case WR_FETCH_ADD:                      opcode = MTHCA_OPCODE_ATOMIC_FA; break;
70                 default:                                                opcode = MTHCA_OPCODE_INVALID; break;
71         }
72         return opcode;
73 }
74
75
76 static void dump_wqe(uint32_t print_lvl, uint32_t *wqe_ptr , struct mthca_qp *qp_ptr)
77 {
78         net32_t *wqe = wqe_ptr;
79
80         (void) wqe;     /* avoid warning if mthca_dbg compiled away... */
81         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents  QPN 0x%06x \n",qp_ptr->ibv_qp.qp_num));
82         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0
83                 , cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3])));
84         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4
85                 , cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7])));
86         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8
87                 , cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11])));
88         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12
89                 , cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15])));
90
91 }
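
/*
 * Receive WQEs start at the beginning of the QP buffer and send WQEs at
 * send_wqe_offset; each queue uses a stride of 1 << wqe_shift bytes.
 */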
92 static void *get_recv_wqe(struct mthca_qp *qp, int n)
93 {
94         return qp->buf + (n << qp->rq.wqe_shift);
95 }
96
97 static void *get_send_wqe(struct mthca_qp *qp, int n)
98 {
99         void *wqe_addr = qp->buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);
100         UVP_PRINT(TRACE_LEVEL_INFORMATION,UVP_DBG_QP,
101                 ("wqe %p, qp_buf %p, offset %#x,  index %d, shift %d \n",
102                  wqe_addr, qp->buf, qp->send_wqe_offset, n, 
103                 qp->sq.wqe_shift));
104         
105         return wqe_addr;
106 }
107
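/*
 * Reset both work queues to the empty state; "last" is pointed at the
 * final WQE slot so the first posted WQE gets chained correctly.
 */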
108 void mthca_init_qp_indices(struct mthca_qp *qp)
109 {
110         qp->sq.next_ind  = 0;
111         qp->sq.last_comp = qp->sq.max - 1;
112         qp->sq.head      = 0;
113         qp->sq.tail      = 0;
114         qp->sq.last      = get_send_wqe(qp, qp->sq.max - 1);
115
116         qp->rq.next_ind  = 0;
117         qp->rq.last_comp = qp->rq.max - 1;
118         qp->rq.head      = 0;
119         qp->rq.tail      = 0;
120         qp->rq.last      = get_recv_wqe(qp, qp->rq.max - 1);
121 }
122
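/*
 * Check whether posting nreq more WQEs would overflow the work queue.
 * The first check is lock-free; only if the queue looks full do we take
 * the CQ lock and re-read head/tail for a consistent view.
 */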
123 static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)
124 {
125         unsigned cur;
126
127         cur = wq->head - wq->tail;
128         if ((int)(cur + nreq) < wq->max)
129                 return 0;
130
131         cl_spinlock_acquire(&cq->lock);
132         cur = wq->head - wq->tail;
133         cl_spinlock_release(&cq->lock);
134
135         return (int)(cur + nreq) >= wq->max;
136 }
137
138
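/*
 * Post a chain of send work requests on a Tavor-mode QP: build each WQE
 * in the send queue, link it to the previous WQE via nda_op/ee_nds, and
 * ring the MMIO send doorbell once for the whole chain.
 */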
139 int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
140                           struct _ib_send_wr **bad_wr)
141 {
142         struct mthca_qp *qp = to_mqp(ibqp);
143         uint8_t *wqe;
144         uint8_t *prev_wqe;
145         int ret = 0;
146         int nreq;
147         int i;
148         int size;
149         int size0 = 0;
150         uint32_t f0 = 0;
151         int ind;
152         int op0 = 0;
153         enum ib_wr_opcode opcode;
154         
155         UVP_ENTER(UVP_DBG_QP);
156         cl_spinlock_acquire(&qp->sq.lock);
157
158         /* XXX check that state is OK to post send */
159
160         ind = qp->sq.next_ind;
161
162         if (ibqp->state == IBV_QPS_RESET) {
163                 cl_spinlock_release(&qp->sq.lock);
                    UVP_EXIT(UVP_DBG_QP);
                    return -EBUSY;
            }
164         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
165
166                 if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
167                         UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x full (%u head, %u tail,"
168                                         " %d max, %d nreq)\n", ibqp->qp_num,
169                                         qp->sq.head, qp->sq.tail,
170                                         qp->sq.max, nreq));
171                         ret = -ENOMEM;
172                         *bad_wr = wr;
173                         goto out;
174                 }
175
176                 wqe = get_send_wqe(qp, ind);
177                 prev_wqe = qp->sq.last;
178                 qp->sq.last = wqe;
179                 opcode = conv_ibal_wr_opcode(wr);
180                 if (opcode == MTHCA_OPCODE_INVALID) {
181                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
182                         ret = -EINVAL;
183                         *bad_wr = wr;
184                         goto out;
185                 }
186
187
188                 ((struct mthca_next_seg *) wqe)->nda_op = 0;
189                 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
190                 ((struct mthca_next_seg *) wqe)->flags =
191                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
192                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
193                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
194                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
195                         cl_hton32(1);
196                 if (opcode == MTHCA_OPCODE_SEND_IMM ||
197                     opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
198                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
199
200                 wqe += sizeof (struct mthca_next_seg);
201                 size = sizeof (struct mthca_next_seg) / 16;
202
203
204                 switch (ibqp->qp_type) {
205                 case IB_QPT_RELIABLE_CONN:
206                         switch (opcode) {
207                         case MTHCA_OPCODE_ATOMIC_CS:
208                         case MTHCA_OPCODE_ATOMIC_FA:
209                                 ((struct mthca_raddr_seg *) wqe)->raddr =
210                                         cl_hton64(wr->remote_ops.vaddr);
211                                 ((struct mthca_raddr_seg *) wqe)->rkey =
212                                         cl_hton32(wr->remote_ops.rkey);
213                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
214
215                                 wqe += sizeof (struct mthca_raddr_seg);
216
217                                 if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
218                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
219                                                 cl_hton64(wr->remote_ops.atomic2);
220                                         ((struct mthca_atomic_seg *) wqe)->compare =
221                                                 cl_hton64(wr->remote_ops.atomic1);
222                                 } else {
223                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
224                                                 cl_hton64(wr->remote_ops.atomic1);
225                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;
226                                 }
227
228                                 wqe += sizeof (struct mthca_atomic_seg);
229                                 size += (sizeof (struct mthca_raddr_seg) +
230                                          sizeof (struct mthca_atomic_seg)) / 16;
231                                 break;
232
233                         case MTHCA_OPCODE_RDMA_WRITE:
234                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
235                         case MTHCA_OPCODE_RDMA_READ:
236                                 ((struct mthca_raddr_seg *) wqe)->raddr =
237                                         cl_hton64(wr->remote_ops.vaddr);
238                                 ((struct mthca_raddr_seg *) wqe)->rkey =
239                                         cl_hton32(wr->remote_ops.rkey);
240                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
241                                 wqe += sizeof (struct mthca_raddr_seg);
242                                 size += sizeof (struct mthca_raddr_seg) / 16;
243                                 break;
244
245                         default:
246                                 /* No extra segments required for sends */
247                                 break;
248                         }
249
250                         break;
251
252                 case IB_QPT_UNRELIABLE_CONN:
253                         switch (opcode) {
254                         case MTHCA_OPCODE_RDMA_WRITE:
255                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
256                                 ((struct mthca_raddr_seg *) wqe)->raddr =
257                                         cl_hton64(wr->remote_ops.vaddr);
258                                 ((struct mthca_raddr_seg *) wqe)->rkey =
259                                         cl_hton32(wr->remote_ops.rkey);
260                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
261                                 wqe += sizeof (struct mthca_raddr_seg);
262                                 size += sizeof (struct mthca_raddr_seg) / 16;
263                                 break;
264
265                         default:
266                                 /* No extra segments required for sends */
267                                 break;
268                         }
269
270                         break;
271
272                 case IB_QPT_UNRELIABLE_DGRM:
273                         {
274                                 struct ibv_ah *ibv_ah = ((mlnx_ual_av_info_t*)wr->dgrm.ud.h_av)->ibv_ah;
275                                 ((struct mthca_tavor_ud_seg *) wqe)->lkey =
276                                         cl_hton32(to_mah(ibv_ah)->key);
277                                 ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
278                         cl_hton64((uint64_t)(ULONG_PTR)to_mah(ibv_ah)->av);
279                                 ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
280                                 ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
281
282                                 wqe += sizeof (struct mthca_tavor_ud_seg);
283                                 size += sizeof (struct mthca_tavor_ud_seg) / 16;
284                                 break;
285                         }
286
287                 default:
288                         break;
289                 }
290
291                 if ((int)wr->num_ds > qp->sq.max_gs) {
292                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num));
293                         ret = -ERANGE;
294                         *bad_wr = wr;
295                         goto out;
296                 }
297 //TODO sleybo:
298                 if (wr->send_opt & IB_SEND_OPT_INLINE) {
299                         if (wr->num_ds) {
300                                 struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
301                                 int s = 0;
302
303                                 wqe += sizeof *seg;
304                                 for (i = 0; i < (int)wr->num_ds; ++i) {
305                                         struct _ib_local_ds *sge = &wr->ds_array[i];
306
307                                         s += sge->length;
308
309                                         if (s > qp->max_inline_data) {
310                                                 ret = -1;
311                                                 *bad_wr = wr;
312                                                 goto out;
313                                         }
314
315                                         memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr,
316                                                sge->length);
317                                         wqe += sge->length;
318                                 }
319
320                                 seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
321                                 size += align(s + sizeof *seg, 16) / 16;
322                         }
323                 } else {
324                         for (i = 0; i < (int)wr->num_ds; ++i) {
325                                 ((struct mthca_data_seg *) wqe)->byte_count =
326                                         cl_hton32(wr->ds_array[i].length);
327                                 ((struct mthca_data_seg *) wqe)->lkey =
328                                         cl_hton32(wr->ds_array[i].lkey);
329                                 ((struct mthca_data_seg *) wqe)->addr =
330                                         cl_hton64(wr->ds_array[i].vaddr);
331                                 wqe += sizeof (struct mthca_data_seg);
332                                 size += sizeof (struct mthca_data_seg) / 16;
333                         }
334                 }
335
336                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
337
338                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
339                         cl_hton32(((ind << qp->sq.wqe_shift) +
340                         qp->send_wqe_offset) |opcode);
341                 
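                    /*
                     * Make sure the new WQE and the nda_op link are written
                     * before ee_nds below makes them visible to the HCA.
                     */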
342                 wmb();
343                 
344                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
345                         cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
346                         ((wr->send_opt & IB_SEND_OPT_FENCE) ?
347                          MTHCA_NEXT_FENCE : 0));
348
349                 if (!size0) {
350                         size0 = size;
351                         op0   = opcode;
352                 }
353                 
354                 dump_wqe( TRACE_LEVEL_VERBOSE, (uint32_t*)qp->sq.last,qp);
355                 
356                 ++ind;
357                 if (unlikely(ind >= qp->sq.max))
358                         ind -= qp->sq.max;
359
360         }
361
362 out:
363         if (likely(nreq)) {
364                 uint32_t doorbell[2];
365
366                 doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
367                                      qp->send_wqe_offset) | f0 | op0);
368                 doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
369
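                    /*
                     * Make sure that descriptors are written before
                     * doorbell is rung.
                     */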
370                 wmb();
371
372                 mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
373         }
374
375         qp->sq.next_ind = ind;
376         qp->sq.head    += nreq;
377
378         cl_spinlock_release(&qp->sq.lock);
379         
380         UVP_EXIT(UVP_DBG_QP);
381         return ret;
382 }
383
384
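/*
 * Post receive work requests on a Tavor-mode QP.  The receive doorbell
 * can report at most MTHCA_TAVOR_MAX_WQES_PER_RECV_DB WQEs at a time,
 * so long chains are pushed to the HCA in batches of that size.
 */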
385 int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
386                           struct _ib_recv_wr **bad_wr)
387 {
388         struct mthca_qp *qp = to_mqp(ibqp);
389         uint32_t doorbell[2];
390         int ret = 0;
391         int nreq;
392         int i;
393         int size;
394         int size0 = 0;
395         int ind;
396         uint8_t *wqe;
397         uint8_t *prev_wqe;
398         
399         UVP_ENTER(UVP_DBG_QP);
400         
401         cl_spinlock_acquire(&qp->rq.lock);
402
403         /* XXX check that state is OK to post receive */
404         
405         ind = qp->rq.next_ind;
406         if (ibqp->state == IBV_QPS_RESET) {
407                 cl_spinlock_release(&qp->rq.lock);
                    UVP_EXIT(UVP_DBG_QP);
                    return -EBUSY;
            }
408         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
409                 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
410                         nreq = 0;
411
412                         doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
413                         doorbell[1] = cl_hton32(ibqp->qp_num << 8); //TODO sleybo: add qpn to qp struct 
414
415                         /*
416                          * Make sure that descriptors are written
417                          * before doorbell is rung.
418                          */
419                         mb();
420
421                         mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
422
423                         qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
424                         size0 = 0;
425                 }
426
427                 if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
428                         UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("RQ %06x full (%u head, %u tail,"
429                                         " %d max, %d nreq)\n", ibqp->qp_num,
430                                         qp->rq.head, qp->rq.tail,
431                                         qp->rq.max, nreq));
432                         ret = -ENOMEM;
433                         *bad_wr = wr;
434                         goto out;
435                 }
436
437                 wqe = get_recv_wqe(qp, ind);
438                 prev_wqe = qp->rq.last;
439                 qp->rq.last = wqe;
440
441                 ((struct mthca_next_seg *) wqe)->nda_op = 0;
442                 ((struct mthca_next_seg *) wqe)->ee_nds =
443                         cl_hton32(MTHCA_NEXT_DBD);
444                 ((struct mthca_next_seg *) wqe)->flags =
445                         cl_hton32(MTHCA_NEXT_CQ_UPDATE);
446
447                 wqe += sizeof (struct mthca_next_seg);
448                 size = sizeof (struct mthca_next_seg) / 16;
449
450                 if (unlikely((int)wr->num_ds  > qp->rq.max_gs)) {
451                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("RQ %06x too many gathers\n",ibqp->qp_num));
452                         ret = -ERANGE;
453                         *bad_wr = wr;
454                         goto out;
455                 }
456
457                 for (i = 0; i < (int)wr->num_ds; ++i) {
458                         ((struct mthca_data_seg *) wqe)->byte_count =
459                                 cl_hton32(wr->ds_array[i].length);
460                         ((struct mthca_data_seg *) wqe)->lkey =
461                                 cl_hton32(wr->ds_array[i].lkey);
462                         ((struct mthca_data_seg *) wqe)->addr =
463                                 cl_hton64(wr->ds_array[i].vaddr);
464                         wqe += sizeof (struct mthca_data_seg);
465                         size += sizeof (struct mthca_data_seg) / 16;
466                 }
467
468                 qp->wrid[ind] = wr->wr_id;
469
470                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
471                         cl_hton32((ind << qp->rq.wqe_shift) | 1);
472                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
473                         cl_hton32(MTHCA_NEXT_DBD | size);
474
475                 if (!size0)
476                         size0 = size;
477
478                 ++ind;
479                 if (unlikely(ind >= qp->rq.max))
480                         ind -= qp->rq.max;
481         }
482
483 out:
484         if (likely(nreq)) {
485                 doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
486                 doorbell[1] = cl_hton32((ibqp->qp_num << 8) | nreq);
487
488                 /*
489                  * Make sure that descriptors are written before
490                  * doorbell is rung.
491                  */
492                 mb();
493
494                 mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
495         }
496
497         qp->rq.next_ind = ind;
498         qp->rq.head    += nreq;
499
500         cl_spinlock_release(&qp->rq.lock);
501         UVP_EXIT(UVP_DBG_QP);
502         return ret;
503 }
504
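/*
 * Post send work requests on an Arbel (mem-free) QP.  In addition to the
 * MMIO doorbell, the send doorbell record (*qp->sq.db) is updated so the
 * HCA sees the new producer index.
 */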
505 int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
506                           struct _ib_send_wr **bad_wr)
507 {
508         struct mthca_qp *qp = to_mqp(ibqp);
509         uint32_t doorbell[2];
510         uint8_t *wqe;
511         uint8_t *prev_wqe;
512         int ret = 0;
513         int nreq;       
514         int i;
515         int size;
516         int size0 = 0;
517         uint32_t f0 = 0;
518         int ind;
519         uint8_t op0 = 0;
520         enum ib_wr_opcode opcode;
521         
522         UVP_ENTER(UVP_DBG_QP);
523         
524         cl_spinlock_acquire(&qp->sq.lock);
525
526         /* XXX check that state is OK to post send */
527
528         ind = qp->sq.head & (qp->sq.max - 1);
529         if (ibqp->state == IBV_QPS_RESET) {
530                 cl_spinlock_release(&qp->sq.lock);
                    UVP_EXIT(UVP_DBG_QP);
                    return -EBUSY;
            }
531         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
532                 if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
533                         nreq = 0;
534
535                         doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
536                                             ((qp->sq.head & 0xffff) << 8) | f0 | op0);
537                         doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
538                         qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
539                         size0 = 0;
540
541                         /*
542                          * Make sure that descriptors are written before
543                          * doorbell record.
544                          */
545                         wmb();
546                         *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
547
548                         /*
549                          * Make sure doorbell record is written before we
550                          * write MMIO send doorbell.
551                          */
552                         wmb();
553                         mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
554
555                 }
556
557                 if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
558                         UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("SQ %06x full (%u head, %u tail,"
559                                         " %d max, %d nreq)\n", ibqp->qp_num,
560                                         qp->sq.head, qp->sq.tail,
561                                         qp->sq.max, nreq));                     
562                         ret = -ENOMEM;
563                         *bad_wr = wr;
564                         goto out;
565                 }
566
567                 wqe = get_send_wqe(qp, ind);
568                 prev_wqe = qp->sq.last;
569                 qp->sq.last = wqe;
570                 opcode = conv_ibal_wr_opcode(wr);
571
572                 ((struct mthca_next_seg *) wqe)->flags =
573                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
574                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
575                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
576                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
577                         cl_hton32(1);
578                 if (opcode == MTHCA_OPCODE_SEND_IMM ||
579                         opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
580                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
581
582                 wqe += sizeof (struct mthca_next_seg);
583                 size = sizeof (struct mthca_next_seg) / 16;
584
585                 switch (ibqp->qp_type) {
586                 case IB_QPT_RELIABLE_CONN:
587                         switch (opcode) {
588                         case MTHCA_OPCODE_ATOMIC_CS:
589                         case MTHCA_OPCODE_ATOMIC_FA:
590                                 ((struct mthca_raddr_seg *) wqe)->raddr =
591                                         cl_hton64(wr->remote_ops.vaddr);
592                                 ((struct mthca_raddr_seg *) wqe)->rkey =
593                                         cl_hton32((wr->remote_ops.rkey));
594                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
595
596                                 wqe += sizeof (struct mthca_raddr_seg);
597
598                                 if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
599                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
600                                                 cl_hton64(wr->remote_ops.atomic2);
601                                         ((struct mthca_atomic_seg *) wqe)->compare =
602                                                 cl_hton64(wr->remote_ops.atomic1);
603                                 } else {
604                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
605                                                 cl_hton64(wr->remote_ops.atomic1);
606                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;
607                                 }
608
609                                 wqe += sizeof (struct mthca_atomic_seg);
610                                 size += (sizeof (struct mthca_raddr_seg) +
611                                          sizeof (struct mthca_atomic_seg)) / 16;
612                                 break;
613
614                         case MTHCA_OPCODE_RDMA_READ:
615                         case MTHCA_OPCODE_RDMA_WRITE:
616                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
617                                 ((struct mthca_raddr_seg *) wqe)->raddr =
618                                         cl_hton64(wr->remote_ops.vaddr);
619                                 ((struct mthca_raddr_seg *) wqe)->rkey =
620                                         cl_hton32(wr->remote_ops.rkey);
621                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
622                                 wqe += sizeof (struct mthca_raddr_seg);
623                                 size += sizeof (struct mthca_raddr_seg) / 16;
624                                 break;
625
626                         default:
627                                 /* No extra segments required for sends */
628                                 break;
629                         }
630
631                         break;
632
633                 case IB_QPT_UNRELIABLE_CONN:
634                         switch (opcode) {
635                         case MTHCA_OPCODE_RDMA_WRITE:
636                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
637                                 ((struct mthca_raddr_seg *) wqe)->raddr =
638                                         cl_hton64(wr->remote_ops.vaddr);
639                                 ((struct mthca_raddr_seg *) wqe)->rkey =
640                                         cl_hton32(wr->remote_ops.rkey);
641                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
642                                 wqe += sizeof (struct mthca_raddr_seg);
643                                 size += sizeof (struct mthca_raddr_seg) / 16;
644                                 break;
645
646                         default:
647                                 /* No extra segments required for sends */
648                                 break;
649                         }
650
651                         break;
652
653                 case IB_QPT_UNRELIABLE_DGRM:
654                         {
655                                 struct ibv_ah *ibv_ah = ((mlnx_ual_av_info_t*)wr->dgrm.ud.h_av)->ibv_ah;
656                                 memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
657                                        to_mah(ibv_ah)->av, sizeof ( struct mthca_av));
658                                 ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
659                                 ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
660
661
662                                 wqe += sizeof (struct mthca_arbel_ud_seg);
663                                 size += sizeof (struct mthca_arbel_ud_seg) / 16;
664                                 break;
665                         }
666
667                 default:
668                         break;
669                 }
670
671                 if ((int)wr->num_ds > qp->sq.max_gs) {
672                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num));
673                         ret = -ERANGE;
674                         *bad_wr = wr;
675                         goto out;
676                 }
677
678                 if (wr->send_opt & IB_SEND_OPT_INLINE) {
679                         if (wr->num_ds) {
680                                 struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
681                                 int s = 0;
682
683                                 wqe += sizeof *seg;
684                                 for (i = 0; i < (int)wr->num_ds; ++i) {
685                                         struct _ib_local_ds *sge = &wr->ds_array[i];
686
687                                         s += sge->length;
688
689                                         if (s > qp->max_inline_data) {
690                                                 ret = -1;
691                                                 *bad_wr = wr;
692                                                 goto out;
693                                         }
694
695                                         memcpy(wqe, (void *) (uintptr_t) sge->vaddr,
696                                                sge->length);
697                                         wqe += sge->length;
698                                 }
699
700                                 seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
701                                 size += align(s + sizeof *seg, 16) / 16;
702                         }
703                 } else {
704
705                         for (i = 0; i < (int)wr->num_ds; ++i) {
706                                 ((struct mthca_data_seg *) wqe)->byte_count =
707                                         cl_hton32(wr->ds_array[i].length);
708                                 ((struct mthca_data_seg *) wqe)->lkey =
709                                         cl_hton32(wr->ds_array[i].lkey);
710                                 ((struct mthca_data_seg *) wqe)->addr =
711                                         cl_hton64(wr->ds_array[i].vaddr);
712                                 wqe += sizeof (struct mthca_data_seg);
713                                 size += sizeof (struct mthca_data_seg) / 16;
714                         }
715 //TODO do this also in kernel
716 //                      size += wr->num_ds * (sizeof *seg / 16);
717                 }
718
719                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
720
721                 if (opcode == MTHCA_OPCODE_INVALID) {
722                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
723                         ret = -EINVAL;
724                         *bad_wr = wr;
725                         goto out;
726                 }
727
728                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
729                         cl_hton32(((ind << qp->sq.wqe_shift) +
730                                qp->send_wqe_offset) |
731                               opcode);
732                 wmb();
733                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
734                         cl_hton32(MTHCA_NEXT_DBD | size |
735                                   ((wr->send_opt & IB_SEND_OPT_FENCE) ?
736                                    MTHCA_NEXT_FENCE : 0));
738
739                 if (!size0) {
740                         size0 = size;
741                         op0   = opcode;
742                 }
743
744                 ++ind;
745                 if (unlikely(ind >= qp->sq.max))
746                         ind -= qp->sq.max;
747         }
748
749 out:
750         if (likely(nreq)) {
751                 doorbell[0] = cl_hton32((nreq << 24) |
752                                     ((qp->sq.head & 0xffff) << 8) | f0 | op0);
753                 doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
754
755                 qp->sq.head += nreq;
756
757                 /*
758                  * Make sure that descriptors are written before
759                  * doorbell record.
760                  */
761                 wmb();
762                 *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
763
764                 /*
765                  * Make sure doorbell record is written before we
766                  * write MMIO send doorbell.
767                  */
768                 wmb();
769                 mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
770         }
771
772         cl_spinlock_release(&qp->sq.lock);
773
774         UVP_EXIT(UVP_DBG_QP);
775         
776         return ret;
777 }
778
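/*
 * Post receive work requests on an Arbel (mem-free) QP.  No MMIO doorbell
 * is needed: updating the receive doorbell record (*qp->rq.db) with the
 * new head index is enough.  An unused scatter slot is terminated with
 * the invalid lkey.
 */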
779 int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
780                           struct _ib_recv_wr **bad_wr)
781 {
782         struct mthca_qp *qp = to_mqp(ibqp);
783         int ret = 0;
784         int nreq;
785         int ind;
786         int i;
787         uint8_t *wqe;
788         
789         UVP_ENTER(UVP_DBG_QP);
790         
791         cl_spinlock_acquire(&qp->rq.lock);
792
793         /* XXX check that state is OK to post receive */
794
795         ind = qp->rq.head & (qp->rq.max - 1);
796         if (ibqp->state == IBV_QPS_RESET) {
797                 cl_spinlock_release(&qp->rq.lock);
                    UVP_EXIT(UVP_DBG_QP);
                    return -EBUSY;
            }
798         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
799                 if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {//TODO sleybo: check the cq
800                         UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full (%u head, %u tail,"
801                                         " %d max, %d nreq)\n", ibqp->qp_num,
802                                         qp->rq.head, qp->rq.tail,
803                                         qp->rq.max, nreq));
804                         ret = -ENOMEM;
805                         *bad_wr = wr;
806                         goto out;
807                 }
808
809                 wqe = get_recv_wqe(qp, ind);
810
811                 ((struct mthca_next_seg *) wqe)->flags = 0;
812
813                 wqe += sizeof (struct mthca_next_seg);
814
815                 if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
816                         UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x too many scatter entries\n",ibqp->qp_num));
817                         ret = -ERANGE;
818                         *bad_wr = wr;
819                         goto out;
820                 }
821
822                 for (i = 0; i < (int)wr->num_ds; ++i) {
823                         ((struct mthca_data_seg *) wqe)->byte_count =
824                                 cl_hton32(wr->ds_array[i].length);
825                         ((struct mthca_data_seg *) wqe)->lkey =
826                                 cl_hton32(wr->ds_array[i].lkey);
827                         ((struct mthca_data_seg *) wqe)->addr =
828                                 cl_hton64(wr->ds_array[i].vaddr);
829                         wqe += sizeof (struct mthca_data_seg);
830                 }
831
832                 if (i < qp->rq.max_gs) {
833                         ((struct mthca_data_seg *) wqe)->byte_count = 0;
834                         ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);
835                         ((struct mthca_data_seg *) wqe)->addr = 0;
836                 }
837
838                 qp->wrid[ind] = wr->wr_id;
839
840                 ++ind;
841                 if (unlikely(ind >= qp->rq.max))
842                         ind -= qp->rq.max;
843         }
844 out:
845         if (likely(nreq)) {
846                 qp->rq.head += nreq;
847
848                 /*
849                  * Make sure that descriptors are written before
850                  * doorbell record.
851                  */
852                 mb();
853                 *qp->rq.db = cl_hton32(qp->rq.head & 0xffff);
854         }
855
856         cl_spinlock_release(&qp->rq.lock);
857         
858         UVP_EXIT(UVP_DBG_QP);
859         
860         return ret;
861 }
862
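/*
 * Size and allocate the WQE buffer for a QP.  Both queues use a
 * power-of-two WQE stride (wqe_shift); receive WQEs are placed at the
 * start of the buffer and send WQEs at send_wqe_offset.  On mem-free
 * HCAs the receive WQEs are pre-linked into a ring with invalid lkeys.
 */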
863 int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
864                        ib_qp_type_t type, struct mthca_qp *qp)
865 {
866         int size;
867         int max_sq_sge;
868
869         qp->rq.max_gs    = cap->max_recv_sge;
870         qp->sq.max_gs    = cap->max_send_sge;
871         max_sq_sge       = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
872                                  sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);
873         if (max_sq_sge < (int)cap->max_send_sge)
874                 max_sq_sge = cap->max_send_sge;
875
876         qp->wrid = cl_malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));
877         if (!qp->wrid)
878                 return -1;
879
880         size = sizeof (struct mthca_next_seg) +
881                 qp->rq.max_gs * sizeof (struct mthca_data_seg);
882
883         for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
884              qp->rq.wqe_shift++)
885                 ; /* nothing */
886
887         size = max_sq_sge * sizeof (struct mthca_data_seg);
888         switch (type) {
889         case IB_QPT_UNRELIABLE_DGRM:
890                 size += mthca_is_memfree(pd->context) ?
891                         sizeof (struct mthca_arbel_ud_seg) :
892                         sizeof (struct mthca_tavor_ud_seg);
893                 break;
894
895         case IB_QPT_UNRELIABLE_CONN:
896                 size += sizeof (struct mthca_raddr_seg);
897                 break;
898
899         case IB_QPT_RELIABLE_CONN:
900                 size += sizeof (struct mthca_raddr_seg);
901                 /*
902                  * An atomic op will require an atomic segment, a
903                  * remote address segment and one scatter entry.
904                  */
905                 if (size < (sizeof (struct mthca_atomic_seg) +
906                             sizeof (struct mthca_raddr_seg) +
907                             sizeof (struct mthca_data_seg)))
908                         size = (sizeof (struct mthca_atomic_seg) +
909                                 sizeof (struct mthca_raddr_seg) +
910                                 sizeof (struct mthca_data_seg));
911                 break;
912
913         default:
914                 break;
915         }
916
917         /* Make sure that we have enough space for a bind request */
918         if (size < sizeof (struct mthca_bind_seg))
919                 size = sizeof (struct mthca_bind_seg);
920
921         size += sizeof (struct mthca_next_seg);
922
923         for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
924              qp->sq.wqe_shift++)
925                 ; /* nothing */
926
927         qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,
928                                     1 << qp->sq.wqe_shift);
929
930         qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);
931
932         if (posix_memalign(&qp->buf, g_page_size,
933                            align(qp->buf_size, g_page_size))) {
934                 cl_free(qp->wrid);
935                 return -1;
936         }
937
938         memset(qp->buf, 0, qp->buf_size);
939
940         if (mthca_is_memfree(pd->context)) {
941                 struct mthca_next_seg *next;
942                 struct mthca_data_seg *scatter;
943                 int i;
944                 uint32_t sz;
945
946                 sz = cl_hton32((sizeof (struct mthca_next_seg) +
947                             qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);
948
949                 for (i = 0; i < qp->rq.max; ++i) {
950                         next = get_recv_wqe(qp, i);
951                         next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<
952                                              qp->rq.wqe_shift);
953                         next->ee_nds = sz;
954
955                         for (scatter = (void *) (next + 1);
956                              (void *) scatter < (void *) ((char*)next + (1 << qp->rq.wqe_shift));
957                              ++scatter)
958                                 scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);
959                 }
960
961                 for (i = 0; i < qp->sq.max; ++i) {
962                         next = get_send_wqe(qp, i);
963                         next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<
964                                               qp->sq.wqe_shift) +
965                                              qp->send_wqe_offset);
966                 }
967         }
968
969         qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
970         qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
971
972         return 0;
973 }
974
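/*
 * The userspace QP table is a two-level lookup: the bucket index comes
 * from the upper bits of the QPN, the entry index from the lower bits
 * (qp_table_mask).
 */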
975 struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)
976 {
977         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
978
979         if (ctx->qp_table[tind].refcnt)
980                 return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
981         else
982                 return NULL;
983 }
984
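/*
 * Insert a QP into the table, allocating the second-level array on first
 * use; each bucket is reference counted so mthca_clear_qp can free it
 * when it empties.
 */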
985 int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)
986 {
987         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
988         int ret = 0;
989
990         WaitForSingleObject( ctx->qp_table_mutex, INFINITE );
991
992         if (!ctx->qp_table[tind].refcnt) {
993                 ctx->qp_table[tind].table = cl_malloc(
994                         (ctx->qp_table_mask + 1) * sizeof (struct mthca_qp *));
995                 if (!ctx->qp_table[tind].table) {
996                         ret = -1;
997                         goto out;
998                 }
999         }
1000         ++ctx->qp_table[tind].refcnt;
1001         ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
1002
1003 out:
1004         ReleaseMutex( ctx->qp_table_mutex );
1005         return ret;
1006 }
1007
1008 void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)
1009 {
1010         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
1011
1012         WaitForSingleObject( ctx->qp_table_mutex, INFINITE );
1013
1014         if (!--ctx->qp_table[tind].refcnt)
1015                 cl_free(ctx->qp_table[tind].table);
1016         else
1017                 ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
1018         
1019         ReleaseMutex( ctx->qp_table_mutex );
1020 }
1021
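/*
 * For a WQE that completed in error, report whether its DBD bit was set
 * and compute the nda/nds of the next WQE so the caller can continue
 * down the doorbell chain.
 */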
1022 int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
1023                        int index, int *dbd, uint32_t *new_wqe)
1024 {
1025         struct mthca_next_seg *next;
1026
1027         /*
1028          * For SRQs, all WQEs generate a CQE, so we're always at the
1029          * end of the doorbell chain.
1030          */
1031         if (qp->ibv_qp.srq) {
1032                 *new_wqe = 0;
1033                 return 0;
1034         }
1035
1036         if (is_send)
1037                 next = get_send_wqe(qp, index);
1038         else
1039                 next = get_recv_wqe(qp, index);
1040
1041         *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));
1042         if (next->ee_nds & cl_hton32(0x3f))
1043                 *new_wqe = (next->nda_op & cl_hton32(~0x3f)) |
1044                         (next->ee_nds & cl_hton32(0x3f));
1045         else
1046                 *new_wqe = 0;
1047
1048         return 0;
1049 }
1050