[MTHCA] bug fixes:
hw/mthca/user/mlnx_uvp_qp.c
/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */

#include <mt_l2w.h>
#include "mlnx_uvp.h"
#include "mlnx_uvp_doorbell.h"
#include "mthca_wqe.h"
#include "mlnx_ual_data.h"

#if defined(EVENT_TRACING)
#include "mlnx_uvp_qp.tmh"
#endif

static const uint8_t mthca_opcode[] = {
	MTHCA_OPCODE_RDMA_WRITE,
	MTHCA_OPCODE_RDMA_WRITE_IMM,
	MTHCA_OPCODE_SEND,
	MTHCA_OPCODE_SEND_IMM,
	MTHCA_OPCODE_RDMA_READ,
	MTHCA_OPCODE_ATOMIC_CS,
	MTHCA_OPCODE_ATOMIC_FA
};

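/*
 * Map an IBAL work-request type (and its IB_SEND_OPT_IMMEDIATE flag) to the
 * corresponding mthca hardware opcode; unsupported types yield
 * MTHCA_OPCODE_INVALID so the posting routines can reject them.
 */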
static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)
{
	enum mthca_wr_opcode opcode = MTHCA_OPCODE_INVALID;

	switch (wr->wr_type) {
	case WR_SEND:
		opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ?
			MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;
		break;
	case WR_RDMA_WRITE:
		opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ?
			MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;
		break;
	case WR_RDMA_READ:
		opcode = MTHCA_OPCODE_RDMA_READ;
		break;
	case WR_COMPARE_SWAP:
		opcode = MTHCA_OPCODE_ATOMIC_CS;
		break;
	case WR_FETCH_ADD:
		opcode = MTHCA_OPCODE_ATOMIC_FA;
		break;
	default:
		opcode = MTHCA_OPCODE_INVALID;
		break;
	}
	return opcode;
}

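/* Debug helper: dump the first 64 bytes (four rows of 16) of a WQE. */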
static void dump_wqe(uint32_t print_lvl, uint32_t *wqe_ptr, struct mthca_qp *qp_ptr)
{
	net32_t *wqe = wqe_ptr;

	(void) wqe;	/* avoid warning if mthca_dbg compiled away... */
	UVP_PRINT(print_lvl, UVP_DBG_QP, ("WQE contents  QPN 0x%06x \n", qp_ptr->ibv_qp.qp_num));
	UVP_PRINT(print_lvl, UVP_DBG_QP, ("WQE contents [%02x] %08x %08x %08x %08x \n", 0,
		cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3])));
	UVP_PRINT(print_lvl, UVP_DBG_QP, ("WQE contents [%02x] %08x %08x %08x %08x \n", 4,
		cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7])));
	UVP_PRINT(print_lvl, UVP_DBG_QP, ("WQE contents [%02x] %08x %08x %08x %08x \n", 8,
		cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11])));
	UVP_PRINT(print_lvl, UVP_DBG_QP, ("WQE contents [%02x] %08x %08x %08x %08x \n", 12,
		cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15])));
}

static void *get_recv_wqe(struct mthca_qp *qp, int n)
{
	return qp->buf + (n << qp->rq.wqe_shift);
}

static void *get_send_wqe(struct mthca_qp *qp, int n)
{
	void *wqe_addr = qp->buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);
	UVP_PRINT(TRACE_LEVEL_INFORMATION, UVP_DBG_QP,
		("wqe %p, qp_buf %p, offset %#x,  index %d, shift %d \n",
		wqe_addr, qp->buf, qp->send_wqe_offset, n,
		qp->sq.wqe_shift));

	return wqe_addr;
}

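/*
 * Reset the send and receive queue indices to their post-creation state.
 * "last" is pointed at the final WQE slot of each ring so that the first
 * WQE posted afterwards can be linked from it.
 */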
void mthca_init_qp_indices(struct mthca_qp *qp)
{
	qp->sq.next_ind  = 0;
	qp->sq.last_comp = qp->sq.max - 1;
	qp->sq.head      = 0;
	qp->sq.tail      = 0;
	qp->sq.last      = get_send_wqe(qp, qp->sq.max - 1);

	qp->rq.next_ind  = 0;
	qp->rq.last_comp = qp->rq.max - 1;
	qp->rq.head      = 0;
	qp->rq.tail      = 0;
	qp->rq.last      = get_recv_wqe(qp, qp->rq.max - 1);
}

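/*
 * Work-queue overflow check.  head and tail are free-running counters, so
 * head - tail is the number of outstanding WQEs.  The first check is done
 * without the CQ lock; only if the queue looks full do we take the CQ lock
 * and re-read, so completions polled concurrently are taken into account.
 */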
static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)
{
	unsigned cur;

	cur = wq->head - wq->tail;
	if ((int)(cur + nreq) < wq->max)
		return 0;

	cl_spinlock_acquire(&cq->lock);
	cur = wq->head - wq->tail;
	cl_spinlock_release(&cq->lock);

	return (int)(cur + nreq) >= wq->max;
}

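/*
 * Post a chain of send work requests on a Tavor-family (non-memfree) HCA.
 * Each WQE is built in the send queue buffer and linked from the previous
 * WQE through its nda_op/ee_nds words; a single MMIO doorbell at the end
 * tells the HCA where the chain starts and the size of its first WQE.
 */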
int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
			  struct _ib_send_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	uint8_t *wqe;
	uint8_t *prev_wqe;
	int ret = 0;
	int nreq;
	int i;
	int size;
	int size0 = 0;
	uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
	int ind;
	int op0 = 0;
	enum mthca_wr_opcode opcode;

	UVP_ENTER(UVP_DBG_QP);
	cl_spinlock_acquire(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.next_ind;

	if (ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		if (bad_wr)
			*bad_wr = wr;
		goto err_busy;
	}

	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {

		if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", ibqp->qp_num,
					qp->sq.head, qp->sq.tail,
					qp->sq.max, nreq));
			ret = -ENOMEM;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;
		opcode = conv_ibal_wr_opcode(wr);
		if (opcode == MTHCA_OPCODE_INVALID) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x opcode invalid\n", ibqp->qp_num));
			ret = -EINVAL;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		((struct mthca_next_seg *) wqe)->nda_op = 0;
		((struct mthca_next_seg *) wqe)->ee_nds = 0;
		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
			 cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
			 cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
			cl_hton32(1);
		if (opcode == MTHCA_OPCODE_SEND_IMM ||
		    opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		switch (ibqp->qp_type) {
		case IB_QPT_RELIABLE_CONN:
			switch (opcode) {
			case MTHCA_OPCODE_ATOMIC_CS:
			case MTHCA_OPCODE_ATOMIC_FA:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;

				wqe += sizeof (struct mthca_raddr_seg);

				if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cl_hton64(wr->remote_ops.atomic2);
					((struct mthca_atomic_seg *) wqe)->compare =
						cl_hton64(wr->remote_ops.atomic1);
				} else {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cl_hton64(wr->remote_ops.atomic1);
					((struct mthca_atomic_seg *) wqe)->compare = 0;
				}

				wqe += sizeof (struct mthca_atomic_seg);
				size += (sizeof (struct mthca_raddr_seg) +
					 sizeof (struct mthca_atomic_seg)) / 16;
				break;

			case MTHCA_OPCODE_RDMA_WRITE:
			case MTHCA_OPCODE_RDMA_WRITE_IMM:
			case MTHCA_OPCODE_RDMA_READ:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IB_QPT_UNRELIABLE_CONN:
			switch (opcode) {
			case MTHCA_OPCODE_RDMA_WRITE:
			case MTHCA_OPCODE_RDMA_WRITE_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IB_QPT_UNRELIABLE_DGRM:
			{
				struct mthca_ah *ah = ((struct mthca_ah *) wr->dgrm.ud.h_av);
				((struct mthca_tavor_ud_seg *) wqe)->lkey =
					cl_hton32(ah->key);
				((struct mthca_tavor_ud_seg *) wqe)->av_addr =
					cl_hton64((uint64_t) ah->av);
				((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
				((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;

				wqe += sizeof (struct mthca_tavor_ud_seg);
				size += sizeof (struct mthca_tavor_ud_seg) / 16;
				break;
			}

		default:
			break;
		}

		if ((int) wr->num_ds > qp->sq.max_gs) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x too many gathers\n", ibqp->qp_num));
			ret = -ERANGE;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}
//TODO sleybo:
		if (wr->send_opt & IB_SEND_OPT_INLINE) {
			if (wr->num_ds) {
				struct mthca_inline_seg *seg = (struct mthca_inline_seg *) wqe;
				uint32_t s = 0;

				wqe += sizeof *seg;
				for (i = 0; i < (int) wr->num_ds; ++i) {
					struct _ib_local_ds *sge = &wr->ds_array[i];

					s += sge->length;

					if (s > (uint32_t) qp->max_inline_data) {
						ret = -1;
						if (bad_wr)
							*bad_wr = wr;
						goto out;
					}

					memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr,
					       sge->length);
					wqe += sge->length;
				}

				seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
				size += align(s + sizeof *seg, 16) / 16;
			}
		} else {
			for (i = 0; i < (int) wr->num_ds; ++i) {
				((struct mthca_data_seg *) wqe)->byte_count =
					cl_hton32(wr->ds_array[i].length);
				((struct mthca_data_seg *) wqe)->lkey =
					cl_hton32(wr->ds_array[i].lkey);
				((struct mthca_data_seg *) wqe)->addr =
					cl_hton64(wr->ds_array[i].vaddr);
				wqe += sizeof (struct mthca_data_seg);
				size += sizeof (struct mthca_data_seg) / 16;
			}
		}

		qp->wrid[ind + qp->rq.max] = wr->wr_id;

		((struct mthca_next_seg *) prev_wqe)->nda_op =
			cl_hton32(((ind << qp->sq.wqe_shift) +
			qp->send_wqe_offset) | opcode);

		wmb();

		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
			((wr->send_opt & IB_SEND_OPT_FENCE) ?
			 MTHCA_NEXT_FENCE : 0));

		if (!size0) {
			size0 = size;
			op0   = opcode;
		}

		dump_wqe(TRACE_LEVEL_VERBOSE, (uint32_t *) qp->sq.last, qp);

		++ind;
		if (unlikely(ind >= qp->sq.max))
			ind -= qp->sq.max;

	}

out:
	if (likely(nreq)) {
		uint32_t doorbell[2];

		doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
				     qp->send_wqe_offset) | f0 | op0);
		doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);

		wmb();

		mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
	}

	qp->sq.next_ind = ind;
	qp->sq.head    += nreq;

err_busy:
	cl_spinlock_release(&qp->sq.lock);

	UVP_EXIT(UVP_DBG_QP);
	return ret;
}

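/*
 * Post a chain of receive work requests on a Tavor-family HCA.  A receive
 * doorbell can only credit up to MTHCA_TAVOR_MAX_WQES_PER_RECV_DB WQEs at a
 * time, so long chains are split and the doorbell is rung once per batch.
 */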
int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
			  struct _ib_recv_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	uint32_t doorbell[2];
	int ret = 0;
	int nreq;
	int i;
	int size;
	int size0 = 0;
	int ind;
	uint8_t *wqe;
	uint8_t *prev_wqe;

	UVP_ENTER(UVP_DBG_QP);

	cl_spinlock_acquire(&qp->rq.lock);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.next_ind;
	if (ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		if (bad_wr)
			*bad_wr = wr;
		goto err_busy;
	}

	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
		if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
			nreq = 0;

			doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
			doorbell[1] = cl_hton32(ibqp->qp_num << 8); //TODO sleybo: add qpn to qp struct

			/*
			 * Make sure that descriptors are written
			 * before doorbell is rung.
			 */
			mb();

			mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);

			/* the next batch's doorbell describes WQEs starting at the current index */
			qp->rq.next_ind = ind;
			qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
			size0 = 0;
		}

		if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("RQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", ibqp->qp_num,
					qp->rq.head, qp->rq.tail,
					qp->rq.max, nreq));
			ret = -ENOMEM;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);
		prev_wqe = qp->rq.last;
		qp->rq.last = wqe;

		((struct mthca_next_seg *) wqe)->nda_op = 0;
		((struct mthca_next_seg *) wqe)->ee_nds =
			cl_hton32(MTHCA_NEXT_DBD);
		((struct mthca_next_seg *) wqe)->flags =
			cl_hton32(MTHCA_NEXT_CQ_UPDATE);

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		if (unlikely((int) wr->num_ds > qp->rq.max_gs)) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("RQ %06x too many gathers\n", ibqp->qp_num));
			ret = -ERANGE;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < (int) wr->num_ds; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cl_hton32(wr->ds_array[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				cl_hton32(wr->ds_array[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				cl_hton64(wr->ds_array[i].vaddr);
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		qp->wrid[ind] = wr->wr_id;

		((struct mthca_next_seg *) prev_wqe)->nda_op =
			cl_hton32((ind << qp->rq.wqe_shift) | 1);
		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cl_hton32(MTHCA_NEXT_DBD | size);

		if (!size0)
			size0 = size;

		++ind;
		if (unlikely(ind >= qp->rq.max))
			ind -= qp->rq.max;
	}

out:
	if (likely(nreq)) {
		doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
		doorbell[1] = cl_hton32((ibqp->qp_num << 8) | (nreq & 255));

		/*
		 * Make sure that descriptors are written before
		 * doorbell is rung.
		 */
		mb();

		mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
	}

	qp->rq.next_ind = ind;
	qp->rq.head    += nreq;

err_busy:
	cl_spinlock_release(&qp->rq.lock);
	UVP_EXIT(UVP_DBG_QP);
	return ret;
}

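/*
 * Post a chain of send work requests on an Arbel-family (mem-free) HCA.
 * Besides the MMIO doorbell, the in-memory send doorbell record
 * (*qp->sq.db) must be updated, and the two writes are ordered with
 * explicit memory barriers.  Chains longer than
 * MTHCA_ARBEL_MAX_WQES_PER_SEND_DB are split across several doorbells.
 */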
int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
			  struct _ib_send_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	uint32_t doorbell[2];
	uint8_t *wqe;
	uint8_t *prev_wqe;
	int ret = 0;
	int nreq;
	int i;
	int size;
	int size0 = 0;
	uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
	int ind;
	uint8_t op0 = 0;
	enum mthca_wr_opcode opcode;

	UVP_ENTER(UVP_DBG_QP);

	cl_spinlock_acquire(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head & (qp->sq.max - 1);
	if (ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		if (bad_wr)
			*bad_wr = wr;
		goto err_busy;
	}

	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
		if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
			nreq = 0;

			doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
					    ((qp->sq.head & 0xffff) << 8) | f0 | op0);
			doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
			qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
			size0 = 0;
			f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;

			/*
			 * Make sure that descriptors are written before
			 * doorbell record.
			 */
			wmb();
			*qp->sq.db = cl_hton32(qp->sq.head & 0xffff);

			/*
			 * Make sure doorbell record is written before we
			 * write MMIO send doorbell.
			 */
			wmb();
			mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
		}

		if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", ibqp->qp_num,
					qp->sq.head, qp->sq.tail,
					qp->sq.max, nreq));
			ret = -ENOMEM;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;
		opcode = conv_ibal_wr_opcode(wr);

		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
			 cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
			 cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
			cl_hton32(1);
		if (opcode == MTHCA_OPCODE_SEND_IMM ||
		    opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		switch (ibqp->qp_type) {
		case IB_QPT_RELIABLE_CONN:
			switch (opcode) {
			case MTHCA_OPCODE_ATOMIC_CS:
			case MTHCA_OPCODE_ATOMIC_FA:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;

				wqe += sizeof (struct mthca_raddr_seg);

				if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cl_hton64(wr->remote_ops.atomic2);
					((struct mthca_atomic_seg *) wqe)->compare =
						cl_hton64(wr->remote_ops.atomic1);
				} else {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cl_hton64(wr->remote_ops.atomic1);
					((struct mthca_atomic_seg *) wqe)->compare = 0;
				}

				wqe += sizeof (struct mthca_atomic_seg);
				size += (sizeof (struct mthca_raddr_seg) +
					 sizeof (struct mthca_atomic_seg)) / 16;
				break;

			case MTHCA_OPCODE_RDMA_READ:
			case MTHCA_OPCODE_RDMA_WRITE:
			case MTHCA_OPCODE_RDMA_WRITE_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IB_QPT_UNRELIABLE_CONN:
			switch (opcode) {
			case MTHCA_OPCODE_RDMA_WRITE:
			case MTHCA_OPCODE_RDMA_WRITE_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IB_QPT_UNRELIABLE_DGRM:
			{
				struct mthca_ah *ah = ((struct mthca_ah *) wr->dgrm.ud.h_av);
				memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
				       ah->av, sizeof (struct mthca_av));
				((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
				((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;

				wqe += sizeof (struct mthca_arbel_ud_seg);
				size += sizeof (struct mthca_arbel_ud_seg) / 16;
				break;
			}

		default:
			break;
		}

		if ((int) wr->num_ds > qp->sq.max_gs) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x too many gathers\n", ibqp->qp_num));
			ret = -ERANGE;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		if (wr->send_opt & IB_SEND_OPT_INLINE) {
			if (wr->num_ds) {
				struct mthca_inline_seg *seg = (struct mthca_inline_seg *) wqe;
				uint32_t s = 0;

				wqe += sizeof *seg;
				for (i = 0; i < (int) wr->num_ds; ++i) {
					struct _ib_local_ds *sge = &wr->ds_array[i];

					s += sge->length;

					if (s > (uint32_t) qp->max_inline_data) {
						ret = -1;
						if (bad_wr)
							*bad_wr = wr;
						goto out;
					}

					memcpy(wqe, (void *) (uintptr_t) sge->vaddr,
					       sge->length);
					wqe += sge->length;
				}

				seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
				size += align(s + sizeof *seg, 16) / 16;
			}
		} else {

			for (i = 0; i < (int) wr->num_ds; ++i) {
				((struct mthca_data_seg *) wqe)->byte_count =
					cl_hton32(wr->ds_array[i].length);
				((struct mthca_data_seg *) wqe)->lkey =
					cl_hton32(wr->ds_array[i].lkey);
				((struct mthca_data_seg *) wqe)->addr =
					cl_hton64(wr->ds_array[i].vaddr);
				wqe += sizeof (struct mthca_data_seg);
				size += sizeof (struct mthca_data_seg) / 16;
			}
//TODO do this also in kernel
//			size += wr->num_ds * (sizeof *seg / 16);
		}

		qp->wrid[ind + qp->rq.max] = wr->wr_id;

		if (opcode == MTHCA_OPCODE_INVALID) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("SQ %06x opcode invalid\n", ibqp->qp_num));
			ret = -EINVAL;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		((struct mthca_next_seg *) prev_wqe)->nda_op =
			cl_hton32(((ind << qp->sq.wqe_shift) +
			       qp->send_wqe_offset) |
			      opcode);
		wmb();
		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cl_hton32(MTHCA_NEXT_DBD | size |
			  ((wr->send_opt & IB_SEND_OPT_FENCE) ?
						   MTHCA_NEXT_FENCE : 0));

		if (!size0) {
			size0 = size;
			op0   = opcode;
		}

		++ind;
		if (unlikely(ind >= qp->sq.max))
			ind -= qp->sq.max;
	}

out:
	if (likely(nreq)) {
		doorbell[0] = cl_hton32((nreq << 24) |
				    ((qp->sq.head & 0xffff) << 8) | f0 | op0);
		doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);

		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();
		*qp->sq.db = cl_hton32(qp->sq.head & 0xffff);

		/*
		 * Make sure doorbell record is written before we
		 * write MMIO send doorbell.
		 */
		wmb();
		mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
	}

err_busy:
	cl_spinlock_release(&qp->sq.lock);

	UVP_EXIT(UVP_DBG_QP);

	return ret;
}

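/*
 * Post a chain of receive work requests on an Arbel-family (mem-free) HCA.
 * Mem-free hardware picks up receive WQEs through the doorbell record, so
 * only *qp->rq.db needs to be updated; no MMIO receive doorbell is written.
 */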
int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
			  struct _ib_recv_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	int ret = 0;
	int nreq;
	int ind;
	int i;
	uint8_t *wqe;

	UVP_ENTER(UVP_DBG_QP);

	cl_spinlock_acquire(&qp->rq.lock);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.head & (qp->rq.max - 1);
	if (ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		if (bad_wr)
			*bad_wr = wr;
		goto err_busy;
	}
	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
		if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) { //TODO sleybo: check the cq
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("RQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", ibqp->qp_num,
					qp->rq.head, qp->rq.tail,
					qp->rq.max, nreq));
			ret = -ENOMEM;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);

		((struct mthca_next_seg *) wqe)->flags = 0;

		wqe += sizeof (struct mthca_next_seg);

		if (unlikely((int) wr->num_ds > qp->rq.max_gs)) {
			UVP_PRINT(TRACE_LEVEL_ERROR, UVP_DBG_QP, ("RQ %06x too many scatter entries\n", ibqp->qp_num));
			ret = -ERANGE;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < (int) wr->num_ds; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cl_hton32(wr->ds_array[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				cl_hton32(wr->ds_array[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				cl_hton64(wr->ds_array[i].vaddr);
			wqe += sizeof (struct mthca_data_seg);
		}

		if (i < qp->rq.max_gs) {
			((struct mthca_data_seg *) wqe)->byte_count = 0;
			((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);
			((struct mthca_data_seg *) wqe)->addr = 0;
		}

		qp->wrid[ind] = wr->wr_id;

		++ind;
		if (unlikely(ind >= qp->rq.max))
			ind -= qp->rq.max;
	}
out:
	if (likely(nreq)) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		mb();
		*qp->rq.db = cl_hton32(qp->rq.head & 0xffff);
	}

err_busy:
	cl_spinlock_release(&qp->rq.lock);

	UVP_EXIT(UVP_DBG_QP);

	return ret;
}

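/*
 * Size and allocate the QP work-queue buffer.  WQE strides are rounded up
 * to powers of two (wqe_shift); receive WQEs occupy the start of the buffer
 * and send WQEs begin at send_wqe_offset.  On mem-free HCAs the receive
 * WQEs are pre-linked and their scatter lists are terminated with the
 * invalid lkey.
 */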
int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
		       ib_qp_type_t type, struct mthca_qp *qp)
{
	int size;
	int max_sq_sge;

	qp->rq.max_gs    = cap->max_recv_sge;
	qp->sq.max_gs    = cap->max_send_sge;
	max_sq_sge       = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
				 sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);
	if (max_sq_sge < (int) cap->max_send_sge)
		max_sq_sge = cap->max_send_sge;

	qp->wrid = cl_malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));
	if (!qp->wrid)
		return -1;

	size = sizeof (struct mthca_next_seg) +
		qp->rq.max_gs * sizeof (struct mthca_data_seg);

	for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
	     qp->rq.wqe_shift++)
		; /* nothing */

	size = max_sq_sge * sizeof (struct mthca_data_seg);
	switch (type) {
	case IB_QPT_UNRELIABLE_DGRM:
		size += mthca_is_memfree(pd->context) ?
			sizeof (struct mthca_arbel_ud_seg) :
			sizeof (struct mthca_tavor_ud_seg);
		break;

	case IB_QPT_UNRELIABLE_CONN:
		size += sizeof (struct mthca_raddr_seg);
		break;

	case IB_QPT_RELIABLE_CONN:
		size += sizeof (struct mthca_raddr_seg);
		/*
		 * An atomic op will require an atomic segment, a
		 * remote address segment and one scatter entry.
		 */
		if (size < (sizeof (struct mthca_atomic_seg) +
			    sizeof (struct mthca_raddr_seg) +
			    sizeof (struct mthca_data_seg)))
			size = (sizeof (struct mthca_atomic_seg) +
				sizeof (struct mthca_raddr_seg) +
				sizeof (struct mthca_data_seg));
		break;

	default:
		break;
	}

	/* Make sure that we have enough space for a bind request */
	if (size < sizeof (struct mthca_bind_seg))
		size = sizeof (struct mthca_bind_seg);

	size += sizeof (struct mthca_next_seg);

	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
	     qp->sq.wqe_shift++)
		; /* nothing */

	qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,
				    1 << qp->sq.wqe_shift);

	qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);

	if (posix_memalign(&qp->buf, g_page_size,
			   align(qp->buf_size, g_page_size))) {
		cl_free(qp->wrid);
		return -1;
	}

	memset(qp->buf, 0, qp->buf_size);

	if (mthca_is_memfree(pd->context)) {
		struct mthca_next_seg *next;
		struct mthca_data_seg *scatter;
		int i;
		uint32_t sz;

		sz = cl_hton32((sizeof (struct mthca_next_seg) +
			    qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);

		for (i = 0; i < qp->rq.max; ++i) {
			next = get_recv_wqe(qp, i);
			next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<
					     qp->rq.wqe_shift);
			next->ee_nds = sz;

			for (scatter = (void *) (next + 1);
			     (void *) scatter < (void *) ((char *) next + (1 << qp->rq.wqe_shift));
			     ++scatter)
				scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);
		}

		for (i = 0; i < qp->sq.max; ++i) {
			next = get_send_wqe(qp, i);
			next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<
					      qp->sq.wqe_shift) +
					     qp->send_wqe_offset);
		}
	}

	qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
	qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);

	return 0;
}

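/*
 * QPN-to-QP lookup table.  The table is split into chunks of
 * (qp_table_mask + 1) entries; tind selects the chunk from the low QPN
 * bits, the masked QPN selects the slot, and refcnt tracks how many QPs
 * currently live in each chunk.
 */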
struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (ctx->qp_table[tind].refcnt)
		return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
	else
		return NULL;
}

int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
	int ret = 0;

	WaitForSingleObject( ctx->qp_table_mutex, INFINITE );

	if (!ctx->qp_table[tind].refcnt) {
		ctx->qp_table[tind].table = cl_malloc(
			(ctx->qp_table_mask + 1) * sizeof (struct mthca_qp *));
		if (!ctx->qp_table[tind].table) {
			ret = -1;
			goto out;
		}
	}
	++ctx->qp_table[tind].refcnt;
	ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;

out:
	ReleaseMutex( ctx->qp_table_mutex );
	return ret;
}

void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	WaitForSingleObject( ctx->qp_table_mutex, INFINITE );

	if (!--ctx->qp_table[tind].refcnt)
		cl_free(ctx->qp_table[tind].table);
	else
		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;

	ReleaseMutex( ctx->qp_table_mutex );
}

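/*
 * Recover the doorbell state of a WQE that completed in error: *dbd reports
 * whether the WQE had its DBD bit set, and *new_wqe receives the next-WQE
 * address/size words (or 0 if there is no further WQE to execute), so that
 * error handling can resume the descriptor chain.
 */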
int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
		       int index, int *dbd, uint32_t *new_wqe)
{
	struct mthca_next_seg *next;

	/*
	 * For SRQs, all WQEs generate a CQE, so we're always at the
	 * end of the doorbell chain.
	 */
	if (qp->ibv_qp.srq) {
		*new_wqe = 0;
		return 0;
	}

	if (is_send)
		next = get_send_wqe(qp, index);
	else
		next = get_recv_wqe(qp, index);

	*dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));
	if (next->ee_nds & cl_hton32(0x3f))
		*new_wqe = (next->nda_op & cl_hton32(~0x3f)) |
			(next->ee_nds & cl_hton32(0x3f));
	else
		*new_wqe = 0;

	return 0;
}