1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  *
33  * $Id$
34  */
35
36 #include <mt_l2w.h>
37 #include "mlnx_uvp.h"
38 #include "mlnx_uvp_doorbell.h"
39 #include "mlnx_uvp_wqe.h"
40 #include "mlnx_ual_data.h"
41
42 #if defined(EVENT_TRACING)
43 #include "mlnx_uvp_qp.tmh"
44 #endif
45
46 static const uint8_t mthca_opcode[] = {
47         MTHCA_OPCODE_RDMA_WRITE,
48         MTHCA_OPCODE_RDMA_WRITE_IMM,
49         MTHCA_OPCODE_SEND,
50         MTHCA_OPCODE_SEND_IMM,
51         MTHCA_OPCODE_RDMA_READ,
52         MTHCA_OPCODE_ATOMIC_CS,
53         MTHCA_OPCODE_ATOMIC_FA
54 };
55
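/*
 * Map an IBAL work request type (plus the IMMEDIATE send option) to the
 * corresponding mthca hardware opcode; returns MTHCA_OPCODE_INVALID for
 * request types this provider cannot post.
 */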
56 static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)
57 {
58         enum mthca_wr_opcode opcode = MTHCA_OPCODE_INVALID;
59
60         switch (wr->wr_type) {
61                 case WR_SEND: 
62                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;
63                         break;
64                 case WR_RDMA_WRITE:     
65                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;
66                         break;
67                 case WR_RDMA_READ:        opcode = MTHCA_OPCODE_RDMA_READ;  break;
68                 case WR_COMPARE_SWAP:     opcode = MTHCA_OPCODE_ATOMIC_CS;  break;
69                 case WR_FETCH_ADD:        opcode = MTHCA_OPCODE_ATOMIC_FA;  break;
70                 default:                  opcode = MTHCA_OPCODE_INVALID;    break;
71         }
72         return opcode;
73 }
74
75
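/* Dump the first 64 bytes of a WQE (16 dwords, byte-swapped from big-endian) to the trace log. */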
76 static void dump_wqe(uint32_t print_lvl, uint32_t *wqe_ptr , struct mthca_qp *qp_ptr)
77 {
78         net32_t *wqe = wqe_ptr;
79
80         (void) wqe;     /* avoid warning if mthca_dbg compiled away... */
81         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents  QPN 0x%06x \n",qp_ptr->ibv_qp.qp_num));
82         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0
83                 , cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3])));
84         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4
85                 , cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7])));
86         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8
87                 , cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11])));
88         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12
89                 , cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15])));
90
91 }
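/*
 * WQE address helpers: receive WQEs live at the start of the QP buffer,
 * send WQEs start at send_wqe_offset; both are indexed by n << wqe_shift.
 */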
92 static void *get_recv_wqe(struct mthca_qp *qp, int n)
93 {
94         return qp->buf + (n << qp->rq.wqe_shift);
95 }
96
97 static void *get_send_wqe(struct mthca_qp *qp, int n)
98 {
99         void *wqe_addr = qp->buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);
100         UVP_PRINT(TRACE_LEVEL_INFORMATION,UVP_DBG_QP,
101                 ("wqe %p, qp_buf %p, offset %#x,  index %d, shift %d \n",
102                  wqe_addr, qp->buf, qp->send_wqe_offset, n, 
103                 qp->sq.wqe_shift));
104         
105         return wqe_addr;
106 }
107
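/* Reset the producer/consumer indices and "last WQE" pointers of both work queues. */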
108 void mthca_init_qp_indices(struct mthca_qp *qp)
109 {
110         qp->sq.next_ind  = 0;
111         qp->sq.last_comp = qp->sq.max - 1;
112         qp->sq.head      = 0;
113         qp->sq.tail      = 0;
114         qp->sq.last      = get_send_wqe(qp, qp->sq.max - 1);
115
116         qp->rq.next_ind  = 0;
117         qp->rq.last_comp = qp->rq.max - 1;
118         qp->rq.head      = 0;
119         qp->rq.tail      = 0;
120         qp->rq.last      = get_recv_wqe(qp, qp->rq.max - 1);
121 }
122
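/*
 * Check whether posting nreq more WQEs would overflow the work queue.
 * The tail is only advanced by completion processing, so on apparent
 * overflow the CQ lock is taken to re-read a consistent tail before giving up.
 */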
123 static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)
124 {
125         unsigned cur;
126
127         cur = wq->head - wq->tail;
128         if ((int)(cur + nreq) < wq->max)
129                 return 0;
130
131         cl_spinlock_acquire(&cq->lock);
132         cur = wq->head - wq->tail;
133         cl_spinlock_release(&cq->lock);
134
135         return (int)(cur + nreq) >= wq->max;
136 }
137
138
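/*
 * Post a list of send work requests on a Tavor-mode HCA: build each WQE in the
 * next free send queue slot, link it from the previous WQE's nda_op/ee_nds
 * words, and ring one MMIO send doorbell for the whole chain at the end.
 */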
139 int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
140                           struct _ib_send_wr **bad_wr)
141 {
142         struct mthca_qp *qp = to_mqp(ibqp);
143         uint8_t *wqe;
144         uint8_t *prev_wqe;
145         int ret = 0;
146         int nreq;
147         int i;
148         int size;
149         int size0 = 0;
150         uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
151         int ind;
152         int op0 = 0;
153         enum mthca_wr_opcode opcode;
154         
155         UVP_ENTER(UVP_DBG_QP);
156         cl_spinlock_acquire(&qp->sq.lock);
157
158         /* XXX check that state is OK to post send */
159
160         ind = qp->sq.next_ind;
161
162         if(ibqp->state == IBV_QPS_RESET) {
163                 ret = -EBUSY;
164                 if (bad_wr)
165                         *bad_wr = wr;
166                 goto err_busy;
167         }
168
169         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
170
171                 if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
172                         UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x full (%u head, %u tail,"
173                                         " %d max, %d nreq)\n", ibqp->qp_num,
174                                         qp->sq.head, qp->sq.tail,
175                                         qp->sq.max, nreq));
176                         ret = -ENOMEM;
177                         if (bad_wr)
178                                 *bad_wr = wr;
179                         goto out;
180                 }
181
182                 wqe = get_send_wqe(qp, ind);
183                 prev_wqe = qp->sq.last;
184                 qp->sq.last = wqe;
185                 opcode = conv_ibal_wr_opcode(wr);
186                 if (opcode == MTHCA_OPCODE_INVALID) {
187                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
188                         ret = -EINVAL;
189                         if (bad_wr)
190                                 *bad_wr = wr;
191                         goto out;
192                 }
193
194
195                 ((struct mthca_next_seg *) wqe)->nda_op = 0;
196                 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
197                 ((struct mthca_next_seg *) wqe)->flags =
198                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
199                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
200                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
201                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
202                         cl_hton32(1);
203                 if (opcode == MTHCA_OPCODE_SEND_IMM ||
204                     opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
205                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
206
207                 wqe += sizeof (struct mthca_next_seg);
208                 size = sizeof (struct mthca_next_seg) / 16;
209
210
211                 switch (ibqp->qp_type) {
212                 case IB_QPT_RELIABLE_CONN:
213                         switch (opcode) {
214                         case MTHCA_OPCODE_ATOMIC_CS:
215                         case MTHCA_OPCODE_ATOMIC_FA:
216                                 ((struct mthca_raddr_seg *) wqe)->raddr =
217                                         cl_hton64(wr->remote_ops.vaddr);
218                                 ((struct mthca_raddr_seg *) wqe)->rkey =
219                                         wr->remote_ops.rkey;
220                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
221
222                                 wqe += sizeof (struct mthca_raddr_seg);
223
224                                 if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
225                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
226                                                 cl_hton64(wr->remote_ops.atomic2);
227                                         ((struct mthca_atomic_seg *) wqe)->compare =
228                                                 cl_hton64(wr->remote_ops.atomic1);
229                                 } else {
230                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
231                                                 cl_hton64(wr->remote_ops.atomic1);
232                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;
233                                 }
234
235                                 wqe += sizeof (struct mthca_atomic_seg);
236                                 size += (sizeof (struct mthca_raddr_seg) +
237                                          sizeof (struct mthca_atomic_seg)) / 16;
238                                 break;
239
240                         case MTHCA_OPCODE_RDMA_WRITE:
241                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
242                         case MTHCA_OPCODE_RDMA_READ:
243                                 ((struct mthca_raddr_seg *) wqe)->raddr =
244                                         cl_hton64(wr->remote_ops.vaddr);
245                                 ((struct mthca_raddr_seg *) wqe)->rkey =
246                                         wr->remote_ops.rkey;
247                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
248                                 wqe += sizeof (struct mthca_raddr_seg);
249                                 size += sizeof (struct mthca_raddr_seg) / 16;
250                                 break;
251
252                         default:
253                                 /* No extra segments required for sends */
254                                 break;
255                         }
256
257                         break;
258
259                 case IB_QPT_UNRELIABLE_CONN:
260                         switch (opcode) {
261                         case MTHCA_OPCODE_RDMA_WRITE:
262                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
263                                 ((struct mthca_raddr_seg *) wqe)->raddr =
264                                         cl_hton64(wr->remote_ops.vaddr);
265                                 ((struct mthca_raddr_seg *) wqe)->rkey =
266                                         wr->remote_ops.rkey;
267                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
268                                 wqe += sizeof (struct mthca_raddr_seg);
269                                 size += sizeof (struct mthca_raddr_seg) / 16;
270                                 break;
271
272                         default:
273                                 /* No extra segments required for sends */
274                                 break;
275                         }
276
277                         break;
278
279                 case IB_QPT_UNRELIABLE_DGRM:
280                         {
281                                 struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);
282                                 ((struct mthca_tavor_ud_seg *) wqe)->lkey =
283                                         cl_hton32(ah->key);
284                                 ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
285                                         cl_hton64((uint64_t)ah->av);
286                                 ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
287                                 ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
288
289                                 wqe += sizeof (struct mthca_tavor_ud_seg);
290                                 size += sizeof (struct mthca_tavor_ud_seg) / 16;
291                                 break;
292                         }
293
294                 default:
295                         break;
296                 }
297
298                 if ((int)wr->num_ds > qp->sq.max_gs) {
299                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num));
300                         ret = -ERANGE;
301                         if (bad_wr)
302                                 *bad_wr = wr;
303                         goto out;
304                 }
305 //TODO sleybo:
306                 if (wr->send_opt & IB_SEND_OPT_INLINE) {
307                         if (wr->num_ds) {
308                                 struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
309                                 int s = 0;
310
311                                 wqe += sizeof *seg;
312                                 for (i = 0; i < (int)wr->num_ds; ++i) {
313                                         struct _ib_local_ds *sge = &wr->ds_array[i];
314
315                                         s += sge->length;
316
317                                         if (s > qp->max_inline_data) {
318                                                 ret = -1;
319                                                 if (bad_wr)
320                                                         *bad_wr = wr;
321                                                 goto out;
322                                         }
323
324                                         memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr,
325                                                sge->length);
326                                         wqe += sge->length;
327                                 }
328
329                                 seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
330                                 size += align(s + sizeof *seg, 16) / 16;
331                         }
332                 } else {
333                         for (i = 0; i < (int)wr->num_ds; ++i) {
334                                 ((struct mthca_data_seg *) wqe)->byte_count =
335                                         cl_hton32(wr->ds_array[i].length);
336                                 ((struct mthca_data_seg *) wqe)->lkey =
337                                         cl_hton32(wr->ds_array[i].lkey);
338                                 ((struct mthca_data_seg *) wqe)->addr =
339                                         cl_hton64(wr->ds_array[i].vaddr);
340                                 wqe += sizeof (struct mthca_data_seg);
341                                 size += sizeof (struct mthca_data_seg) / 16;
342                         }
343                 }
344
345                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
346
347                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
348                         cl_hton32(((ind << qp->sq.wqe_shift) +
349                         qp->send_wqe_offset) |opcode);
350                 
351                 wmb();
352                 
353                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
354                         cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
355                         ((wr->send_opt & IB_SEND_OPT_FENCE) ?
356                          MTHCA_NEXT_FENCE : 0));
357
358                 if (!size0) {
359                         size0 = size;
360                         op0   = opcode;
361                 }
362                 
363                 dump_wqe( TRACE_LEVEL_VERBOSE, (uint32_t*)qp->sq.last,qp);
364                 
365                 ++ind;
366                 if (unlikely(ind >= qp->sq.max))
367                         ind -= qp->sq.max;
368
369         }
370
371 out:
372         if (likely(nreq)) {
373                 uint32_t doorbell[2];
374
375                 doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
376                                      qp->send_wqe_offset) | f0 | op0);
377                 doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
378
379                 wmb();
380
381                 mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
382         }
383
384         qp->sq.next_ind = ind;
385         qp->sq.head    += nreq;
386
387 err_busy:
388         cl_spinlock_release(&qp->sq.lock);
389         
390         UVP_EXIT(UVP_DBG_QP);
391         return ret;
392 }
393
394
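/*
 * Post a list of receive work requests on a Tavor-mode HCA.  A Tavor receive
 * doorbell covers at most MTHCA_TAVOR_MAX_WQES_PER_RECV_DB WQEs, so longer
 * lists are flushed to the hardware in chunks inside the loop.
 */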
395 int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
396                           struct _ib_recv_wr **bad_wr)
397 {
398         struct mthca_qp *qp = to_mqp(ibqp);
399         uint32_t doorbell[2];
400         int ret = 0;
401         int nreq;
402         int i;
403         int size;
404         int size0 = 0;
405         int ind;
406         uint8_t *wqe;
407         uint8_t *prev_wqe;
408         
409         UVP_ENTER(UVP_DBG_QP);
410         
411         cl_spinlock_acquire(&qp->rq.lock);
412
413         /* XXX check that state is OK to post receive */
414         
415         ind = qp->rq.next_ind;
416         if(ibqp->state == IBV_QPS_RESET) {
417                 ret = -EBUSY;
418                 if (bad_wr)
419                         *bad_wr = wr;
420                 goto err_busy;
421         }
422         
423         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
424                 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
425                         nreq = 0;
426
427                         doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
428                         doorbell[1] = cl_hton32(ibqp->qp_num << 8); //TODO sleybo: add qpn to qp struct 
429
430                         /*
431                          * Make sure that descriptors are written
432                          * before doorbell is rung.
433                          */
434                         mb();
435
436                         mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
437                         qp->rq.next_ind = ind;
438                         qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
439                         size0 = 0;
440                 }
441
442                 if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
443                         UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("RQ %06x full (%u head, %u tail,"
444                                         " %d max, %d nreq)\n", ibqp->qp_num,
445                                         qp->rq.head, qp->rq.tail,
446                                         qp->rq.max, nreq));
447                         ret = -ENOMEM;
448                         if (bad_wr)
449                                 *bad_wr = wr;
450                         goto out;
451                 }
452
453                 wqe = get_recv_wqe(qp, ind);
454                 prev_wqe = qp->rq.last;
455                 qp->rq.last = wqe;
456
457                 ((struct mthca_next_seg *) wqe)->nda_op = 0;
458                 ((struct mthca_next_seg *) wqe)->ee_nds =
459                         cl_hton32(MTHCA_NEXT_DBD);
460                 ((struct mthca_next_seg *) wqe)->flags =
461                         cl_hton32(MTHCA_NEXT_CQ_UPDATE);
462
463                 wqe += sizeof (struct mthca_next_seg);
464                 size = sizeof (struct mthca_next_seg) / 16;
465
466                 if (unlikely((int)wr->num_ds  > qp->rq.max_gs)) {
467                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("RQ %06x too many gathers\n",ibqp->qp_num));
468                         ret = -ERANGE;
469                         if (bad_wr)
470                                 *bad_wr = wr;
471                         goto out;
472                 }
473
474                 for (i = 0; i < (int)wr->num_ds; ++i) {
475                         ((struct mthca_data_seg *) wqe)->byte_count =
476                                 cl_hton32(wr->ds_array[i].length);
477                         ((struct mthca_data_seg *) wqe)->lkey =
478                                 cl_hton32(wr->ds_array[i].lkey);
479                         ((struct mthca_data_seg *) wqe)->addr =
480                                 cl_hton64(wr->ds_array[i].vaddr);
481                         wqe += sizeof (struct mthca_data_seg);
482                         size += sizeof (struct mthca_data_seg) / 16;
483                 }
484
485                 qp->wrid[ind] = wr->wr_id;
486
487                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
488                         cl_hton32((ind << qp->rq.wqe_shift) | 1);
489                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
490                         cl_hton32(MTHCA_NEXT_DBD | size);
491
492                 if (!size0)
493                         size0 = size;
494
495                 ++ind;
496                 if (unlikely(ind >= qp->rq.max))
497                         ind -= qp->rq.max;
498         }
499
500 out:
501         if (likely(nreq)) {
502                 doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
503                 doorbell[1] = cl_hton32((ibqp->qp_num << 8) | (nreq & 255));
504
505                 /*
506                  * Make sure that descriptors are written before
507                  * doorbell is rung.
508                  */
509                 mb();
510
511                 mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
512         }
513
514         qp->rq.next_ind = ind;
515         qp->rq.head    += nreq;
516
517 err_busy:
518         cl_spinlock_release(&qp->rq.lock);
519         UVP_EXIT(UVP_DBG_QP);
520         return ret;
521 }
522
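/*
 * Post a list of send work requests on an Arbel (mem-free) HCA: besides the
 * MMIO doorbell, the send doorbell record (qp->sq.db) is updated, and lists
 * longer than MTHCA_ARBEL_MAX_WQES_PER_SEND_DB are rung in chunks.
 */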
523 int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
524                           struct _ib_send_wr **bad_wr)
525 {
526         struct mthca_qp *qp = to_mqp(ibqp);
527         uint32_t doorbell[2];
528         uint8_t *wqe;
529         uint8_t *prev_wqe;
530         int ret = 0;
531         int nreq;       
532         int i;
533         int size;
534         int size0 = 0;
535         uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
536         int ind;
537         uint8_t op0 = 0;
538         enum mthca_wr_opcode opcode;
539         
540         UVP_ENTER(UVP_DBG_QP);
541         
542         cl_spinlock_acquire(&qp->sq.lock);
543
544         /* XXX check that state is OK to post send */
545
546         ind = qp->sq.head & (qp->sq.max - 1);
547         if(ibqp->state == IBV_QPS_RESET) {
548                 ret = -EBUSY;
549                 if (bad_wr)
550                         *bad_wr = wr;
551                 goto err_busy;
552         }
553
554         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
555                 if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
556                         nreq = 0;
557
558                         doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
559                                             ((qp->sq.head & 0xffff) << 8) | f0 | op0);
560                         doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
561                         qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
562                         size0 = 0;
563                         f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
564
565                         /*
566                          * Make sure that descriptors are written before
567                          * doorbell record.
568                          */
569                         wmb();
570                         *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
571
572                         /*
573                          * Make sure doorbell record is written before we
574                          * write MMIO send doorbell.
575                          */
576                         wmb();
577                         mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
578
579                 }
580
581                 if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
582                         UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("SQ %06x full (%u head, %u tail,"
583                                         " %d max, %d nreq)\n", ibqp->qp_num,
584                                         qp->sq.head, qp->sq.tail,
585                                         qp->sq.max, nreq));                     
586                         ret = -ENOMEM;
587                         if (bad_wr)
588                                 *bad_wr = wr;
589                         goto out;
590                 }
591
592                 wqe = get_send_wqe(qp, ind);
593                 prev_wqe = qp->sq.last;
594                 qp->sq.last = wqe;
595                 opcode = conv_ibal_wr_opcode(wr);
596
597                 ((struct mthca_next_seg *) wqe)->flags =
598                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
599                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
600                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
601                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
602                         cl_hton32(1);
603                 if (opcode == MTHCA_OPCODE_SEND_IMM ||
604                         opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
605                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
606
607                 wqe += sizeof (struct mthca_next_seg);
608                 size = sizeof (struct mthca_next_seg) / 16;
609
610                 switch (ibqp->qp_type) {
611                 case IB_QPT_RELIABLE_CONN:
612                         switch (opcode) {
613                         case MTHCA_OPCODE_ATOMIC_CS:
614                         case MTHCA_OPCODE_ATOMIC_FA:
615                                 ((struct mthca_raddr_seg *) wqe)->raddr =
616                                         cl_hton64(wr->remote_ops.vaddr);
617                                 ((struct mthca_raddr_seg *) wqe)->rkey =
618                                         wr->remote_ops.rkey;
619                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
620
621                                 wqe += sizeof (struct mthca_raddr_seg);
622
623                                 if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
624                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
625                                                 cl_hton64(wr->remote_ops.atomic2);
626                                         ((struct mthca_atomic_seg *) wqe)->compare =
627                                                 cl_hton64(wr->remote_ops.atomic1);
628                                 } else {
629                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
630                                                 cl_hton64(wr->remote_ops.atomic1);
631                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;
632                                 }
633
634                                 wqe += sizeof (struct mthca_atomic_seg);
635                                 size += (sizeof (struct mthca_raddr_seg) +
636                                          sizeof (struct mthca_atomic_seg)) / 16;
637                                 break;
638
639                         case MTHCA_OPCODE_RDMA_READ:
640                         case MTHCA_OPCODE_RDMA_WRITE:
641                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
642                                 ((struct mthca_raddr_seg *) wqe)->raddr =
643                                         cl_hton64(wr->remote_ops.vaddr);
644                                 ((struct mthca_raddr_seg *) wqe)->rkey =
645                                         wr->remote_ops.rkey;
646                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
647                                 wqe += sizeof (struct mthca_raddr_seg);
648                                 size += sizeof (struct mthca_raddr_seg) / 16;
649                                 break;
650
651                         default:
652                                 /* No extra segments required for sends */
653                                 break;
654                         }
655
656                         break;
657
658                 case IB_QPT_UNRELIABLE_CONN:
659                         switch (opcode) {
660                         case MTHCA_OPCODE_RDMA_WRITE:
661                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
662                                 ((struct mthca_raddr_seg *) wqe)->raddr =
663                                         cl_hton64(wr->remote_ops.vaddr);
664                                 ((struct mthca_raddr_seg *) wqe)->rkey =
665                                         wr->remote_ops.rkey;
666                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
667                                 wqe += sizeof (struct mthca_raddr_seg);
668                                 size += sizeof (struct mthca_raddr_seg) / 16;
669                                 break;
670
671                         default:
672                                 /* No extra segments required for sends */
673                                 break;
674                         }
675
676                         break;
677
678                 case IB_QPT_UNRELIABLE_DGRM:
679                         {
680                                 struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);
681                                 memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
682                                        ah->av, sizeof ( struct mthca_av));
683                                 ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
684                                 ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
685
686
687                                 wqe += sizeof (struct mthca_arbel_ud_seg);
688                                 size += sizeof (struct mthca_arbel_ud_seg) / 16;
689                                 break;
690                         }
691
692                 default:
693                         break;
694                 }
695
696                 if ((int)wr->num_ds > qp->sq.max_gs) {
697                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num));
698                         ret = -ERANGE;
699                         if (bad_wr)
700                                 *bad_wr = wr;
701                         goto out;
702                 }
703
704                 if (wr->send_opt & IB_SEND_OPT_INLINE) {
705                         if (wr->num_ds) {
706                                 struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
707                                 int s = 0;
708
709                                 wqe += sizeof *seg;
710                                 for (i = 0; i < (int)wr->num_ds; ++i) {
711                                         struct _ib_local_ds *sge = &wr->ds_array[i];
712
713                                         s += sge->length;
714
715                                         if (s > qp->max_inline_data) {
716                                                 ret = -1;
717                                                 if (bad_wr)
718                                                         *bad_wr = wr;
719                                                 goto out;
720                                         }
721
722                                         memcpy(wqe, (void *) (uintptr_t) sge->vaddr,
723                                                sge->length);
724                                         wqe += sge->length;
725                                 }
726
727                                 seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
728                                 size += align(s + sizeof *seg, 16) / 16;
729                         }
730                 } else {
731
732                         for (i = 0; i < (int)wr->num_ds; ++i) {
733                                 ((struct mthca_data_seg *) wqe)->byte_count =
734                                         cl_hton32(wr->ds_array[i].length);
735                                 ((struct mthca_data_seg *) wqe)->lkey =
736                                         cl_hton32(wr->ds_array[i].lkey);
737                                 ((struct mthca_data_seg *) wqe)->addr =
738                                         cl_hton64(wr->ds_array[i].vaddr);
739                                 wqe += sizeof (struct mthca_data_seg);
740                                 size += sizeof (struct mthca_data_seg) / 16;
741                         }
742 //TODO do this also in kernel
743 //                      size += wr->num_ds * (sizeof *seg / 16);
744                 }
745
746                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
747
748                 if (opcode == MTHCA_OPCODE_INVALID) {
749                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
750                         ret = -EINVAL;
751                         if (bad_wr)
752                                 *bad_wr = wr;
753                         goto out;
754                 }
755
756                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
757                         cl_hton32(((ind << qp->sq.wqe_shift) +
758                                qp->send_wqe_offset) |
759                               opcode);
760                 wmb();
761                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
762                         cl_hton32(MTHCA_NEXT_DBD | size |
763                                   ((wr->send_opt & IB_SEND_OPT_FENCE) ?
764                                    MTHCA_NEXT_FENCE :
765                                    0));
766
767                 if (!size0) {
768                         size0 = size;
769                         op0   = opcode;
770                 }
771
772                 ++ind;
773                 if (unlikely(ind >= qp->sq.max))
774                         ind -= qp->sq.max;
775         }
776
777 out:
778         if (likely(nreq)) {
779                 doorbell[0] = cl_hton32((nreq << 24) |
780                                     ((qp->sq.head & 0xffff) << 8) | f0 | op0);
781                 doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
782
783                 qp->sq.head += nreq;
784
785                 /*
786                  * Make sure that descriptors are written before
787                  * doorbell record.
788                  */
789                 wmb();
790                 *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
791
792                 /*
793                  * Make sure doorbell record is written before we
794                  * write MMIO send doorbell.
795                  */
796                 wmb();
797                 mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
798         }
799
800 err_busy:
801         cl_spinlock_release(&qp->sq.lock);
802
803         UVP_EXIT(UVP_DBG_QP);
804         
805         return ret;
806 }
807
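/*
 * Post a list of receive work requests on an Arbel (mem-free) HCA.  Only the
 * receive doorbell record (qp->rq.db) is updated after the WQEs are written;
 * no MMIO doorbell write is needed on this path.
 */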
808 int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
809                           struct _ib_recv_wr **bad_wr)
810 {
811         struct mthca_qp *qp = to_mqp(ibqp);
812         int ret = 0;
813         int nreq;
814         int ind;
815         int i;
816         uint8_t *wqe;
817         
818         UVP_ENTER(UVP_DBG_QP);
819         
820         cl_spinlock_acquire(&qp->rq.lock);
821
822         /* XXX check that state is OK to post receive */
823
824         ind = qp->rq.head & (qp->rq.max - 1);
825         if(ibqp->state == IBV_QPS_RESET) {
826                 ret = -EBUSY;
827                 if (bad_wr)
828                         *bad_wr = wr;
829                 goto err_busy;
830         }
831         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
832                 if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {//TODO sleybo: check the cq
833                         UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full (%u head, %u tail,"
834                                         " %d max, %d nreq)\n", ibqp->qp_num,
835                                         qp->rq.head, qp->rq.tail,
836                                         qp->rq.max, nreq));
837                         ret = -ENOMEM;
838                         if (bad_wr)
839                                 *bad_wr = wr;
840                         goto out;
841                 }
842
843                 wqe = get_recv_wqe(qp, ind);
844
845                 ((struct mthca_next_seg *) wqe)->flags = 0;
846
847                 wqe += sizeof (struct mthca_next_seg);
848
849                 if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
850                         UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x too many scatter entries\n",ibqp->qp_num));
851                         ret = -ERANGE;
852                         if (bad_wr)
853                                 *bad_wr = wr;
854                         goto out;
855                 }
856
857                 for (i = 0; i < (int)wr->num_ds; ++i) {
858                         ((struct mthca_data_seg *) wqe)->byte_count =
859                                 cl_hton32(wr->ds_array[i].length);
860                         ((struct mthca_data_seg *) wqe)->lkey =
861                                 cl_hton32(wr->ds_array[i].lkey);
862                         ((struct mthca_data_seg *) wqe)->addr =
863                                 cl_hton64(wr->ds_array[i].vaddr);
864                         wqe += sizeof (struct mthca_data_seg);
865                 }
866
867                 if (i < qp->rq.max_gs) {
868                         ((struct mthca_data_seg *) wqe)->byte_count = 0;
869                         ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);
870                         ((struct mthca_data_seg *) wqe)->addr = 0;
871                 }
872
873                 qp->wrid[ind] = wr->wr_id;
874
875                 ++ind;
876                 if (unlikely(ind >= qp->rq.max))
877                         ind -= qp->rq.max;
878         }
879 out:
880         if (likely(nreq)) {
881                 qp->rq.head += nreq;
882
883                 /*
884                  * Make sure that descriptors are written before
885                  * doorbell record.
886                  */
887                 mb();
888                 *qp->rq.db = cl_hton32(qp->rq.head & 0xffff);
889         }
890
891 err_busy:
892         cl_spinlock_release(&qp->rq.lock);
893         
894         UVP_EXIT(UVP_DBG_QP);
895         
896         return ret;
897 }
898
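/*
 * Size and allocate the WQE buffer for a new QP: pick power-of-two WQE strides
 * large enough for the worst-case segments of each queue, allocate a
 * page-aligned buffer, and for mem-free HCAs pre-link every WQE's next segment
 * and mark unused scatter entries with the invalid lkey.
 */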
899 int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
900                        ib_qp_type_t type, struct mthca_qp *qp)
901 {
902         int size;
903         int max_sq_sge;
904
905         qp->rq.max_gs    = cap->max_recv_sge;
906         qp->sq.max_gs    = cap->max_send_sge;
907         max_sq_sge       = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
908                                  sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);
909         if (max_sq_sge < (int)cap->max_send_sge)
910                 max_sq_sge = cap->max_send_sge;
911
912         qp->wrid = cl_malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));
913         if (!qp->wrid)
914                 return -1;
915
916         size = sizeof (struct mthca_next_seg) +
917                 qp->rq.max_gs * sizeof (struct mthca_data_seg);
918
919         for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
920              qp->rq.wqe_shift++)
921                 ; /* nothing */
922
923         size = max_sq_sge * sizeof (struct mthca_data_seg);
924         switch (type) {
925         case IB_QPT_UNRELIABLE_DGRM:
926                 size += mthca_is_memfree(pd->context) ?
927                         sizeof (struct mthca_arbel_ud_seg) :
928                         sizeof (struct mthca_tavor_ud_seg);
929                 break;
930
931         case IB_QPT_UNRELIABLE_CONN:
932                 size += sizeof (struct mthca_raddr_seg);
933                 break;
934
935         case IB_QPT_RELIABLE_CONN:
936                 size += sizeof (struct mthca_raddr_seg);
937                 /*
938                  * An atomic op will require an atomic segment, a
939                  * remote address segment and one scatter entry.
940                  */
941                 if (size < (sizeof (struct mthca_atomic_seg) +
942                             sizeof (struct mthca_raddr_seg) +
943                             sizeof (struct mthca_data_seg)))
944                         size = (sizeof (struct mthca_atomic_seg) +
945                                 sizeof (struct mthca_raddr_seg) +
946                                 sizeof (struct mthca_data_seg));
947                 break;
948
949         default:
950                 break;
951         }
952
953         /* Make sure that we have enough space for a bind request */
954         if (size < sizeof (struct mthca_bind_seg))
955                 size = sizeof (struct mthca_bind_seg);
956
957         size += sizeof (struct mthca_next_seg);
958
959         for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
960              qp->sq.wqe_shift++)
961                 ; /* nothing */
962
963         qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,
964                                     1 << qp->sq.wqe_shift);
965
966         qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);
967
968         if (posix_memalign(&qp->buf, g_page_size,
969                            align(qp->buf_size, g_page_size))) {
970                 cl_free(qp->wrid);
971                 return -1;
972         }
973
974         memset(qp->buf, 0, qp->buf_size);
975
976         if (mthca_is_memfree(pd->context)) {
977                 struct mthca_next_seg *next;
978                 struct mthca_data_seg *scatter;
979                 int i;
980                 uint32_t sz;
981
982                 sz = cl_hton32((sizeof (struct mthca_next_seg) +
983                             qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);
984
985                 for (i = 0; i < qp->rq.max; ++i) {
986                         next = get_recv_wqe(qp, i);
987                         next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<
988                                              qp->rq.wqe_shift);
989                         next->ee_nds = sz;
990
991                         for (scatter = (void *) (next + 1);
992                              (void *) scatter < (void *) ((char *)next + (1 << qp->rq.wqe_shift));
993                              ++scatter)
994                                 scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);
995                 }
996
997                 for (i = 0; i < qp->sq.max; ++i) {
998                         next = get_send_wqe(qp, i);
999                         next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<
1000                                               qp->sq.wqe_shift) +
1001                                              qp->send_wqe_offset);
1002                 }
1003         }
1004
1005         qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
1006         qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
1007
1008         return 0;
1009 }
1010
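/* Look up a QP by QP number in the context's QP table; returns NULL if no entry exists. */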
1011 struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)
1012 {
1013         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
1014
1015         if (ctx->qp_table[tind].refcnt)
1016                 return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
1017         else
1018                 return NULL;
1019 }
1020
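/* Store a QP in the context's QP table, allocating the second-level table on first use. */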
1021 int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)
1022 {
1023         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
1024         int ret = 0;
1025
1026         WaitForSingleObject( ctx->qp_table_mutex, INFINITE );
1027
1028         if (!ctx->qp_table[tind].refcnt) {
1029                 ctx->qp_table[tind].table = cl_malloc(
1030                         (ctx->qp_table_mask + 1) * sizeof (struct mthca_qp *));
1031                 if (!ctx->qp_table[tind].table) {
1032                         ret = -1;
1033                         goto out;
1034                 }
1035         }
1036         ++ctx->qp_table[tind].refcnt;
1037         ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
1038
1039 out:
1040         ReleaseMutex( ctx->qp_table_mutex );
1041         return ret;
1042 }
1043
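/* Remove a QP from the context's QP table, freeing the second-level table when its refcount drops to zero. */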
1044 void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)
1045 {
1046         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
1047
1048         WaitForSingleObject( ctx->qp_table_mutex, INFINITE );
1049
1050         if (!--ctx->qp_table[tind].refcnt)
1051                 cl_free(ctx->qp_table[tind].table);
1052         else
1053                 ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
1054         
1055         ReleaseMutex( ctx->qp_table_mutex );
1056 }
1057
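/*
 * For a WQE that completed in error, report whether it had the DBD bit set and
 * compute the nda_op/ee_nds value of the next WQE so the caller can continue
 * the doorbell chain past the failed entry.
 */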
1058 int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
1059                        int index, int *dbd, uint32_t *new_wqe)
1060 {
1061         struct mthca_next_seg *next;
1062
1063         /*
1064          * For SRQs, all WQEs generate a CQE, so we're always at the
1065          * end of the doorbell chain.
1066          */
1067         if (qp->ibv_qp.srq) {
1068                 *new_wqe = 0;
1069                 return 0;
1070         }
1071
1072         if (is_send)
1073                 next = get_send_wqe(qp, index);
1074         else
1075                 next = get_recv_wqe(qp, index);
1076
1077         *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));
1078         if (next->ee_nds & cl_hton32(0x3f))
1079                 *new_wqe = (next->nda_op & cl_hton32(~0x3f)) |
1080                         (next->ee_nds & cl_hton32(0x3f));
1081         else
1082                 *new_wqe = 0;
1083
1084         return 0;
1085 }
1086