1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  *
33  * $Id: qp.c 4214 2005-11-29 17:43:08Z roland $
34  */
35
36 #include <mt_l2w.h>
37 #include "mlnx_uvp.h"
38 #include "mlnx_uvp_doorbell.h"
39 #include "mlnx_uvp_wqe.h"
40 #include "mlnx_ual_data.h"
41
42 #if defined(EVENT_TRACING)
43 #include "mlnx_uvp_qp.tmh"
44 #endif
45
46 static const uint8_t mthca_opcode[] = {
47         MTHCA_OPCODE_RDMA_WRITE,
48         MTHCA_OPCODE_RDMA_WRITE_IMM,
49         MTHCA_OPCODE_SEND,
50         MTHCA_OPCODE_SEND_IMM,
51         MTHCA_OPCODE_RDMA_READ,
52         MTHCA_OPCODE_ATOMIC_CS,
53         MTHCA_OPCODE_ATOMIC_FA
54 };
55
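/*
 * Map an IBAL work-request type (plus the IB_SEND_OPT_IMMEDIATE option)
 * to the matching mthca hardware opcode; unsupported types map to
 * MTHCA_OPCODE_INVALID so callers can reject the work request.
 */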
56 static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)
57 {
58         enum mthca_wr_opcode opcode = MTHCA_OPCODE_INVALID;
59
60         switch (wr->wr_type) {
61                 case WR_SEND: 
62                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;
63                         break;
64                 case WR_RDMA_WRITE:     
65                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;
66                         break;
67                 case WR_RDMA_READ:              opcode = MTHCA_OPCODE_RDMA_READ; break;
68                 case WR_COMPARE_SWAP: opcode = MTHCA_OPCODE_ATOMIC_CS; break;
69                 case WR_FETCH_ADD:                      opcode = MTHCA_OPCODE_ATOMIC_FA; break;
70                 default:                                                opcode = MTHCA_OPCODE_INVALID;break;
71         }
72         return opcode;
73 }
74
75
76 static void dump_wqe(uint32_t print_lvl, uint32_t *wqe_ptr , struct mthca_qp *qp_ptr)
77 {
78         net32_t *wqe = wqe_ptr;
79
80         (void) wqe;     /* avoid unused-variable warning if UVP_PRINT compiles away */
81         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents  QPN 0x%06x \n",qp_ptr->ibv_qp.qp_num));
82         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0
83                 , cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3])));
84         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4
85                 , cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7])));
86         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8
87                 , cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11])));
88         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12
89                 , cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15])));
90
91 }
92 static void *get_recv_wqe(struct mthca_qp *qp, int n)
93 {
94         return qp->buf + (n << qp->rq.wqe_shift);
95 }
96
97 static void *get_send_wqe(struct mthca_qp *qp, int n)
98 {
99         void *wqe_addr = qp->buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);
100         UVP_PRINT(TRACE_LEVEL_INFORMATION,UVP_DBG_QP,
101                 ("wqe %p, qp_buf %p, offset %#x,  index %d, shift %d \n",
102                  wqe_addr, qp->buf, qp->send_wqe_offset, n, 
103                 qp->sq.wqe_shift));
104         
105         return wqe_addr;
106 }
107
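/*
 * Reset the software producer/consumer indices of both work queues and
 * point 'last' at the final WQE so the first posted WQE links from it.
 */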
108 void mthca_init_qp_indices(struct mthca_qp *qp)
109 {
110         qp->sq.next_ind  = 0;
111         qp->sq.last_comp = qp->sq.max - 1;
112         qp->sq.head      = 0;
113         qp->sq.tail      = 0;
114         qp->sq.last      = get_send_wqe(qp, qp->sq.max - 1);
115
116         qp->rq.next_ind  = 0;
117         qp->rq.last_comp = qp->rq.max - 1;
118         qp->rq.head      = 0;
119         qp->rq.tail      = 0;
120         qp->rq.last      = get_recv_wqe(qp, qp->rq.max - 1);
121 }
122
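/*
 * Check whether posting 'nreq' more WQEs would overflow the work queue.
 * The first test runs without a lock; only if the queue looks full do we
 * take the CQ lock to re-read the tail, which CQ polling advances.
 */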
123 static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)
124 {
125         unsigned cur;
126
127         cur = wq->head - wq->tail;
128         if ((int)(cur + nreq) < wq->max)
129                 return 0;
130
131         cl_spinlock_acquire(&cq->lock);
132         cur = wq->head - wq->tail;
133         cl_spinlock_release(&cq->lock);
134
135         return (int)(cur + nreq) >= wq->max;
136 }
137
138
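/*
 * Post a chain of send work requests on a Tavor-mode QP: build each WQE,
 * patch its address and size into the previous WQE's next segment
 * (nda_op/ee_nds), and ring the send doorbell once for the whole chain.
 */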
139 int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
140                           struct _ib_send_wr **bad_wr)
141 {
142         struct mthca_qp *qp = to_mqp(ibqp);
143         uint8_t *wqe;
144         uint8_t *prev_wqe;
145         int ret = 0;
146         int nreq;
147         int i;
148         int size;
149         int size0 = 0;
150         uint32_t f0 = 0;
151         int ind;
152         int op0 = 0;
153         enum mthca_wr_opcode opcode;
154         
155         UVP_ENTER(UVP_DBG_QP);
156         cl_spinlock_acquire(&qp->sq.lock);
157
158         /* XXX check that state is OK to post send */
159
160         ind = qp->sq.next_ind;
161
162         if(ibqp->state == IBV_QPS_RESET) {
163                 ret = -EBUSY;
164                 *bad_wr = wr;
165                 goto err_busy;
166         }
167         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
168
169                 if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
170                         UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x full (%u head, %u tail,"
171                                         " %d max, %d nreq)\n", ibqp->qp_num,
172                                         qp->sq.head, qp->sq.tail,
173                                         qp->sq.max, nreq));
174                         ret = -ENOMEM;
175                         *bad_wr = wr;
176                         goto out;
177                 }
178
179                 wqe = get_send_wqe(qp, ind);
180                 prev_wqe = qp->sq.last;
181                 qp->sq.last = wqe;
182                 opcode = conv_ibal_wr_opcode(wr);
183                 if (opcode == MTHCA_OPCODE_INVALID) {
184                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
185                         ret = -EINVAL;
186                         *bad_wr = wr;
187                         goto out;
188                 }
189
190
191                 ((struct mthca_next_seg *) wqe)->nda_op = 0;
192                 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
193                 ((struct mthca_next_seg *) wqe)->flags =
194                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
195                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
196                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
197                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
198                         cl_hton32(1);
199                 if (opcode == MTHCA_OPCODE_SEND_IMM||
200                     opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
201                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
202
203                 wqe += sizeof (struct mthca_next_seg);
204                 size = sizeof (struct mthca_next_seg) / 16;
205
206
207                 switch (ibqp->qp_type) {
208                 case IB_QPT_RELIABLE_CONN:
209                         switch (opcode) {
210                         case MTHCA_OPCODE_ATOMIC_CS:
211                         case MTHCA_OPCODE_ATOMIC_FA:
212                                 ((struct mthca_raddr_seg *) wqe)->raddr =
213                                         cl_hton64(wr->remote_ops.vaddr);
214                                 ((struct mthca_raddr_seg *) wqe)->rkey =
215                                         wr->remote_ops.rkey;
216                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
217
218                                 wqe += sizeof (struct mthca_raddr_seg);
219
220                                 if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
221                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
222                                                 cl_hton64(wr->remote_ops.atomic2);
223                                         ((struct mthca_atomic_seg *) wqe)->compare =
224                                                 cl_hton64(wr->remote_ops.atomic1);
225                                 } else {
226                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
227                                                 cl_hton64(wr->remote_ops.atomic1);
228                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;
229                                 }
230
231                                 wqe += sizeof (struct mthca_atomic_seg);
232                                 size += (sizeof (struct mthca_raddr_seg) +
233                                          sizeof (struct mthca_atomic_seg)) / 16;
234                                 break;
235
236                         case MTHCA_OPCODE_RDMA_WRITE:
237                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
238                         case MTHCA_OPCODE_RDMA_READ:
239                                 ((struct mthca_raddr_seg *) wqe)->raddr =
240                                         cl_hton64(wr->remote_ops.vaddr);
241                                 ((struct mthca_raddr_seg *) wqe)->rkey =
242                                         wr->remote_ops.rkey;
243                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
244                                 wqe += sizeof (struct mthca_raddr_seg);
245                                 size += sizeof (struct mthca_raddr_seg) / 16;
246                                 break;
247
248                         default:
249                                 /* No extra segments required for sends */
250                                 break;
251                         }
252
253                         break;
254
255                 case IB_QPT_UNRELIABLE_CONN:
256                         switch (opcode) {
257                         case MTHCA_OPCODE_RDMA_WRITE:
258                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
259                                 ((struct mthca_raddr_seg *) wqe)->raddr =
260                                         cl_hton64(wr->remote_ops.vaddr);
261                                 ((struct mthca_raddr_seg *) wqe)->rkey =
262                                         wr->remote_ops.rkey;
263                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
264                                 wqe += sizeof (struct mthca_raddr_seg);
265                                 size += sizeof (struct mthca_raddr_seg) / 16;
266                                 break;
267
268                         default:
269                                 /* No extra segments required for sends */
270                                 break;
271                         }
272
273                         break;
274
275                 case IB_QPT_UNRELIABLE_DGRM:
276                         {
277                                 struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);
278                                 ((struct mthca_tavor_ud_seg *) wqe)->lkey =
279                                         cl_hton32(ah->key);
280                                 ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
281                                         cl_hton64((uint64_t)ah->av);
282                                 ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
283                                 ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
284
285                                 wqe += sizeof (struct mthca_tavor_ud_seg);
286                                 size += sizeof (struct mthca_tavor_ud_seg) / 16;
287                                 break;
288                         }
289
290                 default:
291                         break;
292                 }
293
294                 if ((int)wr->num_ds > qp->sq.max_gs) {
295                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num));
296                         ret = -ERANGE;
297                         *bad_wr = wr;
298                         goto out;
299                 }
300 //TODO sleybo:
301                 if (wr->send_opt & IB_SEND_OPT_INLINE) {
302                         if (wr->num_ds) {
303                                 struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
304                                 int s = 0;
305
306                                 wqe += sizeof *seg;
307                                 for (i = 0; i < (int)wr->num_ds; ++i) {
308                                         struct _ib_local_ds *sge = &wr->ds_array[i];
309
310                                         s += sge->length;
311
312                                         if (s > qp->max_inline_data) {
313                                                 ret = -1;
314                                                 *bad_wr = wr;
315                                                 goto out;
316                                         }
317
318                                         memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr,
319                                                sge->length);
320                                         wqe += sge->length;
321                                 }
322
323                                 seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
324                                 size += align(s + sizeof *seg, 16) / 16;
325                         }
326                 } else {
327                         for (i = 0; i < (int)wr->num_ds; ++i) {
328                                 ((struct mthca_data_seg *) wqe)->byte_count =
329                                         cl_hton32(wr->ds_array[i].length);
330                                 ((struct mthca_data_seg *) wqe)->lkey =
331                                         cl_hton32(wr->ds_array[i].lkey);
332                                 ((struct mthca_data_seg *) wqe)->addr =
333                                         cl_hton64(wr->ds_array[i].vaddr);
334                                 wqe += sizeof (struct mthca_data_seg);
335                                 size += sizeof (struct mthca_data_seg) / 16;
336                         }
337                 }
338
339                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
340
341                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
342                         cl_hton32(((ind << qp->sq.wqe_shift) +
343                         qp->send_wqe_offset) |opcode);
344                 
345                 wmb();
346                 
347                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
348                         cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
349                         ((wr->send_opt& IB_SEND_OPT_FENCE) ?
350                          MTHCA_NEXT_FENCE : 0));
351
352                 if (!size0) {
353                         size0 = size;
354                         op0   = opcode;
355                 }
356                 
357                 dump_wqe( TRACE_LEVEL_VERBOSE, (uint32_t*)qp->sq.last,qp);
358                 
359                 ++ind;
360                 if (unlikely(ind >= qp->sq.max))
361                         ind -= qp->sq.max;
362
363         }
364
365 out:
366         if (likely(nreq)) {
367                 uint32_t doorbell[2];
368
369                 doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
370                                      qp->send_wqe_offset) | f0 | op0);
371                 doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
372
373                 wmb();
374
375                 mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
376         }
377
378         qp->sq.next_ind = ind;
379         qp->sq.head    += nreq;
380
381 err_busy:
382         cl_spinlock_release(&qp->sq.lock);
383         
384         UVP_EXIT(UVP_DBG_QP);
385         return ret;
386 }
387
388
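/*
 * Post receive work requests on a Tavor-mode QP.  The receive doorbell
 * carries the request count in its low byte (note the 'nreq & 255' below),
 * so the chain is flushed to hardware every MTHCA_TAVOR_MAX_WQES_PER_RECV_DB
 * requests and once more for the remainder at the end.
 */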
389 int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
390                           struct _ib_recv_wr **bad_wr)
391 {
392         struct mthca_qp *qp = to_mqp(ibqp);
393         uint32_t doorbell[2];
394         int ret = 0;
395         int nreq;
396         int i;
397         int size;
398         int size0 = 0;
399         int ind;
400         uint8_t *wqe;
401         uint8_t *prev_wqe;
402         
403         UVP_ENTER(UVP_DBG_QP);
404         
405         cl_spinlock_acquire(&qp->rq.lock);
406
407         /* XXX check that state is OK to post receive */
408         
409         ind = qp->rq.next_ind;
410         if(ibqp->state == IBV_QPS_RESET) {
411                 ret = -EBUSY;
412                 *bad_wr = wr;
413                 goto err_busy;
414         }
415         
416         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
417                 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
418                         nreq = 0;
419
420                         doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
421                         doorbell[1] = cl_hton32(ibqp->qp_num << 8); //TODO sleybo: add qpn to qp struct 
422
423                         /*
424                          * Make sure that descriptors are written
425                          * before doorbell is rung.
426                          */
427                         mb();
428
429                         mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
430
431                         qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
432                         size0 = 0;
433                 }
434
435                 if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
436                         UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("RQ %06x full (%u head, %u tail,"
437                                         " %d max, %d nreq)\n", ibqp->qp_num,
438                                         qp->rq.head, qp->rq.tail,
439                                         qp->rq.max, nreq));
440                         ret = -ENOMEM;
441                         *bad_wr = wr;
442                         goto out;
443                 }
444
445                 wqe = get_recv_wqe(qp, ind);
446                 prev_wqe = qp->rq.last;
447                 qp->rq.last = wqe;
448
449                 ((struct mthca_next_seg *) wqe)->nda_op = 0;
450                 ((struct mthca_next_seg *) wqe)->ee_nds =
451                         cl_hton32(MTHCA_NEXT_DBD);
452                 ((struct mthca_next_seg *) wqe)->flags =
453                         cl_hton32(MTHCA_NEXT_CQ_UPDATE);
454
455                 wqe += sizeof (struct mthca_next_seg);
456                 size = sizeof (struct mthca_next_seg) / 16;
457
458                 if (unlikely((int)wr->num_ds  > qp->rq.max_gs)) {
459                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("RQ %06x too many gathers\n",ibqp->qp_num));
460                         ret = -ERANGE;
461                         *bad_wr = wr;
462                         goto out;
463                 }
464
465                 for (i = 0; i < (int)wr->num_ds; ++i) {
466                         ((struct mthca_data_seg *) wqe)->byte_count =
467                                 cl_hton32(wr->ds_array[i].length);
468                         ((struct mthca_data_seg *) wqe)->lkey =
469                                 cl_hton32(wr->ds_array[i].lkey);
470                         ((struct mthca_data_seg *) wqe)->addr =
471                                 cl_hton64(wr->ds_array[i].vaddr);
472                         wqe += sizeof (struct mthca_data_seg);
473                         size += sizeof (struct mthca_data_seg) / 16;
474                 }
475
476                 qp->wrid[ind] = wr->wr_id;
477
478                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
479                         cl_hton32((ind << qp->rq.wqe_shift) | 1);
480                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
481                         cl_hton32(MTHCA_NEXT_DBD | size);
482
483                 if (!size0)
484                         size0 = size;
485
486                 ++ind;
487                 if (unlikely(ind >= qp->rq.max))
488                         ind -= qp->rq.max;
489         }
490
491 out:
492         if (likely(nreq)) {
493                 doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
494                 doorbell[1] = cl_hton32((ibqp->qp_num << 8) | (nreq & 255));
495
496                 /*
497                  * Make sure that descriptors are written before
498                  * doorbell is rung.
499                  */
500                 mb();
501
502                 mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
503         }
504
505         qp->rq.next_ind = ind;
506         qp->rq.head    += nreq;
507
508 err_busy:
509         cl_spinlock_release(&qp->rq.lock);
510         UVP_EXIT(UVP_DBG_QP);
511         return ret;
512 }
513
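/*
 * Post send work requests on an Arbel (mem-free) mode QP.  The chain is
 * flushed every MTHCA_ARBEL_MAX_WQES_PER_SEND_DB requests: each flush
 * updates the doorbell record (*qp->sq.db) and then writes the MMIO send
 * doorbell, with wmb() enforcing the required ordering.
 */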
514 int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
515                           struct _ib_send_wr **bad_wr)
516 {
517         struct mthca_qp *qp = to_mqp(ibqp);
518         uint32_t doorbell[2];
519         uint8_t *wqe;
520         uint8_t *prev_wqe;
521         int ret = 0;
522         int nreq;       
523         int i;
524         int size;
525         int size0 = 0;
526         uint32_t f0 = 0;
527         int ind;
528         uint8_t op0 = 0;
529         enum mthca_wr_opcode opcode;
530         
531         UVP_ENTER(UVP_DBG_QP);
532         
533         cl_spinlock_acquire(&qp->sq.lock);
534
535         /* XXX check that state is OK to post send */
536
537         ind = qp->sq.head & (qp->sq.max - 1);
538         if(ibqp->state == IBV_QPS_RESET) {
539                 ret = -EBUSY;
540                 *bad_wr = wr;
541                 goto err_busy;
542         }
543         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
544                 if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
545                         nreq = 0;
546
547                         doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
548                                             ((qp->sq.head & 0xffff) << 8) | f0 | op0);
549                         doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
550                         qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
551                         size0 = 0;
552
553                         /*
554                          * Make sure that descriptors are written before
555                          * doorbell record.
556                          */
557                         wmb();
558                         *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
559
560                         /*
561                          * Make sure doorbell record is written before we
562                          * write MMIO send doorbell.
563                          */
564                         wmb();
565                         mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
566
567                 }
568
569                 if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
570                         UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("SQ %06x full (%u head, %u tail,"
571                                         " %d max, %d nreq)\n", ibqp->qp_num,
572                                         qp->sq.head, qp->sq.tail,
573                                         qp->sq.max, nreq));                     
574                         ret = -ENOMEM;
575                         *bad_wr = wr;
576                         goto out;
577                 }
578
579                 wqe = get_send_wqe(qp, ind);
580                 prev_wqe = qp->sq.last;
581                 qp->sq.last = wqe;
582                 opcode = conv_ibal_wr_opcode(wr);
583
584                 ((struct mthca_next_seg *) wqe)->flags =
585                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
586                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
587                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
588                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
589                         cl_hton32(1);
590                 if (opcode == MTHCA_OPCODE_SEND_IMM||
591                         opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
592                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
593
594                 wqe += sizeof (struct mthca_next_seg);
595                 size = sizeof (struct mthca_next_seg) / 16;
596
597                 switch (ibqp->qp_type) {
598                 case IB_QPT_RELIABLE_CONN:
599                         switch (opcode) {
600                         case MTHCA_OPCODE_ATOMIC_CS:
601                         case MTHCA_OPCODE_ATOMIC_FA:
602                                 ((struct mthca_raddr_seg *) wqe)->raddr =
603                                         cl_hton64(wr->remote_ops.vaddr);
604                                 ((struct mthca_raddr_seg *) wqe)->rkey =
605                                         wr->remote_ops.rkey;
606                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
607
608                                 wqe += sizeof (struct mthca_raddr_seg);
609
610                                 if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
611                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
612                                                 cl_hton64(wr->remote_ops.atomic2);
613                                         ((struct mthca_atomic_seg *) wqe)->compare =
614                                                 cl_hton64(wr->remote_ops.atomic1);
615                                 } else {
616                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
617                                                 cl_hton64(wr->remote_ops.atomic1);
618                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;
619                                 }
620
621                                 wqe += sizeof (struct mthca_atomic_seg);
622                                 size += (sizeof (struct mthca_raddr_seg) +
623                                          sizeof (struct mthca_atomic_seg)) / 16;
624                                 break;
625
626                         case MTHCA_OPCODE_RDMA_READ:
627                         case MTHCA_OPCODE_RDMA_WRITE:
628                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
629                                 ((struct mthca_raddr_seg *) wqe)->raddr =
630                                         cl_hton64(wr->remote_ops.vaddr);
631                                 ((struct mthca_raddr_seg *) wqe)->rkey =
632                                         wr->remote_ops.rkey;
633                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
634                                 wqe += sizeof (struct mthca_raddr_seg);
635                                 size += sizeof (struct mthca_raddr_seg) / 16;
636                                 break;
637
638                         default:
639                                 /* No extra segments required for sends */
640                                 break;
641                         }
642
643                         break;
644
645                 case IB_QPT_UNRELIABLE_CONN:
646                         switch (opcode) {
647                         case MTHCA_OPCODE_RDMA_WRITE:
648                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
649                                 ((struct mthca_raddr_seg *) wqe)->raddr =
650                                         cl_hton64(wr->remote_ops.vaddr);
651                                 ((struct mthca_raddr_seg *) wqe)->rkey =
652                                         wr->remote_ops.rkey;
653                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
654                                 wqe += sizeof (struct mthca_raddr_seg);
655                                 size += sizeof (struct mthca_raddr_seg) / 16;
656                                 break;
657
658                         default:
659                                 /* No extra segments required for sends */
660                                 break;
661                         }
662
663                         break;
664
665                 case IB_QPT_UNRELIABLE_DGRM:
666                         {
667                                 struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);
668                                 memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
669                                        ah->av, sizeof ( struct mthca_av));
670                                 ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
671                                 ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
672
673
674                                 wqe += sizeof (struct mthca_arbel_ud_seg);
675                                 size += sizeof (struct mthca_arbel_ud_seg) / 16;
676                                 break;
677                         }
678
679                 default:
680                         break;
681                 }
682
683                 if ((int)wr->num_ds > qp->sq.max_gs) {
684                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num));
685                         ret = -ERANGE;
686                         *bad_wr = wr;
687                         goto out;
688                 }
689
690                 if (wr->send_opt & IB_SEND_OPT_INLINE) {
691                         if (wr->num_ds) {
692                                 struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
693                                 int s = 0;
694
695                                 wqe += sizeof *seg;
696                                 for (i = 0; i < (int)wr->num_ds; ++i) {
697                                         struct _ib_local_ds *sge = &wr->ds_array[i];
698
699                                         s += sge->length;
700
701                                         if (s > qp->max_inline_data) {
702                                                 ret = -1;
703                                                 *bad_wr = wr;
704                                                 goto out;
705                                         }
706
707                                         memcpy(wqe, (void *) (uintptr_t) sge->vaddr,
708                                                sge->length);
709                                         wqe += sge->length;
710                                 }
711
712                                 seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
713                                 size += align(s + sizeof *seg, 16) / 16;
714                         }
715                 } else {
716
717                         for (i = 0; i < (int)wr->num_ds; ++i) {
718                                 ((struct mthca_data_seg *) wqe)->byte_count =
719                                         cl_hton32(wr->ds_array[i].length);
720                                 ((struct mthca_data_seg *) wqe)->lkey =
721                                         cl_hton32(wr->ds_array[i].lkey);
722                                 ((struct mthca_data_seg *) wqe)->addr =
723                                         cl_hton64(wr->ds_array[i].vaddr);
724                                 wqe += sizeof (struct mthca_data_seg);
725                                 size += sizeof (struct mthca_data_seg) / 16;
726                         }
727 //TODO do this also in kernel
728 //                      size += wr->num_ds * (sizeof *seg / 16);
729                 }
730
731                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
732
733                 if (opcode == MTHCA_OPCODE_INVALID) {
734                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
735                         ret = -EINVAL;
736                         *bad_wr = wr;
737                         goto out;
738                 }
739
740                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
741                         cl_hton32(((ind << qp->sq.wqe_shift) +
742                                qp->send_wqe_offset) |
743                               opcode);
744                 wmb();
745                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
746                         cl_hton32(MTHCA_NEXT_DBD | size |
747                                   ((wr->send_opt & IB_SEND_OPT_FENCE) ?
748                                    MTHCA_NEXT_FENCE : 0));
749
750
751                 if (!size0) {
752                         size0 = size;
753                         op0   = opcode;
754                 }
755
756                 ++ind;
757                 if (unlikely(ind >= qp->sq.max))
758                         ind -= qp->sq.max;
759         }
760
761 out:
762         if (likely(nreq)) {
763                 doorbell[0] = cl_hton32((nreq << 24) |
764                                     ((qp->sq.head & 0xffff) << 8) | f0 | op0);
765                 doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);
766
767                 qp->sq.head += nreq;
768
769                 /*
770                  * Make sure that descriptors are written before
771                  * doorbell record.
772                  */
773                 wmb();
774                 *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
775
776                 /*
777                  * Make sure doorbell record is written before we
778                  * write MMIO send doorbell.
779                  */
780                 wmb();
781                 mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
782         }
783
784 err_busy:
785         cl_spinlock_release(&qp->sq.lock);
786
787         UVP_EXIT(UVP_DBG_QP);
788         
789         return ret;
790 }
791
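/*
 * Post receive work requests on an Arbel (mem-free) mode QP.  No MMIO
 * doorbell write is needed here: new receive WQEs become visible to the
 * HCA through the doorbell record (*qp->rq.db) updated at the end.
 */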
792 int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
793                           struct _ib_recv_wr **bad_wr)
794 {
795         struct mthca_qp *qp = to_mqp(ibqp);
796         int ret = 0;
797         int nreq;
798         int ind;
799         int i;
800         uint8_t *wqe;
801         
802         UVP_ENTER(UVP_DBG_QP);
803         
804         cl_spinlock_acquire(&qp->rq.lock);
805
806         /* XXX check that state is OK to post receive */
807
808         ind = qp->rq.head & (qp->rq.max - 1);
809         if(ibqp->state == IBV_QPS_RESET) {
810                 ret = -EBUSY;
811                 *bad_wr = wr;
812                 goto err_busy;
813         }
814         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
815                 if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {//TODO sleybo: check the cq
816                         UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full (%u head, %u tail,"
817                                         " %d max, %d nreq)\n", ibqp->qp_num,
818                                         qp->rq.head, qp->rq.tail,
819                                         qp->rq.max, nreq));
820                         ret = -ENOMEM;
821                         *bad_wr = wr;
822                         goto out;
823                 }
824
825                 wqe = get_recv_wqe(qp, ind);
826
827                 ((struct mthca_next_seg *) wqe)->flags = 0;
828
829                 wqe += sizeof (struct mthca_next_seg);
830
831                 if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
832                         UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x too many scatter entries\n",ibqp->qp_num));
833                         ret = -ERANGE;
834                         *bad_wr = wr;
835                         goto out;
836                 }
837
838                 for (i = 0; i < (int)wr->num_ds; ++i) {
839                         ((struct mthca_data_seg *) wqe)->byte_count =
840                                 cl_hton32(wr->ds_array[i].length);
841                         ((struct mthca_data_seg *) wqe)->lkey =
842                                 cl_hton32(wr->ds_array[i].lkey);
843                         ((struct mthca_data_seg *) wqe)->addr =
844                                 cl_hton64(wr->ds_array[i].vaddr);
845                         wqe += sizeof (struct mthca_data_seg);
846                 }
847
848                 if (i < qp->rq.max_gs) {
849                         ((struct mthca_data_seg *) wqe)->byte_count = 0;
850                         ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);
851                         ((struct mthca_data_seg *) wqe)->addr = 0;
852                 }
853
854                 qp->wrid[ind] = wr->wr_id;
855
856                 ++ind;
857                 if (unlikely(ind >= qp->rq.max))
858                         ind -= qp->rq.max;
859         }
860 out:
861         if (likely(nreq)) {
862                 qp->rq.head += nreq;
863
864                 /*
865                  * Make sure that descriptors are written before
866                  * doorbell record.
867                  */
868                 mb();
869                 *qp->rq.db = cl_hton32(qp->rq.head & 0xffff);
870         }
871
872 err_busy:
873         cl_spinlock_release(&qp->rq.lock);
874         
875         UVP_EXIT(UVP_DBG_QP);
876         
877         return ret;
878 }
879
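/*
 * Size and allocate the WQE buffer for a new QP.  Each queue's WQE stride
 * is rounded up to a power of two (at least 64 bytes), receive WQEs are
 * placed at the start of the buffer and send WQEs at send_wqe_offset.
 * For mem-free HCAs the WQE chains are pre-linked and scatter entries
 * pre-marked with the invalid lkey.
 */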
880 int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
881                        ib_qp_type_t type, struct mthca_qp *qp)
882 {
883         int size;
884         int max_sq_sge;
885
886         qp->rq.max_gs    = cap->max_recv_sge;
887         qp->sq.max_gs    = cap->max_send_sge;
888         max_sq_sge       = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
889                                  sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);
890         if (max_sq_sge < (int)cap->max_send_sge)
891                 max_sq_sge = cap->max_send_sge;
892
893         qp->wrid = cl_malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));
894         if (!qp->wrid)
895                 return -1;
896
897         size = sizeof (struct mthca_next_seg) +
898                 qp->rq.max_gs * sizeof (struct mthca_data_seg);
899
900         for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
901              qp->rq.wqe_shift++)
902                 ; /* nothing */
903
904         size = max_sq_sge * sizeof (struct mthca_data_seg);
905         switch (type) {
906         case IB_QPT_UNRELIABLE_DGRM:
907                 size += mthca_is_memfree(pd->context) ?
908                         sizeof (struct mthca_arbel_ud_seg) :
909                         sizeof (struct mthca_tavor_ud_seg);
910                 break;
911
912         case IB_QPT_UNRELIABLE_CONN:
913                 size += sizeof (struct mthca_raddr_seg);
914                 break;
915
916         case IB_QPT_RELIABLE_CONN:
917                 size += sizeof (struct mthca_raddr_seg);
918                 /*
919                  * An atomic op will require an atomic segment, a
920                  * remote address segment and one scatter entry.
921                  */
922                 if (size < (sizeof (struct mthca_atomic_seg) +
923                             sizeof (struct mthca_raddr_seg) +
924                             sizeof (struct mthca_data_seg)))
925                         size = (sizeof (struct mthca_atomic_seg) +
926                                 sizeof (struct mthca_raddr_seg) +
927                                 sizeof (struct mthca_data_seg));
928                 break;
929
930         default:
931                 break;
932         }
933
934         /* Make sure that we have enough space for a bind request */
935         if (size < sizeof (struct mthca_bind_seg))
936                 size = sizeof (struct mthca_bind_seg);
937
938         size += sizeof (struct mthca_next_seg);
939
940         for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
941              qp->sq.wqe_shift++)
942                 ; /* nothing */
943
944         qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,
945                                     1 << qp->sq.wqe_shift);
946
947         qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);
948
949         if (posix_memalign(&qp->buf, g_page_size,
950                            align(qp->buf_size, g_page_size))) {
951                 cl_free(qp->wrid);
952                 return -1;
953         }
954
955         memset(qp->buf, 0, qp->buf_size);
956
957         if (mthca_is_memfree(pd->context)) {
958                 struct mthca_next_seg *next;
959                 struct mthca_data_seg *scatter;
960                 int i;
961                 uint32_t sz;
962
963                 sz = cl_hton32((sizeof (struct mthca_next_seg) +
964                             qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);
965
966                 for (i = 0; i < qp->rq.max; ++i) {
967                         next = get_recv_wqe(qp, i);
968                         next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<
969                                              qp->rq.wqe_shift);
970                         next->ee_nds = sz;
971
972                         for (scatter = (void *) (next + 1);
973                              (void *) scatter < (void *) ((char *)next + (1 << qp->rq.wqe_shift));
974                              ++scatter)
975                                 scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);
976                 }
977
978                 for (i = 0; i < qp->sq.max; ++i) {
979                         next = get_send_wqe(qp, i);
980                         next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<
981                                               qp->sq.wqe_shift) +
982                                              qp->send_wqe_offset);
983                 }
984         }
985
986         qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
987         qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
988
989         return 0;
990 }
991
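/*
 * Look up a QP by QP number in the per-context table.  The table is split
 * into chunks of (qp_table_mask + 1) entries, allocated on demand and
 * reference-counted by mthca_store_qp()/mthca_clear_qp().
 */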
992 struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)
993 {
994         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
995
996         if (ctx->qp_table[tind].refcnt)
997                 return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
998         else
999                 return NULL;
1000 }
1001
1002 int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)
1003 {
1004         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
1005         int ret = 0;
1006
1007         WaitForSingleObject( ctx->qp_table_mutex, INFINITE );
1008
1009         if (!ctx->qp_table[tind].refcnt) {
1010                 ctx->qp_table[tind].table = cl_malloc(
1011                         (ctx->qp_table_mask + 1) * sizeof (struct mthca_qp *));
1012                 if (!ctx->qp_table[tind].table) {
1013                         ret = -1;
1014                         goto out;
1015                 }
1016         }
1017         ++ctx->qp_table[tind].refcnt;
1018         ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
1019
1020 out:
1021         ReleaseMutex( ctx->qp_table_mutex );
1022         return ret;
1023 }
1024
1025 void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)
1026 {
1027         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
1028
1029         WaitForSingleObject( ctx->qp_table_mutex, INFINITE );
1030
1031         if (!--ctx->qp_table[tind].refcnt)
1032                 cl_free(ctx->qp_table[tind].table);
1033         else
1034                 ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
1035         
1036         ReleaseMutex( ctx->qp_table_mutex );
1037 }
1038
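/*
 * Called when flushing a WQE that completed in error: report whether the
 * WQE had its DBD bit set (*dbd) and return the next-WQE address/size word
 * (*new_wqe) so completion handling can continue along the chain.
 */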
1039 int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
1040                        int index, int *dbd, uint32_t *new_wqe)
1041 {
1042         struct mthca_next_seg *next;
1043
1044         /*
1045          * For SRQs, all WQEs generate a CQE, so we're always at the
1046          * end of the doorbell chain.
1047          */
1048         if (qp->ibv_qp.srq) {
1049                 *new_wqe = 0;
1050                 return 0;
1051         }
1052
1053         if (is_send)
1054                 next = get_send_wqe(qp, index);
1055         else
1056                 next = get_recv_wqe(qp, index);
1057
1058         *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));
1059         if (next->ee_nds & cl_hton32(0x3f))
1060                 *new_wqe = (next->nda_op & cl_hton32(~0x3f)) |
1061                         (next->ee_nds & cl_hton32(0x3f));
1062         else
1063                 *new_wqe = 0;
1064
1065         return 0;
1066 }
1067