[MTHCA] bug fix: removed unnecessary conversion to network order in atomic operations...
[mirror/winof/.git] / hw / mthca / user / mlnx_uvp_qp.c
1 /*\r
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.\r
3  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.\r
4  * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.\r
5  *\r
6  * This software is available to you under a choice of one of two\r
7  * licenses.  You may choose to be licensed under the terms of the GNU\r
8  * General Public License (GPL) Version 2, available from the file\r
9  * COPYING in the main directory of this source tree, or the\r
10  * OpenIB.org BSD license below:\r
11  *\r
12  *     Redistribution and use in source and binary forms, with or\r
13  *     without modification, are permitted provided that the following\r
14  *     conditions are met:\r
15  *\r
16  *      - Redistributions of source code must retain the above\r
17  *        copyright notice, this list of conditions and the following\r
18  *        disclaimer.\r
19  *\r
20  *      - Redistributions in binary form must reproduce the above\r
21  *        copyright notice, this list of conditions and the following\r
22  *        disclaimer in the documentation and/or other materials\r
23  *        provided with the distribution.\r
24  *\r
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
32  * SOFTWARE.\r
33  *\r
34  * $Id$\r
35  */\r
36 \r
37 #include <mt_l2w.h>\r
38 #include "mlnx_uvp.h"\r
39 #include "mlnx_uvp_doorbell.h"\r
40 #include "mthca_wqe.h"\r
41 #include "mlnx_ual_data.h"\r
42 \r
43 #if defined(EVENT_TRACING)\r
44 #include "mlnx_uvp_qp.tmh"\r
45 #endif\r
46 \r
/*
 * Hardware opcode table.  NOTE(review): not referenced anywhere in this
 * chunk of the file — presumably indexed by an IBAL/verbs opcode value
 * elsewhere; confirm the index order against the callers before reordering.
 */
static const uint8_t mthca_opcode[] = {
	MTHCA_OPCODE_RDMA_WRITE,
	MTHCA_OPCODE_RDMA_WRITE_IMM,
	MTHCA_OPCODE_SEND,
	MTHCA_OPCODE_SEND_IMM,
	MTHCA_OPCODE_RDMA_READ,
	MTHCA_OPCODE_ATOMIC_CS,
	MTHCA_OPCODE_ATOMIC_FA
};
56 \r
57 static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)\r
58 {\r
59         enum mthca_wr_opcode opcode = -1; //= wr->wr_type;\r
60 \r
61         switch (wr->wr_type) {\r
62                 case WR_SEND: \r
63                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;\r
64                         break;\r
65                 case WR_RDMA_WRITE:     \r
66                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;\r
67                         break;\r
68                 case WR_RDMA_READ:              opcode = MTHCA_OPCODE_RDMA_READ; break;\r
69                 case WR_COMPARE_SWAP: opcode = MTHCA_OPCODE_ATOMIC_CS; break;\r
70                 case WR_FETCH_ADD:                      opcode = MTHCA_OPCODE_ATOMIC_FA; break;\r
71                 default:                                                opcode = MTHCA_OPCODE_INVALID;break;\r
72         }\r
73         return opcode;\r
74 }\r
75 \r
76 \r
77 static void dump_wqe(uint32_t print_lvl, uint32_t *wqe_ptr , struct mthca_qp *qp_ptr, int size)\r
78 {\r
79         net32_t *wqe = wqe_ptr;\r
80         int i;\r
81 \r
82         (void) wqe;     /* avoid warning if mthca_dbg compiled away... */\r
83         UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE: QPN 0x%06x, buf %p , buf_sz %d, send_offset %d\n",\r
84                 qp_ptr->ibv_qp.qp_num, qp_ptr->buf, qp_ptr->buf_size, qp_ptr->send_wqe_offset ));\r
85         for (i=0; i<size; ++i) {\r
86                 UVP_PRINT(print_lvl,UVP_DBG_QP,("  segment[%d] %08x %08x %08x %08x \n",i,\r
87                         cl_ntoh32(wqe[4*i+0]), cl_ntoh32(wqe[4*i+1]), \r
88                         cl_ntoh32(wqe[4*i+2]), cl_ntoh32(wqe[4*i+3])));\r
89         }\r
90 \r
91 }\r
/* Return the address of receive WQE number 'n' inside the QP buffer;
 * receive WQEs are fixed-stride (1 << rq.wqe_shift bytes each). */
static void *get_recv_wqe(struct mthca_qp *qp, int n)
{
	return qp->buf + (n << qp->rq.wqe_shift);
}
96 \r
97 static void *get_send_wqe(struct mthca_qp *qp, int n)\r
98 {\r
99         void *wqe_addr = qp->buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);\r
100         UVP_PRINT(TRACE_LEVEL_INFORMATION,UVP_DBG_QP,\r
101                 ("wqe %p, qp_buf %p, offset %#x,  index %d, shift %d \n",\r
102                  wqe_addr, qp->buf, qp->send_wqe_offset, n, \r
103                 qp->sq.wqe_shift));\r
104         \r
105         return wqe_addr;\r
106 }\r
107 \r
108 void mthca_init_qp_indices(struct mthca_qp *qp)\r
109 {\r
110         qp->sq.next_ind  = 0;\r
111         qp->sq.last_comp = qp->sq.max - 1;\r
112         qp->sq.head      = 0;\r
113         qp->sq.tail      = 0;\r
114         qp->sq.last      = get_send_wqe(qp, qp->sq.max - 1);\r
115 \r
116         qp->rq.next_ind  = 0;\r
117         qp->rq.last_comp = qp->rq.max - 1;\r
118         qp->rq.head      = 0;\r
119         qp->rq.tail      = 0;\r
120         qp->rq.last      = get_recv_wqe(qp, qp->rq.max - 1);\r
121 }\r
122 \r
/*
 * Return nonzero if posting 'nreq' more WQEs would overflow work queue
 * 'wq'.  head/tail are free-running counters, so (head - tail) is the
 * number of outstanding WQEs even across unsigned wraparound.
 */
static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)
{
	unsigned cur;

	/* Unlocked fast path: if there is clearly room, post away. */
	cur = wq->head - wq->tail;
	if ((int)(cur + nreq) < wq->max)
		return 0;

	/* Looks full — re-read under the CQ lock.  NOTE(review): presumably
	 * the completion path advances wq->tail under cq->lock, so this
	 * locked re-read picks up any completions that freed slots just
	 * before we declare overflow; confirm against the CQ-poll code. */
	cl_spinlock_acquire(&cq->lock);
	cur = wq->head - wq->tail;
	cl_spinlock_release(&cq->lock);

	return (int)(cur + nreq) >= wq->max;
}
137 \r
138 \r
/*
 * Post a chain of send work requests on a Tavor-mode QP.
 *
 * For each request: build a WQE in place in the send-queue buffer
 * (control segment, optional remote-address/atomic/UD segments, then
 * inline data or scatter/gather entries), link it into the previous
 * WQE, and finally ring the send doorbell once for the whole chain.
 *
 * Returns 0 on success or a negative errno value; on failure *bad_wr
 * (if non-NULL) points at the request that could not be posted.
 * Requests posted before the failing one are still submitted via the
 * doorbell in the 'out' path.
 */
int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,
			  struct _ib_send_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	uint8_t *wqe;
	uint8_t *prev_wqe;
	int ret = 0;
	int nreq;
	int i;
	int size;		/* size of current WQE, in 16-byte units */
	int size0 = 0;		/* size of the first WQE (goes into the doorbell) */
	/* Fence flag for the doorbell, taken from the first request. */
	uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
	int ind;		/* send-queue slot index for the current WQE */
	int op0 = 0;		/* opcode of the first WQE (goes into the doorbell) */
	enum ib_wr_opcode opcode;
	
	UVP_ENTER(UVP_DBG_QP);
	cl_spinlock_acquire(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.next_ind;

	if(ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		if (bad_wr)
			*bad_wr = wr;
		goto err_busy;
	}

	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {

		if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", ibqp->qp_num,
					qp->sq.head, qp->sq.tail,
					qp->sq.max, nreq));
			ret = -ENOMEM;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;
		/* NOTE(review): 'opcode' is declared as enum ib_wr_opcode but
		 * holds MTHCA_OPCODE_* values — the types happen to be
		 * assignment-compatible; confirm they stay in sync. */
		opcode = conv_ibal_wr_opcode(wr);
		if (opcode == MTHCA_OPCODE_INVALID) {
			UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));
			ret = -EINVAL;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		/* Control segment.  nda_op/ee_nds stay 0 until the hardware
		 * is allowed to see the WQE (see the prev_wqe linking below);
		 * flags carry CQ-update/solicit bits plus a set low bit. */
		((struct mthca_next_seg *) wqe)->nda_op = 0;
		((struct mthca_next_seg *) wqe)->ee_nds = 0;
		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
			 cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
			 cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
			cl_hton32(1);
		if (opcode == MTHCA_OPCODE_SEND_IMM||
		    opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		/* Transport-specific segments follow the control segment. */
		switch (ibqp->qp_type) {
		case IB_QPT_RELIABLE_CONN:
			switch (opcode) {
			case MTHCA_OPCODE_ATOMIC_CS:
			case MTHCA_OPCODE_ATOMIC_FA:
				/* Remote-address segment.  rkey is stored as-is:
				 * per the change note at the top of this file, an
				 * extra conversion to network order here was a bug. */
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;

				wqe += sizeof (struct mthca_raddr_seg);

				/* Atomic operands: atomic1 = compare (CS) or
				 * add value (FA); atomic2 = swap value (CS only).
				 * Also stored without byte-order conversion. */
				if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						(wr->remote_ops.atomic2);
					((struct mthca_atomic_seg *) wqe)->compare =
						(wr->remote_ops.atomic1);
				} else {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						(wr->remote_ops.atomic1);
					((struct mthca_atomic_seg *) wqe)->compare = 0;
				}

				wqe += sizeof (struct mthca_atomic_seg);
				size += (sizeof (struct mthca_raddr_seg) +
					 sizeof (struct mthca_atomic_seg)) / 16;
				break;

			case MTHCA_OPCODE_RDMA_WRITE:
			case MTHCA_OPCODE_RDMA_WRITE_IMM:
			case MTHCA_OPCODE_RDMA_READ:
				/* RDMA: just a remote-address segment. */
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IB_QPT_UNRELIABLE_CONN:
			switch (opcode) {
			case MTHCA_OPCODE_RDMA_WRITE:
			case MTHCA_OPCODE_RDMA_WRITE_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cl_hton64(wr->remote_ops.vaddr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					wr->remote_ops.rkey;
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IB_QPT_UNRELIABLE_DGRM:
			{
				/* UD: address-handle (lkey + AV address) plus
				 * destination QPN and Q_Key. */
				struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);
				((struct mthca_tavor_ud_seg *) wqe)->lkey =
					cl_hton32(ah->key);
				((struct mthca_tavor_ud_seg *) wqe)->av_addr =
					cl_hton64((ULONG_PTR)ah->av);
				((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
				((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;

				wqe += sizeof (struct mthca_tavor_ud_seg);
				size += sizeof (struct mthca_tavor_ud_seg) / 16;
				break;
			}

		default:
			break;
		}

		/* NOTE(review): redundant double (int)(int) cast — harmless. */
		if ((int)(int)wr->num_ds > qp->sq.max_gs) {
			UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num));
			ret = -ERANGE;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}
//TODO sleybo:
		if (wr->send_opt & IB_SEND_OPT_INLINE) {
			/* Inline data: copy the gather-list bytes directly into
			 * the WQE behind a single inline-segment header. */
			if (wr->num_ds) {
				struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
				uint32_t s = 0;

				wqe += sizeof *seg;
				for (i = 0; i < (int)wr->num_ds; ++i) {
					struct _ib_local_ds *sge = &wr->ds_array[i];

					s += sge->length;

					/* Running-total check against the QP's
					 * inline-data capacity. */
					if (s > (uint32_t)qp->max_inline_data) {
						ret = -E2BIG;
						if (bad_wr)
							*bad_wr = wr;
						goto out;
					}

					memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr,
					       sge->length);
					wqe += sge->length;
				}

				seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
				size += align(s + sizeof *seg, 16) / 16;
			}
		} else {
			/* Scatter/gather: one data segment per SGE. */
			for (i = 0; i < (int)wr->num_ds; ++i) {
				((struct mthca_data_seg *) wqe)->byte_count =
					cl_hton32(wr->ds_array[i].length);
				((struct mthca_data_seg *) wqe)->lkey =
					cl_hton32(wr->ds_array[i].lkey);
				((struct mthca_data_seg *) wqe)->addr =
					cl_hton64(wr->ds_array[i].vaddr);
				wqe += sizeof (struct mthca_data_seg);
				size += sizeof (struct mthca_data_seg) / 16;
			}
		}

		/* Send-queue wrids are stored after the rq.max receive wrids. */
		qp->wrid[ind + qp->rq.max] = wr->wr_id;

		/* Link this WQE into the previous one: first the next-address/
		 * opcode word, then (after a write barrier) ee_nds with the DBD
		 * bit, which is what makes the hardware pick the WQE up. */
		((struct mthca_next_seg *) prev_wqe)->nda_op =
			cl_hton32(((ind << qp->sq.wqe_shift) +
			qp->send_wqe_offset) |opcode);

		wmb();

		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
			((wr->send_opt& IB_SEND_OPT_FENCE) ?
			 MTHCA_NEXT_FENCE : 0));

		/* Remember the first WQE's size/opcode for the doorbell. */
		if (!size0) {
			size0 = size;
			op0   = opcode;
		}

		dump_wqe( TRACE_LEVEL_VERBOSE, (uint32_t*)qp->sq.last,qp, size);

		++ind;
		if (unlikely(ind >= qp->sq.max))
			ind -= qp->sq.max;

	}

out:
	/* Ring the send doorbell once for everything posted above. */
	if (likely(nreq)) {
		uint32_t doorbell[2];

		doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
			qp->send_wqe_offset) | f0 | op0);
		doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);

		/* Descriptors must be visible before the doorbell write. */
		wmb();

		mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);
	}

	qp->sq.next_ind = ind;
	qp->sq.head    += nreq;

err_busy:
	cl_spinlock_release(&qp->sq.lock);

	UVP_EXIT(UVP_DBG_QP);
	return ret;
}
393 \r
394 \r
/*
 * Post a chain of receive work requests on a Tavor-mode QP.
 *
 * Builds one receive WQE per request (control segment plus one data
 * segment per SGE), links each into the previous WQE, and rings the
 * receive doorbell.  Because the doorbell's request count is limited,
 * the chain is flushed to hardware every
 * MTHCA_TAVOR_MAX_WQES_PER_RECV_DB requests.
 *
 * Returns 0 on success or a negative errno value; on failure *bad_wr
 * (if non-NULL) points at the request that could not be posted.
 */
int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
			  struct _ib_recv_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	uint32_t doorbell[2];
	int ret = 0;
	int nreq;
	int i;
	int size;		/* size of current WQE, in 16-byte units */
	int size0 = 0;		/* size of first WQE in the current doorbell batch */
	int ind;		/* receive-queue slot index for the current WQE */
	uint8_t *wqe;
	uint8_t *prev_wqe;
	
	UVP_ENTER(UVP_DBG_QP);
	
	cl_spinlock_acquire(&qp->rq.lock);

	/* XXX check that state is OK to post receive */
	
	ind = qp->rq.next_ind;
	if(ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		if (bad_wr)
			*bad_wr = wr;
		goto err_busy;
	}
	
	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
		/* Flush a full batch to hardware and start a new one. */
		if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
			nreq = 0;

			doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
			doorbell[1] = cl_hton32(ibqp->qp_num << 8); //TODO sleybo: add qpn to qp struct 

			/*
			 * Make sure that descriptors are written
			 * before doorbell is rung.
			 */
			mb();

			mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);

			qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
			size0 = 0;
		}

		if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
			UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("RQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", ibqp->qp_num,
					qp->rq.head, qp->rq.tail,
					qp->rq.max, nreq));
			ret = -ENOMEM;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);
		prev_wqe = qp->rq.last;
		qp->rq.last = wqe;

		/* Control segment: receive WQEs always carry DBD and request
		 * a CQ update on completion. */
		((struct mthca_next_seg *) wqe)->nda_op = 0;
		((struct mthca_next_seg *) wqe)->ee_nds =
			cl_hton32(MTHCA_NEXT_DBD);
		((struct mthca_next_seg *) wqe)->flags =
			cl_hton32(MTHCA_NEXT_CQ_UPDATE);

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		if (unlikely((int)wr->num_ds  > qp->rq.max_gs)) {
			UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("RQ %06x too many gathers\n",ibqp->qp_num));
			ret = -ERANGE;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		/* One data segment per scatter entry. */
		for (i = 0; i < (int)wr->num_ds; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cl_hton32(wr->ds_array[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				cl_hton32(wr->ds_array[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				cl_hton64(wr->ds_array[i].vaddr);
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		qp->wrid[ind] = wr->wr_id;

		/* Link into the previous WQE: next-address word first, then
		 * (after a write barrier) ee_nds, which exposes the WQE. */
		((struct mthca_next_seg *) prev_wqe)->nda_op =
			cl_hton32((ind << qp->rq.wqe_shift) | 1);
		wmb();
		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			cl_hton32(MTHCA_NEXT_DBD | size);

		if (!size0)
			size0 = size;

		++ind;
		if (unlikely(ind >= qp->rq.max))
			ind -= qp->rq.max;
	}

out:
	/* Flush the final (possibly partial) batch. */
	if (likely(nreq)) {
		doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
		doorbell[1] = cl_hton32((ibqp->qp_num << 8) | (nreq & 255));

		/*
		 * Make sure that descriptors are written before
		 * doorbell is rung.
		 */
		mb();

		mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL);
	}

	qp->rq.next_ind = ind;
	qp->rq.head    += nreq;

err_busy:
	cl_spinlock_release(&qp->rq.lock);
	UVP_EXIT(UVP_DBG_QP);
	return ret;
}
523 \r
524 int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr,\r
525                           struct _ib_send_wr **bad_wr)\r
526 {\r
527         struct mthca_qp *qp = to_mqp(ibqp);\r
528         uint32_t doorbell[2];\r
529         uint8_t *wqe;\r
530         uint8_t *prev_wqe;\r
531         int ret = 0;\r
532         int nreq;       \r
533         int i;\r
534         int size;\r
535         int size0 = 0;\r
536         uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;\r
537         int ind;\r
538         uint8_t op0 = 0;\r
539         enum ib_wr_opcode opcode;\r
540         \r
541         UVP_ENTER(UVP_DBG_QP);\r
542         \r
543         cl_spinlock_acquire(&qp->sq.lock);\r
544 \r
545         /* XXX check that state is OK to post send */\r
546 \r
547         ind = qp->sq.head & (qp->sq.max - 1);\r
548         if(ibqp->state == IBV_QPS_RESET) {\r
549                 ret = -EBUSY;\r
550                 if (bad_wr)\r
551                         *bad_wr = wr;\r
552                 goto err_busy;\r
553         }\r
554 \r
555         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
556                 if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {\r
557                         nreq = 0;\r
558 \r
559                         doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |\r
560                                             ((qp->sq.head & 0xffff) << 8) | f0 | op0);\r
561                         doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);\r
562                         qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;\r
563                         size0 = 0;\r
564                         f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;\r
565 \r
566                         /*\r
567                          * Make sure that descriptors are written before\r
568                          * doorbell record.\r
569                          */\r
570                         wmb();\r
571                         *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);\r
572 \r
573                         /*\r
574                          * Make sure doorbell record is written before we\r
575                          * write MMIO send doorbell.\r
576                          */\r
577                         wmb();\r
578                         mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);\r
579 \r
580                 }\r
581 \r
582                 if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {\r
583                         UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("SQ %06x full (%u head, %u tail,"\r
584                                         " %d max, %d nreq)\n", ibqp->qp_num,\r
585                                         qp->sq.head, qp->sq.tail,\r
586                                         qp->sq.max, nreq));                     \r
587                         ret = -ENOMEM;\r
588                         if (bad_wr)\r
589                                 *bad_wr = wr;\r
590                         goto out;\r
591                 }\r
592 \r
593                 wqe = get_send_wqe(qp, ind);\r
594                 prev_wqe = qp->sq.last;\r
595                 qp->sq.last = wqe;\r
596                 opcode = conv_ibal_wr_opcode(wr);\r
597 \r
598                 ((struct mthca_next_seg *) wqe)->flags =\r
599                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?\r
600                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |\r
601                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?\r
602                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |\r
603                         cl_hton32(1);\r
604                 if (opcode == MTHCA_OPCODE_SEND_IMM||\r
605                         opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)\r
606                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;\r
607 \r
608                 wqe += sizeof (struct mthca_next_seg);\r
609                 size = sizeof (struct mthca_next_seg) / 16;\r
610 \r
611                 switch (ibqp->qp_type) {\r
612                 case IB_QPT_RELIABLE_CONN:\r
613                         switch (opcode) {\r
614                         case MTHCA_OPCODE_ATOMIC_CS:\r
615                         case MTHCA_OPCODE_ATOMIC_FA:\r
616                                 ((struct mthca_raddr_seg *) wqe)->raddr =\r
617                                         cl_hton64(wr->remote_ops.vaddr);\r
618                                 ((struct mthca_raddr_seg *) wqe)->rkey =\r
619                                         wr->remote_ops.rkey;\r
620                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;\r
621 \r
622                                 wqe += sizeof (struct mthca_raddr_seg);\r
623 \r
624                                 if (opcode == MTHCA_OPCODE_ATOMIC_CS) {\r
625                                         ((struct mthca_atomic_seg *) wqe)->swap_add =\r
626                                                 (wr->remote_ops.atomic2);\r
627                                         ((struct mthca_atomic_seg *) wqe)->compare =\r
628                                                 (wr->remote_ops.atomic1);\r
629                                 } else {\r
630                                         ((struct mthca_atomic_seg *) wqe)->swap_add =\r
631                                                 (wr->remote_ops.atomic1);\r
632                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;\r
633                                 }\r
634 \r
635                                 wqe += sizeof (struct mthca_atomic_seg);\r
636                                 size += (sizeof (struct mthca_raddr_seg) +\r
637                                          sizeof (struct mthca_atomic_seg)) / 16;\r
638                                 break;\r
639 \r
640                         case MTHCA_OPCODE_RDMA_READ:\r
641                         case MTHCA_OPCODE_RDMA_WRITE:\r
642                         case MTHCA_OPCODE_RDMA_WRITE_IMM:\r
643                                 ((struct mthca_raddr_seg *) wqe)->raddr =\r
644                                         cl_hton64(wr->remote_ops.vaddr);\r
645                                 ((struct mthca_raddr_seg *) wqe)->rkey =\r
646                                         wr->remote_ops.rkey;\r
647                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;\r
648                                 wqe += sizeof (struct mthca_raddr_seg);\r
649                                 size += sizeof (struct mthca_raddr_seg) / 16;\r
650                                 break;\r
651 \r
652                         default:\r
653                                 /* No extra segments required for sends */\r
654                                 break;\r
655                         }\r
656 \r
657                         break;\r
658 \r
659                 case IB_QPT_UNRELIABLE_CONN:\r
660                         switch (opcode) {\r
661                         case MTHCA_OPCODE_RDMA_WRITE:\r
662                         case MTHCA_OPCODE_RDMA_WRITE_IMM:\r
663                                 ((struct mthca_raddr_seg *) wqe)->raddr =\r
664                                         cl_hton64(wr->remote_ops.vaddr);\r
665                                 ((struct mthca_raddr_seg *) wqe)->rkey =\r
666                                         wr->remote_ops.rkey;\r
667                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;\r
668                                 wqe += sizeof (struct mthca_raddr_seg);\r
669                                 size += sizeof (struct mthca_raddr_seg) / 16;\r
670                                 break;\r
671 \r
672                         default:\r
673                                 /* No extra segments required for sends */\r
674                                 break;\r
675                         }\r
676 \r
677                         break;\r
678 \r
679                 case IB_QPT_UNRELIABLE_DGRM:\r
680                         {\r
681                                 struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av);\r
682                                 memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,\r
683                                        ah->av, sizeof ( struct mthca_av));\r
684                                 ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;\r
685                                 ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;\r
686 \r
687 \r
688                                 wqe += sizeof (struct mthca_arbel_ud_seg);\r
689                                 size += sizeof (struct mthca_arbel_ud_seg) / 16;\r
690                                 break;\r
691                         }\r
692 \r
693                 default:\r
694                         break;\r
695                 }\r
696 \r
697                 if ((int)wr->num_ds > qp->sq.max_gs) {\r
698                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x full too many gathers\n",ibqp->qp_num));\r
699                         ret = -ERANGE;\r
700                         if (bad_wr)\r
701                                 *bad_wr = wr;\r
702                         goto out;\r
703                 }\r
704 \r
705                 if (wr->send_opt & IB_SEND_OPT_INLINE) {\r
706                         if (wr->num_ds) {\r
707                                 struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;\r
708                                 uint32_t s = 0;\r
709 \r
710                                 wqe += sizeof *seg;\r
711                                 for (i = 0; i < (int)wr->num_ds; ++i) {\r
712                                         struct _ib_local_ds *sge = &wr->ds_array[i];\r
713 \r
714                                         s += sge->length;\r
715 \r
716                                         if (s > (uint32_t)qp->max_inline_data) {\r
717                                                 ret = -E2BIG;\r
718                                                 if (bad_wr)\r
719                                                         *bad_wr = wr;\r
720                                                 goto out;\r
721                                         }\r
722 \r
723                                         memcpy(wqe, (void *) (uintptr_t) sge->vaddr,\r
724                                                sge->length);\r
725                                         wqe += sge->length;\r
726                                 }\r
727 \r
728                                 seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);\r
729                                 size += align(s + sizeof *seg, 16) / 16;\r
730                         }\r
731                 } else {\r
732 \r
733                         for (i = 0; i < (int)wr->num_ds; ++i) {\r
734                                 ((struct mthca_data_seg *) wqe)->byte_count =\r
735                                         cl_hton32(wr->ds_array[i].length);\r
736                                 ((struct mthca_data_seg *) wqe)->lkey =\r
737                                         cl_hton32(wr->ds_array[i].lkey);\r
738                                 ((struct mthca_data_seg *) wqe)->addr =\r
739                                         cl_hton64(wr->ds_array[i].vaddr);\r
740                                 wqe += sizeof (struct mthca_data_seg);\r
741                                 size += sizeof (struct mthca_data_seg) / 16;\r
742                         }\r
743 //TODO do this also in kernel\r
744 //                      size += wr->num_ds * (sizeof *seg / 16);\r
745                 }\r
746 \r
747                         qp->wrid[ind + qp->rq.max] = wr->wr_id;\r
748 \r
749                 if (opcode == MTHCA_OPCODE_INVALID) {\r
750                         UVP_PRINT(TRACE_LEVEL_ERROR  ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num));\r
751                         ret = -EINVAL;\r
752                         if (bad_wr)\r
753                                 *bad_wr = wr;\r
754                         goto out;\r
755                 }\r
756 \r
757                 ((struct mthca_next_seg *) prev_wqe)->nda_op =\r
758                         cl_hton32(((ind << qp->sq.wqe_shift) +\r
759                                qp->send_wqe_offset) |\r
760                               opcode);\r
761                 wmb();\r
762                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =\r
763                         cl_hton32(MTHCA_NEXT_DBD | size |\r
764                           ((wr->send_opt & IB_SEND_OPT_FENCE) ?\r
765                                                    MTHCA_NEXT_FENCE : 0));\r
766 \r
767                 if (!size0) {\r
768                         size0 = size;\r
769                         op0   = opcode;\r
770                 }\r
771 \r
772                 dump_wqe( TRACE_LEVEL_VERBOSE, (uint32_t*)qp->sq.last,qp, size);\r
773 \r
774                 ++ind;\r
775                 if (unlikely(ind >= qp->sq.max))\r
776                         ind -= qp->sq.max;\r
777         }\r
778 \r
779 out:\r
780         if (likely(nreq)) {\r
781                 doorbell[0] = cl_hton32((nreq << 24) |\r
782                                     ((qp->sq.head & 0xffff) << 8) | f0 | op0);\r
783                 doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0);\r
784 \r
785                 qp->sq.head += nreq;\r
786 \r
787                 /*\r
788                  * Make sure that descriptors are written before\r
789                  * doorbell record.\r
790                  */\r
791                 wmb();\r
792                 *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);\r
793 \r
794                 /*\r
795                  * Make sure doorbell record is written before we\r
796                  * write MMIO send doorbell.\r
797                  */\r
798                 wmb();\r
799                 mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL);\r
800         }\r
801 \r
802 err_busy:\r
803         cl_spinlock_release(&qp->sq.lock);\r
804 \r
805         UVP_EXIT(UVP_DBG_QP);\r
806         \r
807         return ret;\r
808 }\r
809 \r
/*
 * Post a chain of receive work requests to an Arbel (mem-free) QP.
 *
 * For each WR: builds a receive WQE (next segment + one data segment per
 * scatter entry) in the pre-linked RQ ring, then advances the doorbell
 * record so the HCA sees the new head.  On any failure, *bad_wr is set
 * to the offending WR and the WRs already built are still made visible
 * to the hardware via the doorbell update at "out:".
 *
 * Returns 0 on success, or a negative errno value:
 *   -EBUSY  QP is in RESET state
 *   -ENOMEM RQ is full
 *   -ERANGE WR has more scatter entries than rq.max_gs
 */
int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr,
                          struct _ib_recv_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	int ret = 0;
	int nreq;
	int ind;
	int i;
	uint8_t *wqe;
	
	UVP_ENTER(UVP_DBG_QP);
	
	cl_spinlock_acquire(&qp->rq.lock);

	/* XXX check that state is OK to post receive */

	/* rq.max is a power of two, so this masks head into the ring. */
	ind = qp->rq.head & (qp->rq.max - 1);
	if(ibqp->state == IBV_QPS_RESET) {
		ret = -EBUSY;
		if (bad_wr)
			*bad_wr = wr;
		goto err_busy;
	}
	for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
		if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {//TODO sleybo: check the cq
			UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", ibqp->qp_num,
					qp->rq.head, qp->rq.tail,
					qp->rq.max, nreq));
			ret = -ENOMEM;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);

		/* next/nda fields were pre-linked at QP creation; only
		 * the flags word needs resetting here. */
		((struct mthca_next_seg *) wqe)->flags = 0;

		wqe += sizeof (struct mthca_next_seg);

		if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
			UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full too many scatter\n",ibqp->qp_num));
			ret = -ERANGE;
			if (bad_wr)
				*bad_wr = wr;
			goto out;
		}

		/* Copy each scatter entry into a data segment
		 * (all hardware fields are big-endian). */
		for (i = 0; i < (int)wr->num_ds; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cl_hton32(wr->ds_array[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				cl_hton32(wr->ds_array[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				cl_hton64(wr->ds_array[i].vaddr);
			wqe += sizeof (struct mthca_data_seg);
		}

		/* Terminate a short scatter list with an invalid-lkey
		 * sentinel so the HCA stops scanning entries. */
		if (i < qp->rq.max_gs) {
			((struct mthca_data_seg *) wqe)->byte_count = 0;
			((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);
			((struct mthca_data_seg *) wqe)->addr = 0;
		}

			qp->wrid[ind] = wr->wr_id;

		++ind;
		if (unlikely(ind >= qp->rq.max))
			ind -= qp->rq.max;
	}
out:
	if (likely(nreq)) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		mb();
		*qp->rq.db = cl_hton32(qp->rq.head & 0xffff);
	}

err_busy:
	cl_spinlock_release(&qp->rq.lock);
	
	UVP_EXIT(UVP_DBG_QP);
	
	return ret;
}
900 \r
901 int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,\r
902                        ib_qp_type_t type, struct mthca_qp *qp)\r
903 {\r
904         int size;\r
905         int max_sq_sge;\r
906 \r
907         qp->rq.max_gs    = cap->max_recv_sge;\r
908         qp->sq.max_gs    = cap->max_send_sge;\r
909         max_sq_sge       = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),\r
910                                  sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);\r
911         if (max_sq_sge < (int)cap->max_send_sge)\r
912                 max_sq_sge = cap->max_send_sge;\r
913 \r
914         qp->wrid = cl_malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));\r
915         if (!qp->wrid)\r
916                 return -1;\r
917 \r
918         size = sizeof (struct mthca_next_seg) +\r
919                 qp->rq.max_gs * sizeof (struct mthca_data_seg);\r
920 \r
921         for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;\r
922              qp->rq.wqe_shift++)\r
923                 ; /* nothing */\r
924 \r
925         size = max_sq_sge * sizeof (struct mthca_data_seg);\r
926         switch (type) {\r
927         case IB_QPT_UNRELIABLE_DGRM:\r
928                 size += mthca_is_memfree(pd->context) ?\r
929                         sizeof (struct mthca_arbel_ud_seg) :\r
930                         sizeof (struct mthca_tavor_ud_seg);\r
931                 break;\r
932 \r
933         case IB_QPT_UNRELIABLE_CONN:\r
934                 size += sizeof (struct mthca_raddr_seg);\r
935                 break;\r
936 \r
937         case IB_QPT_RELIABLE_CONN:\r
938                 size += sizeof (struct mthca_raddr_seg);\r
939                 /*\r
940                  * An atomic op will require an atomic segment, a\r
941                  * remote address segment and one scatter entry.\r
942                  */\r
943                 if (size < (sizeof (struct mthca_atomic_seg) +\r
944                             sizeof (struct mthca_raddr_seg) +\r
945                             sizeof (struct mthca_data_seg)))\r
946                         size = (sizeof (struct mthca_atomic_seg) +\r
947                                 sizeof (struct mthca_raddr_seg) +\r
948                                 sizeof (struct mthca_data_seg));\r
949                 break;\r
950 \r
951         default:\r
952                 break;\r
953         }\r
954 \r
955         /* Make sure that we have enough space for a bind request */\r
956         if (size < sizeof (struct mthca_bind_seg))\r
957                 size = sizeof (struct mthca_bind_seg);\r
958 \r
959         size += sizeof (struct mthca_next_seg);\r
960 \r
961         for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;\r
962                 qp->sq.wqe_shift++)\r
963                 ; /* nothing */\r
964 \r
965                 qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,\r
966                         1 << qp->sq.wqe_shift);\r
967 \r
968                 qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);\r
969 \r
970         if (posix_memalign(&qp->buf, g_page_size,\r
971                 align(qp->buf_size, g_page_size))) {\r
972                 cl_free(qp->wrid);\r
973                 return -1;\r
974         }\r
975 \r
976         memset(qp->buf, 0, qp->buf_size);\r
977 \r
978         if (mthca_is_memfree(pd->context)) {\r
979                 struct mthca_next_seg *next;\r
980                 struct mthca_data_seg *scatter;\r
981                 int i;\r
982                 uint32_t sz;\r
983 \r
984                 sz = cl_hton32((sizeof (struct mthca_next_seg) +\r
985                             qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);\r
986 \r
987                 for (i = 0; i < qp->rq.max; ++i) {\r
988                         next = get_recv_wqe(qp, i);\r
989                         next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<\r
990                                              qp->rq.wqe_shift);\r
991                         next->ee_nds = sz;\r
992 \r
993                         for (scatter = (void *) (next + 1);\r
994                              (void *) scatter < (void *) ((char *)next + (uint32_t)(1 << qp->rq.wqe_shift));\r
995                              ++scatter)\r
996                                 scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);\r
997                 }\r
998 \r
999                 for (i = 0; i < qp->sq.max; ++i) {\r
1000                         next = get_send_wqe(qp, i);\r
1001                         next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<\r
1002                                               qp->sq.wqe_shift) +\r
1003                                              qp->send_wqe_offset);\r
1004                 }\r
1005         }\r
1006 \r
1007         qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);\r
1008         qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);\r
1009 \r
1010         return 0;\r
1011 }\r
1012 \r
1013 struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)\r
1014 {\r
1015         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;\r
1016 \r
1017         if (ctx->qp_table[tind].refcnt)\r
1018                 return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];\r
1019         else\r
1020                 return NULL;\r
1021 }\r
1022 \r
1023 int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)\r
1024 {\r
1025         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;\r
1026         int ret = 0;\r
1027 \r
1028         WaitForSingleObject( ctx->qp_table_mutex, INFINITE );\r
1029 \r
1030         if (!ctx->qp_table[tind].refcnt) {\r
1031                 ctx->qp_table[tind].table = cl_malloc(\r
1032                         (ctx->qp_table_mask + 1) * sizeof (struct mthca_qp *));\r
1033                 if (!ctx->qp_table[tind].table) {\r
1034                         ret = -1;\r
1035                         goto out;\r
1036                 }\r
1037         }\r
1038         ++ctx->qp_table[tind].refcnt;\r
1039         ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;\r
1040 \r
1041 out:\r
1042         ReleaseMutex( ctx->qp_table_mutex );\r
1043         return ret;\r
1044 }\r
1045 \r
1046 void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)\r
1047 {\r
1048         int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;\r
1049 \r
1050         WaitForSingleObject( ctx->qp_table_mutex, INFINITE );\r
1051 \r
1052         if (!--ctx->qp_table[tind].refcnt)\r
1053                 cl_free(ctx->qp_table[tind].table);\r
1054         else\r
1055                 ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;\r
1056         \r
1057         ReleaseMutex( ctx->qp_table_mutex );\r
1058 }\r
1059 \r
1060 int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,\r
1061                        int index, int *dbd, uint32_t *new_wqe)\r
1062 {\r
1063         struct mthca_next_seg *next;\r
1064 \r
1065         /*\r
1066          * For SRQs, all WQEs generate a CQE, so we're always at the\r
1067          * end of the doorbell chain.\r
1068          */\r
1069         if (qp->ibv_qp.srq) {\r
1070                 *new_wqe = 0;\r
1071                 return 0;\r
1072         }\r
1073 \r
1074         if (is_send)\r
1075                 next = get_send_wqe(qp, index);\r
1076         else\r
1077                 next = get_recv_wqe(qp, index);\r
1078 \r
1079         *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));\r
1080         if (next->ee_nds & cl_hton32(0x3f))\r
1081                 *new_wqe = (next->nda_op & cl_hton32(~0x3f)) |\r
1082                         (next->ee_nds & cl_hton32(0x3f));\r
1083         else\r
1084                 *new_wqe = 0;\r
1085 \r
1086         return 0;\r
1087 }\r
1088 \r