[MTHCA] bug fixes:
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Cisco Systems. All rights reserved.
4  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5  * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  *
35  * $Id$
36  */
37
38 #include <ib_verbs.h>
39 #include <ib_cache.h>
40 #include <ib_pack.h>
41
42 #include "mthca_dev.h"
43 #if defined(EVENT_TRACING)
44 #ifdef offsetof
45 #undef offsetof
46 #endif
47 #include "mthca_qp.tmh"
48 #endif
49 #include "mthca_cmd.h"
50 #include "mthca_memfree.h"
51 #include "mthca_wqe.h"
52
53
54 #ifdef ALLOC_PRAGMA
55 #pragma alloc_text (PAGE, mthca_init_qp_table)
56 #pragma alloc_text (PAGE, mthca_cleanup_qp_table)
57 #endif
58
59 enum {
60         MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
61         MTHCA_ACK_REQ_FREQ       = 10,
62         MTHCA_FLIGHT_LIMIT       = 9,
63         MTHCA_UD_HEADER_SIZE     = 72, /* largest UD header possible */
64         MTHCA_INLINE_HEADER_SIZE = 4,  /* data segment overhead for inline */
65         MTHCA_INLINE_CHUNK_SIZE  = 16  /* inline data segment chunk */
66 };
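/*
 * Inline send data is carved out of the same per-WQE space as scatter/
 * gather entries: each inline segment starts with a 4-byte header
 * (MTHCA_INLINE_HEADER_SIZE) and is padded to 16-byte chunks
 * (MTHCA_INLINE_CHUNK_SIZE), which is also the size of one data segment;
 * see the max_gs computation in mthca_set_qp_size().
 */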
67
68 enum {
69         MTHCA_QP_STATE_RST  = 0,
70         MTHCA_QP_STATE_INIT = 1,
71         MTHCA_QP_STATE_RTR  = 2,
72         MTHCA_QP_STATE_RTS  = 3,
73         MTHCA_QP_STATE_SQE  = 4,
74         MTHCA_QP_STATE_SQD  = 5,
75         MTHCA_QP_STATE_ERR  = 6,
76         MTHCA_QP_STATE_DRAINING = 7
77 };
78
79 enum {
80         MTHCA_QP_ST_RC  = 0x0,
81         MTHCA_QP_ST_UC  = 0x1,
82         MTHCA_QP_ST_RD  = 0x2,
83         MTHCA_QP_ST_UD  = 0x3,
84         MTHCA_QP_ST_MLX = 0x7
85 };
86
87 enum {
88         MTHCA_QP_PM_MIGRATED = 0x3,
89         MTHCA_QP_PM_ARMED    = 0x0,
90         MTHCA_QP_PM_REARM    = 0x1
91 };
92
93 enum {
94         /* qp_context flags */
95         MTHCA_QP_BIT_DE  = 1 <<  8,
96         /* params1 */
97         MTHCA_QP_BIT_SRE = 1 << 15,
98         MTHCA_QP_BIT_SWE = 1 << 14,
99         MTHCA_QP_BIT_SAE = 1 << 13,
100         MTHCA_QP_BIT_SIC = 1 <<  4,
101         MTHCA_QP_BIT_SSC = 1 <<  3,
102         /* params2 */
103         MTHCA_QP_BIT_RRE = 1 << 15,
104         MTHCA_QP_BIT_RWE = 1 << 14,
105         MTHCA_QP_BIT_RAE = 1 << 13,
106         MTHCA_QP_BIT_RIC = 1 <<  4,
107         MTHCA_QP_BIT_RSC = 1 <<  3
108 };
109
110 #pragma pack(push,1)
111 struct mthca_qp_path {
112         __be32 port_pkey;
113         u8     rnr_retry;
114         u8     g_mylmc;
115         __be16 rlid;
116         u8     ackto;
117         u8     mgid_index;
118         u8     static_rate;
119         u8     hop_limit;
120         __be32 sl_tclass_flowlabel;
121         u8     rgid[16];
122 };
123
124 struct mthca_qp_context {
125         __be32 flags;
126         __be32 tavor_sched_queue; /* Reserved on Arbel */
127         u8     mtu_msgmax;
128         u8     rq_size_stride;  /* Reserved on Tavor */
129         u8     sq_size_stride;  /* Reserved on Tavor */
130         u8     rlkey_arbel_sched_queue; /* Reserved on Tavor */
131         __be32 usr_page;
132         __be32 local_qpn;
133         __be32 remote_qpn;
134         u32    reserved1[2];
135         struct mthca_qp_path pri_path;
136         struct mthca_qp_path alt_path;
137         __be32 rdd;
138         __be32 pd;
139         __be32 wqe_base;
140         __be32 wqe_lkey;
141         __be32 params1;
142         __be32 reserved2;
143         __be32 next_send_psn;
144         __be32 cqn_snd;
145         __be32 snd_wqe_base_l;  /* Next send WQE on Tavor */
146         __be32 snd_db_index;    /* (debugging only entries) */
147         __be32 last_acked_psn;
148         __be32 ssn;
149         __be32 params2;
150         __be32 rnr_nextrecvpsn;
151         __be32 ra_buff_indx;
152         __be32 cqn_rcv;
153         __be32 rcv_wqe_base_l;  /* Next recv WQE on Tavor */
154         __be32 rcv_db_index;    /* (debugging only entries) */
155         __be32 qkey;
156         __be32 srqn;
157         __be32 rmsn;
158         __be16 rq_wqe_counter;  /* reserved on Tavor */
159         __be16 sq_wqe_counter;  /* reserved on Tavor */
160         u32    reserved3[18];
161 };
162
163 struct mthca_qp_param {
164         __be32 opt_param_mask;
165         u32    reserved1;
166         struct mthca_qp_context context;
167         u32    reserved2[62];
168 };
169 #pragma pack(pop)
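/*
 * These byte-packed structures are copied into a command mailbox and
 * handed to firmware by mthca_MODIFY_QP(), so their layout has to match
 * the hardware QP context exactly; all multi-byte fields are big-endian
 * (__be16/__be32) and are filled with cl_hton16/cl_hton32.
 */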
170
171 enum {
172         MTHCA_QP_OPTPAR_ALT_ADDR_PATH     = 1 << 0,
173         MTHCA_QP_OPTPAR_RRE               = 1 << 1,
174         MTHCA_QP_OPTPAR_RAE               = 1 << 2,
175         MTHCA_QP_OPTPAR_RWE               = 1 << 3,
176         MTHCA_QP_OPTPAR_PKEY_INDEX        = 1 << 4,
177         MTHCA_QP_OPTPAR_Q_KEY             = 1 << 5,
178         MTHCA_QP_OPTPAR_RNR_TIMEOUT       = 1 << 6,
179         MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
180         MTHCA_QP_OPTPAR_SRA_MAX           = 1 << 8,
181         MTHCA_QP_OPTPAR_RRA_MAX           = 1 << 9,
182         MTHCA_QP_OPTPAR_PM_STATE          = 1 << 10,
183         MTHCA_QP_OPTPAR_PORT_NUM          = 1 << 11,
184         MTHCA_QP_OPTPAR_RETRY_COUNT       = 1 << 12,
185         MTHCA_QP_OPTPAR_ALT_RNR_RETRY     = 1 << 13,
186         MTHCA_QP_OPTPAR_ACK_TIMEOUT       = 1 << 14,
187         MTHCA_QP_OPTPAR_RNR_RETRY         = 1 << 15,
188         MTHCA_QP_OPTPAR_SCHED_QUEUE       = 1 << 16
189 };
190
191 static const u8 mthca_opcode[] = {
192         MTHCA_OPCODE_RDMA_WRITE,
193         MTHCA_OPCODE_RDMA_WRITE_IMM,
194         MTHCA_OPCODE_SEND,
195         MTHCA_OPCODE_SEND_IMM,
196         MTHCA_OPCODE_RDMA_READ,
197         MTHCA_OPCODE_ATOMIC_CS,
198         MTHCA_OPCODE_ATOMIC_FA
199 };
200
201
202 enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS };
203
204 static struct _state_table {
205         int trans;
206         u32 req_param[NUM_TRANS];
207         u32 opt_param[NUM_TRANS];
208 } state_table[IBQPS_ERR + 1][IBQPS_ERR + 1] = {0};
209
210 static void fill_state_table()
211 {
212         struct _state_table *t;
213         RtlZeroMemory( state_table, sizeof(state_table) );
214
215         /* IBQPS_RESET */       
216         t = &state_table[IBQPS_RESET][0];
217         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
218         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
219
220         t[IBQPS_INIT].trans                                             = MTHCA_TRANS_RST2INIT;
221         t[IBQPS_INIT].req_param[UD]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_QKEY;
222         t[IBQPS_INIT].req_param[UC]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS;
223         t[IBQPS_INIT].req_param[RC]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS;
224         t[IBQPS_INIT].req_param[MLX]    = IB_QP_PKEY_INDEX |IB_QP_QKEY;
225         t[IBQPS_INIT].opt_param[MLX]    = IB_QP_PORT;
226
227         /* IBQPS_INIT */        
228         t = &state_table[IBQPS_INIT][0];
229         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
230         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
231
232         t[IBQPS_INIT].trans                                             = MTHCA_TRANS_INIT2INIT;
233         t[IBQPS_INIT].opt_param[UD]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_QKEY;
234         t[IBQPS_INIT].opt_param[UC]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS;
235         t[IBQPS_INIT].opt_param[RC]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS;
236         t[IBQPS_INIT].opt_param[MLX]    = IB_QP_PKEY_INDEX |IB_QP_QKEY;
237
238         t[IBQPS_RTR].trans                                              = MTHCA_TRANS_INIT2RTR;
239         t[IBQPS_RTR].req_param[UC]      = 
240                 IB_QP_AV |IB_QP_PATH_MTU |IB_QP_DEST_QPN |IB_QP_RQ_PSN;
241         t[IBQPS_RTR].req_param[RC]      = 
242                 IB_QP_AV |IB_QP_PATH_MTU |IB_QP_DEST_QPN |IB_QP_RQ_PSN |IB_QP_MAX_DEST_RD_ATOMIC |IB_QP_MIN_RNR_TIMER;
243         t[IBQPS_RTR].opt_param[UD]      = IB_QP_PKEY_INDEX |IB_QP_QKEY;
244         t[IBQPS_RTR].opt_param[UC]      = IB_QP_PKEY_INDEX |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS;
245         t[IBQPS_RTR].opt_param[RC]      = IB_QP_PKEY_INDEX |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS;
246         t[IBQPS_RTR].opt_param[MLX]     = IB_QP_PKEY_INDEX |IB_QP_QKEY;
247
248 /* IBQPS_RTR */ 
249         t = &state_table[IBQPS_RTR][0];
250         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
251         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
252
253         t[IBQPS_RTS].trans                                              = MTHCA_TRANS_RTR2RTS;
254         t[IBQPS_RTS].req_param[UD]      = IB_QP_SQ_PSN;
255         t[IBQPS_RTS].req_param[UC]      = IB_QP_SQ_PSN;
256         t[IBQPS_RTS].req_param[RC]      = 
257                 IB_QP_TIMEOUT |IB_QP_RETRY_CNT |IB_QP_RNR_RETRY |IB_QP_SQ_PSN |IB_QP_MAX_QP_RD_ATOMIC;
258         t[IBQPS_RTS].req_param[MLX]     = IB_QP_SQ_PSN;
259         t[IBQPS_RTS].opt_param[UD]      = IB_QP_CUR_STATE |IB_QP_QKEY;
260         t[IBQPS_RTS].opt_param[UC]      = 
261                 IB_QP_CUR_STATE |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PATH_MIG_STATE;
262         t[IBQPS_RTS].opt_param[RC]      =       IB_QP_CUR_STATE |IB_QP_ALT_PATH |
263                 IB_QP_ACCESS_FLAGS |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE;
264         t[IBQPS_RTS].opt_param[MLX]     = IB_QP_CUR_STATE |IB_QP_QKEY;
265
266         /* IBQPS_RTS */ 
267         t = &state_table[IBQPS_RTS][0];
268         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
269         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
270
271         t[IBQPS_RTS].trans                                              = MTHCA_TRANS_RTS2RTS;
272         t[IBQPS_RTS].opt_param[UD]      = IB_QP_CUR_STATE |IB_QP_QKEY;
273         t[IBQPS_RTS].opt_param[UC]      = IB_QP_ACCESS_FLAGS |IB_QP_ALT_PATH |IB_QP_PATH_MIG_STATE;
274         t[IBQPS_RTS].opt_param[RC]      =       IB_QP_ACCESS_FLAGS |
275                 IB_QP_ALT_PATH |IB_QP_PATH_MIG_STATE |IB_QP_MIN_RNR_TIMER;
276         t[IBQPS_RTS].opt_param[MLX]     = IB_QP_CUR_STATE |IB_QP_QKEY;
277
278         t[IBQPS_SQD].trans                                              = MTHCA_TRANS_RTS2SQD;
279         t[IBQPS_SQD].opt_param[UD]      = IB_QP_EN_SQD_ASYNC_NOTIFY;
280         t[IBQPS_SQD].opt_param[UC]      = IB_QP_EN_SQD_ASYNC_NOTIFY;
281         t[IBQPS_SQD].opt_param[RC]      =       IB_QP_EN_SQD_ASYNC_NOTIFY;
282         t[IBQPS_SQD].opt_param[MLX]     = IB_QP_EN_SQD_ASYNC_NOTIFY;
283
284         /* IBQPS_SQD */ 
285         t = &state_table[IBQPS_SQD][0];
286         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
287         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
288
289         t[IBQPS_RTS].trans                                              = MTHCA_TRANS_SQD2RTS;
290         t[IBQPS_RTS].opt_param[UD]      = IB_QP_CUR_STATE |IB_QP_QKEY;
291         t[IBQPS_RTS].opt_param[UC]      = IB_QP_CUR_STATE |
292                 IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PATH_MIG_STATE;
293         t[IBQPS_RTS].opt_param[RC]      =       IB_QP_CUR_STATE |IB_QP_ALT_PATH |
294                 IB_QP_ACCESS_FLAGS |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE;
295         t[IBQPS_RTS].opt_param[MLX]     = IB_QP_CUR_STATE |IB_QP_QKEY;
296
297         t[IBQPS_SQD].trans                                              = MTHCA_TRANS_SQD2SQD;
298         t[IBQPS_SQD].opt_param[UD]      = IB_QP_PKEY_INDEX |IB_QP_QKEY;
299         t[IBQPS_SQD].opt_param[UC]      = IB_QP_AV |    IB_QP_CUR_STATE |
300                 IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_PATH_MIG_STATE;
301         t[IBQPS_SQD].opt_param[RC]      =       IB_QP_AV |IB_QP_TIMEOUT |IB_QP_RETRY_CNT |IB_QP_RNR_RETRY |
302                 IB_QP_MAX_QP_RD_ATOMIC |IB_QP_MAX_DEST_RD_ATOMIC |IB_QP_CUR_STATE |IB_QP_ALT_PATH |
303                 IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE;
304         t[IBQPS_SQD].opt_param[MLX]     = IB_QP_PKEY_INDEX |IB_QP_QKEY;
305
306         /* IBQPS_SQE */ 
307         t = &state_table[IBQPS_SQE][0];
308         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
309         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
310
311         t[IBQPS_RTS].trans                                              = MTHCA_TRANS_SQERR2RTS;
312         t[IBQPS_RTS].opt_param[UD]      = IB_QP_CUR_STATE |IB_QP_QKEY;
313         t[IBQPS_RTS].opt_param[UC]      = IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS;
314 //      t[IBQPS_RTS].opt_param[RC]      =       IB_QP_CUR_STATE |IB_QP_MIN_RNR_TIMER;
315         t[IBQPS_RTS].opt_param[MLX]     = IB_QP_CUR_STATE |IB_QP_QKEY;
316
317         /* IBQPS_ERR */ 
318         t = &state_table[IBQPS_ERR][0];
319         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
320         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
321
322 }
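/*
 * mthca_modify_qp() looks transitions up as
 *     state_table[cur_state][new_state].trans
 * and the required/optional attribute masks as
 *     .req_param[qp->transport] / .opt_param[qp->transport].
 * Entries not filled in above stay zero, so the validity check relies on
 * MTHCA_TRANS_INVALID being 0.
 */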
323
324
325 static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp)
326 {
327         return qp->qpn >= (u32)dev->qp_table.sqp_start &&
328                 qp->qpn <= (u32)dev->qp_table.sqp_start + 3;
329 }
330
331 static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp)
332 {
333         return qp->qpn >= (u32)dev->qp_table.sqp_start &&
334                 qp->qpn <= (u32)(dev->qp_table.sqp_start + 1);
335 }
336
337
338 static void dump_wqe(u32 print_lvl, u32 *wqe_ptr , struct mthca_qp *qp_ptr)
339 {
340         __be32 *wqe = wqe_ptr;
341
342         UNUSED_PARAM_WOWPP(qp_ptr);
343         UNUSED_PARAM_WOWPP(print_lvl);
344
345         (void) wqe;     /* avoid warning if mthca_dbg compiled away... */
346         HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents  QPN 0x%06x \n",qp_ptr->qpn));
347         HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0
348                 , cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3])));
349         HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4
350                 , cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7])));
351         HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8
352                 , cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11])));
353         HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12
354                 , cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15])));
355
356 }
357
358
359 static void *get_recv_wqe(struct mthca_qp *qp, int n)
360 {
361         if (qp->is_direct)
362                 return (u8*)qp->queue.direct.page + (n << qp->rq.wqe_shift);
363         else
364                 return (u8*)qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].page +
365                         ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1));
366 }
367
368 static void *get_send_wqe(struct mthca_qp *qp, int n)
369 {
370         if (qp->is_direct)
371                 return (u8*)qp->queue.direct.page + qp->send_wqe_offset +
372                         (n << qp->sq.wqe_shift);
373         else
374                 return (u8*)qp->queue.page_list[(qp->send_wqe_offset +
375                                             (n << qp->sq.wqe_shift)) >>
376                                            PAGE_SHIFT].page +
377                         ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) &
378                          (PAGE_SIZE - 1));
379 }
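/*
 * For an indirect (page_list) queue the byte offset of WQE n, i.e.
 * send_wqe_offset + (n << wqe_shift) for the SQ or (n << wqe_shift) for
 * the RQ, is split into a page index (offset >> PAGE_SHIFT) and an offset
 * within that page (offset & (PAGE_SIZE - 1)).  For example, with 64-byte
 * WQEs (wqe_shift == 6) and 4K pages, RQ WQE 70 is at byte 0x1180, i.e.
 * page 1, offset 0x180.
 */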
380
381 static void mthca_wq_init(struct mthca_wq *wq)
382 {       
383         spin_lock_init(&wq->lock);      
384         wq->next_ind  = 0;      
385         wq->last_comp = wq->max - 1;    
386         wq->head      = 0;      
387         wq->tail      = 0;      
388 }
389
390 void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
391                     enum ib_event_type event_type, u8 vendor_code)
392 {
393         struct mthca_qp *qp;
394         struct ib_event event;
395         SPIN_LOCK_PREP(lh);
396
397         spin_lock(&dev->qp_table.lock, &lh);
398         qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
399         if (qp)
400                 atomic_inc(&qp->refcount);
401         spin_unlock(&lh);
402
403         if (!qp) {
404                 HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_QP,("Async event for bogus QP %06x\n", qpn));
405                 return;
406         }
407
408         event.device      = &dev->ib_dev;
409         event.event       = event_type;
410         event.element.qp  = &qp->ibqp;
411         event.vendor_specific = vendor_code;
412         HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_QP,("QP %06x Async event  event_type 0x%x vendor_code 0x%x\n",
413                 qpn,event_type,vendor_code));
414         if (qp->ibqp.event_handler)
415                 qp->ibqp.event_handler(&event, qp->ibqp.qp_context);
416
417         if (atomic_dec_and_test(&qp->refcount))
418                 wake_up(&qp->wait);
419 }
420
421 static int to_mthca_state(enum ib_qp_state ib_state)
422 {
423         switch (ib_state) {
424         case IBQPS_RESET: return MTHCA_QP_STATE_RST;
425         case IBQPS_INIT:  return MTHCA_QP_STATE_INIT;
426         case IBQPS_RTR:   return MTHCA_QP_STATE_RTR;
427         case IBQPS_RTS:   return MTHCA_QP_STATE_RTS;
428         case IBQPS_SQD:   return MTHCA_QP_STATE_SQD;
429         case IBQPS_SQE:   return MTHCA_QP_STATE_SQE;
430         case IBQPS_ERR:   return MTHCA_QP_STATE_ERR;
431         default:                return -1;
432         }
433 }
434
435 static int to_mthca_st(int transport)
436 {
437         switch (transport) {
438         case RC:  return MTHCA_QP_ST_RC;
439         case UC:  return MTHCA_QP_ST_UC;
440         case UD:  return MTHCA_QP_ST_UD;
441         case RD:  return MTHCA_QP_ST_RD;
442         case MLX: return MTHCA_QP_ST_MLX;
443         default:  return -1;
444         }
445 }
446
447 static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
448                         int attr_mask)
449 {
450         if (attr_mask & IB_QP_PKEY_INDEX)
451                 sqp->pkey_index = attr->pkey_index;
452         if (attr_mask & IB_QP_QKEY)
453                 sqp->qkey = attr->qkey;
454         if (attr_mask & IB_QP_SQ_PSN)
455                 sqp->send_psn = attr->sq_psn;
456 }
457
458 static void init_port(struct mthca_dev *dev, int port)
459 {
460         int err;
461         u8 status;
462         struct mthca_init_ib_param param;
463
464         RtlZeroMemory(&param, sizeof param);
465
466         param.port_width    = dev->limits.port_width_cap;
467         param.vl_cap    = dev->limits.vl_cap;
468         param.mtu_cap   = dev->limits.mtu_cap;
469         param.gid_cap   = (u16)dev->limits.gid_table_len;
470         param.pkey_cap  = (u16)dev->limits.pkey_table_len;
471
472         err = mthca_INIT_IB(dev, &param, port, &status);
473         if (err)
474                 HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP  ,("INIT_IB failed, return code %d.\n", err));
475         if (status)
476                 HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP  ,("INIT_IB returned status %02x.\n", status));
477 }
478
479
480 static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
481                                   int attr_mask)
482 {
483         u8 dest_rd_atomic;
484         u32 access_flags;
485         u32 hw_access_flags = 0;
486
487         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
488                 dest_rd_atomic = attr->max_dest_rd_atomic;
489         else
490                 dest_rd_atomic = qp->resp_depth;
491
492         if (attr_mask & IB_QP_ACCESS_FLAGS)
493                 access_flags = attr->qp_access_flags;
494         else
495                 access_flags = qp->atomic_rd_en;
496
497         if (!dest_rd_atomic)
498                 access_flags &= MTHCA_ACCESS_REMOTE_WRITE;
499
500         if (access_flags & MTHCA_ACCESS_REMOTE_READ)
501                 hw_access_flags |= MTHCA_QP_BIT_RRE;
502         if (access_flags & MTHCA_ACCESS_REMOTE_ATOMIC)
503                 hw_access_flags |= MTHCA_QP_BIT_RAE;
504         if (access_flags & MTHCA_ACCESS_REMOTE_WRITE)
505                 hw_access_flags |= MTHCA_QP_BIT_RWE;
506
507         return cl_hton32(hw_access_flags);
508 }
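/*
 * Note that a zero responder depth (no incoming RDMA reads/atomics
 * allowed) masks everything but remote write above, so RRE/RAE are only
 * granted once max_dest_rd_atomic is non-zero; with a non-zero depth the
 * bits simply follow qp_access_flags.
 */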
509
510 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
511 {
512         struct mthca_dev *dev = to_mdev(ibqp->device);
513         struct mthca_qp *qp = to_mqp(ibqp);
514         enum ib_qp_state cur_state, new_state;
515         struct mthca_mailbox *mailbox;
516         struct mthca_qp_param *qp_param;
517         struct mthca_qp_context *qp_context;
518         u32 req_param, opt_param;
519         u32 sqd_event = 0;
520         u8 status;
521         int err = -EINVAL;
522         SPIN_LOCK_PREP(lhs);
523         SPIN_LOCK_PREP(lhr);
524
525         down( &qp->mutex );
526
527         if (attr_mask & IB_QP_CUR_STATE) {
528                 if (attr->cur_qp_state != IBQPS_RTR &&
529                         attr->cur_qp_state != IBQPS_RTS &&
530                         attr->cur_qp_state != IBQPS_SQD &&
531                         attr->cur_qp_state != IBQPS_SQE)
532                         goto out;
533                 else
534                         cur_state = attr->cur_qp_state;
535         } else {
536                 spin_lock_irq(&qp->sq.lock, &lhs);
537                 spin_lock(&qp->rq.lock, &lhr);
538                 cur_state = qp->state;
539                 spin_unlock(&lhr);
540                 spin_unlock_irq(&lhs);
541         }
542
543         if (attr_mask & IB_QP_STATE) {
544                 if (attr->qp_state < 0 || attr->qp_state > IBQPS_ERR)
545                         goto out;
546                 new_state = attr->qp_state;
547         } else
548                 new_state = cur_state;
549
550         if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
551                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Illegal QP transition "
552                           "%d->%d\n", cur_state, new_state));
553                 goto out;
554         }
555
556         req_param = state_table[cur_state][new_state].req_param[qp->transport];
557         opt_param = state_table[cur_state][new_state].opt_param[qp->transport];
558
559         if ((req_param & attr_mask) != req_param) {
560                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("QP transition "
561                           "%d->%d missing req attr 0x%08x\n",
562                           cur_state, new_state,
563                           req_param & ~attr_mask));
564                 //NB: IBAL doesn't use all the fields, so some mandatory flags may legitimately be missing
565                 goto out;
566         }
567
568         if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
569                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("QP transition (transport %d) "
570                           "%d->%d has extra attr 0x%08x\n",
571                           qp->transport,
572                           cur_state, new_state,
573                           attr_mask & ~(req_param | opt_param |
574                                                  IB_QP_STATE)));
575                 //NB: The old code sometimes passes flags that this state table does not treat as optional
576                 goto out;
577         }
578
579         if ((attr_mask & IB_QP_PKEY_INDEX) && 
580                 attr->pkey_index >= dev->limits.pkey_table_len) {
581                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("PKey index (%u) too large. max is %d\n",
582                           attr->pkey_index,dev->limits.pkey_table_len-1)); 
583                 goto out;
584         }
585
586         if ((attr_mask & IB_QP_PORT) &&
587                 (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
588                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Port number (%u) is invalid\n", attr->port_num));
589                 goto out;
590         }
591
592         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
593                 attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
594                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Max rdma_atomic as initiator %u too large (max is %d)\n",
595                           attr->max_rd_atomic, dev->limits.max_qp_init_rdma));
596                 goto out;
597         }
598
599         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
600             attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
601                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Max rdma_atomic as responder %u too large (max %d)\n",
602                           attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift));
603                 goto out;
604         }
605
606         mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
607         if (IS_ERR(mailbox)) {
608                 err = PTR_ERR(mailbox);
609                 goto out;
610         }
611         qp_param = mailbox->buf;
612         qp_context = &qp_param->context;
613         RtlZeroMemory(qp_param, sizeof *qp_param);
614
615         qp_context->flags      = cl_hton32((to_mthca_state(new_state) << 28) |
616                                              (to_mthca_st(qp->transport) << 16));
617         qp_context->flags     |= cl_hton32(MTHCA_QP_BIT_DE);
618         if (!(attr_mask & IB_QP_PATH_MIG_STATE))
619                 qp_context->flags |= cl_hton32(MTHCA_QP_PM_MIGRATED << 11);
620         else {
621                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PM_STATE);
622                 switch (attr->path_mig_state) {
623                 case IB_APM_MIGRATED:
624                         qp_context->flags |= cl_hton32(MTHCA_QP_PM_MIGRATED << 11);
625                         break;
626                 case IB_APM_REARM:
627                         qp_context->flags |= cl_hton32(MTHCA_QP_PM_REARM << 11);
628                         break;
629                 case IB_APM_ARMED:
630                         qp_context->flags |= cl_hton32(MTHCA_QP_PM_ARMED << 11);
631                         break;
632                 }
633         }
634
635         /* leave tavor_sched_queue as 0 */
636
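        /*
         * mtu_msgmax packs the path MTU into the upper 3 bits and log2 of
         * the maximum message size into the lower 5 bits: UD and MLX QPs
         * are fixed at a 2K MTU with 2^11-byte messages, while the other
         * transports advertise a 2^31-byte message limit.
         */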
637         if (qp->transport == MLX || qp->transport == UD)
638                 qp_context->mtu_msgmax = (IB_MTU_LEN_2048 << 5) | 11;
639         else if (attr_mask & IB_QP_PATH_MTU) {
640                 if (attr->path_mtu < IB_MTU_LEN_256 || attr->path_mtu > IB_MTU_LEN_2048) {
641                         HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,
642                                 ("path MTU (%u) is invalid\n", attr->path_mtu));
643                         goto out_mailbox;
644                 }
645                 qp_context->mtu_msgmax = (u8)((attr->path_mtu << 5) | 31);
646         }
647
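        /*
         * Mem-free (Arbel) HCAs keep the queue geometry in the context:
         * log2 of the number of WQEs shifted left by 3, OR'd with
         * (wqe_shift - 4), i.e. log2 of the WQE stride in 16-byte units.
         */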
648         if (mthca_is_memfree(dev)) {
649                 if (qp->rq.max)
650                         qp_context->rq_size_stride = (u8)(long_log2(qp->rq.max) << 3);
651                 qp_context->rq_size_stride |= qp->rq.wqe_shift - 4;
652
653                 if (qp->sq.max)
654                         qp_context->sq_size_stride = (u8)(long_log2(qp->sq.max) << 3);
655                 qp_context->sq_size_stride |= qp->sq.wqe_shift - 4;
656         }
657
658         /* leave arbel_sched_queue as 0 */
659
660         if (qp->ibqp.ucontext)
661                 qp_context->usr_page =
662                         cl_hton32(to_mucontext(qp->ibqp.ucontext)->uar.index);
663         else
664                 qp_context->usr_page = cl_hton32(dev->driver_uar.index);
665         qp_context->local_qpn  = cl_hton32(qp->qpn);
666         if (attr_mask & IB_QP_DEST_QPN) {
667                 qp_context->remote_qpn = cl_hton32(attr->dest_qp_num);
668         }
669
670         if (qp->transport == MLX)
671                 qp_context->pri_path.port_pkey |=
672                         cl_hton32(to_msqp(qp)->port << 24);
673         else {
674                 if (attr_mask & IB_QP_PORT) {
675                         qp_context->pri_path.port_pkey |=
676                                 cl_hton32(attr->port_num << 24);
677                         qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PORT_NUM);
678                 }
679         }
680
681         if (attr_mask & IB_QP_PKEY_INDEX) {
682                 qp_context->pri_path.port_pkey |=
683                         cl_hton32(attr->pkey_index);
684                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PKEY_INDEX);
685         }
686
687         if (attr_mask & IB_QP_RNR_RETRY) {
688                 qp_context->pri_path.rnr_retry = attr->rnr_retry << 5;
689                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RNR_RETRY);
690         }
691
692         if (attr_mask & IB_QP_AV) {
693                 qp_context->pri_path.g_mylmc     = attr->ah_attr.src_path_bits & 0x7f;
694                 qp_context->pri_path.rlid        = cl_hton16(attr->ah_attr.dlid);
695                 //TODO: workaround: always set full speed; computing the real static rate is more complicated
696                 qp_context->pri_path.static_rate = 0;
697                 if (attr->ah_attr.ah_flags & IB_AH_GRH) {
698                         qp_context->pri_path.g_mylmc |= 1 << 7;
699                         qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
700                         qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
701                         qp_context->pri_path.sl_tclass_flowlabel =
702                                 cl_hton32((attr->ah_attr.sl << 28)                |
703                                             (attr->ah_attr.grh.traffic_class << 20) |
704                                             (attr->ah_attr.grh.flow_label));
705                         memcpy(qp_context->pri_path.rgid,
706                                attr->ah_attr.grh.dgid.raw, 16);
707                 } else {
708                         qp_context->pri_path.sl_tclass_flowlabel =
709                                 cl_hton32(attr->ah_attr.sl << 28);
710                 }
711                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
712         }
713
714         if (attr_mask & IB_QP_TIMEOUT) {
715                 qp_context->pri_path.ackto = attr->timeout << 3;
716                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
717         }
718
719         /* XXX alt_path */
720
721         /* leave rdd as 0 */
722         qp_context->pd         = cl_hton32(to_mpd(ibqp->pd)->pd_num);
723         /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
724         qp_context->wqe_lkey   = cl_hton32(qp->mr.ibmr.lkey);
725         qp_context->params1    = cl_hton32((unsigned long)(
726                 (MTHCA_ACK_REQ_FREQ << 28) |
727                 (MTHCA_FLIGHT_LIMIT << 24) |
728                 MTHCA_QP_BIT_SWE));
729         if (qp->sq_policy == IB_SIGNAL_ALL_WR)
730                 qp_context->params1 |= cl_hton32(MTHCA_QP_BIT_SSC);
731         if (attr_mask & IB_QP_RETRY_CNT) {
732                 qp_context->params1 |= cl_hton32(attr->retry_cnt << 16);
733                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RETRY_COUNT);
734         }
735
736         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
737                 if (attr->max_rd_atomic) {
738                         qp_context->params1 |=
739                                 cl_hton32(MTHCA_QP_BIT_SRE |
740                                             MTHCA_QP_BIT_SAE);
741                         qp_context->params1 |=
742                                 cl_hton32(fls(attr->max_rd_atomic - 1) << 21);
743                 }
744                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_SRA_MAX);
745         }
746
747         if (attr_mask & IB_QP_SQ_PSN)
748                 qp_context->next_send_psn = cl_hton32(attr->sq_psn);
749         qp_context->cqn_snd = cl_hton32(to_mcq(ibqp->send_cq)->cqn);
750
751         if (mthca_is_memfree(dev)) {
752                 qp_context->snd_wqe_base_l = cl_hton32(qp->send_wqe_offset);
753                 qp_context->snd_db_index   = cl_hton32(qp->sq.db_index);
754         }
755
756         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
757
758                 if (attr->max_dest_rd_atomic)
759                         qp_context->params2 |=
760                                 cl_hton32(fls(attr->max_dest_rd_atomic - 1) << 21);
761
762                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RRA_MAX);
763
764         }
765
766         if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
767                 qp_context->params2      |= get_hw_access_flags(qp, attr, attr_mask);
768                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RWE |
769                                                         MTHCA_QP_OPTPAR_RRE |
770                                                         MTHCA_QP_OPTPAR_RAE);
771         }
772
773         qp_context->params2 |= cl_hton32(MTHCA_QP_BIT_RSC);
774
775         if (ibqp->srq)
776                 qp_context->params2 |= cl_hton32(MTHCA_QP_BIT_RIC);
777
778         if (attr_mask & IB_QP_MIN_RNR_TIMER) {
779                 qp_context->rnr_nextrecvpsn |= cl_hton32(attr->min_rnr_timer << 24);
780                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
781         }
782         if (attr_mask & IB_QP_RQ_PSN)
783                 qp_context->rnr_nextrecvpsn |= cl_hton32(attr->rq_psn);
784
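        /*
         * ra_buff_indx points the QP at its slot in the RDB area, which
         * holds in-flight RDMA-read/atomic responder state: each QP owns
         * MTHCA_RDB_ENTRY_SIZE << rdb_shift bytes starting at rdb_base.
         */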
785         qp_context->ra_buff_indx =
786                 cl_hton32(dev->qp_table.rdb_base +
787                             ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
788                              dev->qp_table.rdb_shift));
789
790         qp_context->cqn_rcv = cl_hton32(to_mcq(ibqp->recv_cq)->cqn);
791
792         if (mthca_is_memfree(dev))
793                 qp_context->rcv_db_index   = cl_hton32(qp->rq.db_index);
794
795         if (attr_mask & IB_QP_QKEY) {
796                 qp_context->qkey = cl_hton32(attr->qkey);
797                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_Q_KEY);
798         }
799
800         if (ibqp->srq)
801                 qp_context->srqn = cl_hton32(1 << 24 |
802                                                to_msrq(ibqp->srq)->srqn);
803
804         if (cur_state == IBQPS_RTS && new_state == IBQPS_SQD    &&
805             attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY               &&
806             attr->en_sqd_async_notify)
807                 sqd_event = (u32)(1 << 31);
808
809         err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
810                               qp->qpn, 0, mailbox, sqd_event, &status);
811         if (err) {
812                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("mthca_MODIFY_QP returned an error (qp-num = 0x%x, status %02x): "
813                         "cur_state = %d, new_state = %d, attr_mask = %d, req_param = %d, opt_param = %d\n",
814                         ibqp->qp_num, status, cur_state, new_state,
815                         attr_mask, req_param, opt_param));
816                 goto out_mailbox;
817         }
818         if (status) {
819                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("mthca_MODIFY_QP returned bad status (qp-num = 0x%x, status %02x): "
820                         "cur_state = %d, new_state = %d, attr_mask = %d, req_param = %d, opt_param = %d\n",
821                         ibqp->qp_num, status, cur_state, new_state,
822                         attr_mask, req_param, opt_param));
823                 err = -EINVAL;
824                 goto out_mailbox;
825         }
826
827         qp->state = new_state;
828         if (attr_mask & IB_QP_ACCESS_FLAGS)
829                 qp->atomic_rd_en = (u8)attr->qp_access_flags;
830         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
831                 qp->resp_depth = attr->max_dest_rd_atomic;
832
833         if (is_sqp(dev, qp))
834                 store_attrs(to_msqp(qp), attr, attr_mask);
835
836         /*
837          * If we moved QP0 to RTR, bring the IB link up; if we moved
838          * QP0 to RESET or ERROR, bring the link back down.
839          */
840         if (is_qp0(dev, qp)) {
841                 if (cur_state != IBQPS_RTR &&
842                         new_state == IBQPS_RTR)
843                         init_port(dev, to_msqp(qp)->port);
844
845                 if (cur_state != IBQPS_RESET &&
846                         cur_state != IBQPS_ERR &&
847                         (new_state == IBQPS_RESET ||
848                         new_state == IBQPS_ERR))
849                         mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
850         }
851
852         /*
853          * If we moved a kernel QP to RESET, clean up all old CQ
854          * entries and reinitialize the QP.
855          */
856         if (new_state == IBQPS_RESET && !qp->ibqp.ucontext) {
857                 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
858                                qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
859                 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
860                         mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn,
861                                        qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
862
863                 mthca_wq_init(&qp->sq);
864                 qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
865                 mthca_wq_init(&qp->rq);
866                 qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
867
868                 if (mthca_is_memfree(dev)) {
869                         *qp->sq.db = 0;
870                         *qp->rq.db = 0;
871                 }
872         }
873
874 out_mailbox:
875         mthca_free_mailbox(dev, mailbox);
876
877 out:
878         up( &qp->mutex );
879         return err;
880 }
881
882 static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz)
883 {
884
885         /*
886          * Calculate the maximum size of WQE s/g segments, excluding
887          * the next segment and other non-data segments.
888          */
889         int max_data_size = desc_sz - sizeof (struct mthca_next_seg);
890
891         switch (qp->transport) {
892         case MLX:
893                 max_data_size -= 2 * sizeof (struct mthca_data_seg);
894                 break;
895
896         case UD:
897                 if (mthca_is_memfree(dev))
898                         max_data_size -= sizeof (struct mthca_arbel_ud_seg);
899                 else
900                         max_data_size -= sizeof (struct mthca_tavor_ud_seg);
901                 break;
902
903         default:
904                 max_data_size -= sizeof (struct mthca_raddr_seg);
905                 break;
906         }
907         return max_data_size;
908 }
909
910 static inline int mthca_max_inline_data(int max_data_size)
911 {
912         return max_data_size - MTHCA_INLINE_HEADER_SIZE ;
913 }
914
915 static void mthca_adjust_qp_caps(struct mthca_dev *dev,
916                                  struct mthca_qp *qp)
917 {
918         int max_data_size = mthca_max_data_size(dev, qp,
919                 min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift));
920
921         qp->max_inline_data = mthca_max_inline_data( max_data_size);
922
923         qp->sq.max_gs = min(dev->limits.max_sg,
924                 (int)(max_data_size / sizeof (struct mthca_data_seg)));
925         qp->rq.max_gs = min(dev->limits.max_sg,
926                 (int)((min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
927                 sizeof (struct mthca_next_seg)) / sizeof (struct mthca_data_seg)));     
928 }
929
930 /*
931  * Allocate and register buffer for WQEs.  qp->rq.max, sq.max,
932  * rq.max_gs and sq.max_gs must all be assigned.
933  * mthca_alloc_wqe_buf will calculate rq.wqe_shift and
934  * sq.wqe_shift (as well as send_wqe_offset, is_direct, and
935  * queue)
936  */
937 static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
938                                struct mthca_pd *pd,
939                                struct mthca_qp *qp)
940 {
941         int size;
942         int err = -ENOMEM;
943         
944         HCA_ENTER(HCA_DBG_QP);
945         size = sizeof (struct mthca_next_seg) +
946                 qp->rq.max_gs * sizeof (struct mthca_data_seg);
947
948         if (size > dev->limits.max_desc_sz)
949                 return -EINVAL;
950
951         for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
952              qp->rq.wqe_shift++)
953                 ; /* nothing */
954
955         size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
956         switch (qp->transport) {
957                 case MLX:
958                         size += 2 * sizeof (struct mthca_data_seg);
959                         break;
960
961                 case UD:
962                         size += mthca_is_memfree(dev) ?
963                                 sizeof (struct mthca_arbel_ud_seg) :
964                                 sizeof (struct mthca_tavor_ud_seg);
965                         break;
966                 
967                 case UC:
968                         size += sizeof (struct mthca_raddr_seg);
969                         break;
970                 
971                 case RC:
972                         size += sizeof (struct mthca_raddr_seg);
973                         /*
974                          * An atomic op will require an atomic segment, a
975                          * remote address segment and one scatter entry.
976                          */
977                         size = max(size,
978                                  sizeof (struct mthca_atomic_seg) +
979                                  sizeof (struct mthca_raddr_seg) +
980                                  sizeof (struct mthca_data_seg));
981                         break;
982                         
983                 default:
984                         break;
985         }
986                 
987         /* Make sure that we have enough space for a bind request */
988         size = max(size, sizeof (struct mthca_bind_seg));
989         
990         size += sizeof (struct mthca_next_seg);
991         
992         if (size > dev->limits.max_desc_sz)
993                 return -EINVAL;
994
995         for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
996              qp->sq.wqe_shift++)
997                 ; /* nothing */
998
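        /*
         * The receive queue occupies the start of the WQE buffer and the
         * send queue follows it; send_wqe_offset is the end of the RQ
         * rounded up to the SQ WQE size so that every send WQE stays
         * naturally aligned.
         */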
999         qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
1000                                     1 << qp->sq.wqe_shift);
1001
1002         /*
1003          * If this is a userspace QP, we don't actually have to
1004          * allocate anything.  All we need is to calculate the WQE
1005          * sizes and the send_wqe_offset, so we're done now.
1006          */
1007         if (pd->ibpd.ucontext)
1008                 return 0;
1009
1010         size = (int)(LONG_PTR)NEXT_PAGE_ALIGN(qp->send_wqe_offset +
1011                           (qp->sq.max << qp->sq.wqe_shift));
1012
1013         qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64),
1014                            GFP_KERNEL);
1015         if (!qp->wrid)
1016                 goto err_out;
1017
1018         err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE,
1019                               &qp->queue, &qp->is_direct, pd, 0, &qp->mr);
1020         if (err)
1021                 goto err_out;
1022         
1023         HCA_EXIT(HCA_DBG_QP);
1024         return 0;
1025
1026 err_out:
1027         kfree(qp->wrid);
1028         return err;
1029 }
1030
1031 static void mthca_free_wqe_buf(struct mthca_dev *dev,
1032                                struct mthca_qp *qp)
1033 {
1034         mthca_buf_free(dev, (int)(LONG_PTR)NEXT_PAGE_ALIGN(qp->send_wqe_offset +
1035                                        (qp->sq.max << qp->sq.wqe_shift)),
1036                        &qp->queue, qp->is_direct, &qp->mr);
1037         kfree(qp->wrid);
1038 }
1039
1040 static int mthca_map_memfree(struct mthca_dev *dev,
1041                              struct mthca_qp *qp)
1042 {
1043         int ret;
1044
1045         if (mthca_is_memfree(dev)) {
1046                 ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
1047                 if (ret)
1048                         return ret;
1049
1050                 ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);
1051                 if (ret)
1052                         goto err_qpc;
1053
1054                 ret = mthca_table_get(dev, dev->qp_table.rdb_table,
1055                                       qp->qpn << dev->qp_table.rdb_shift);
1056                 if (ret)
1057                         goto err_eqpc;
1058
1059         }
1060
1061         return 0;
1062
1063 err_eqpc:
1064         mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1065
1066 err_qpc:
1067         mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1068
1069         return ret;
1070 }
1071
1072 static void mthca_unmap_memfree(struct mthca_dev *dev,
1073                                 struct mthca_qp *qp)
1074 {
1075         mthca_table_put(dev, dev->qp_table.rdb_table,
1076                         qp->qpn << dev->qp_table.rdb_shift);
1077         mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1078         mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1079 }
1080
1081 static int mthca_alloc_memfree(struct mthca_dev *dev,
1082                                struct mthca_qp *qp)
1083 {
1084         int ret = 0;
1085
1086         if (mthca_is_memfree(dev)) {
1087                 qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
1088                                                  qp->qpn, &qp->rq.db);
1089                 if (qp->rq.db_index < 0)
1090                         return qp->rq.db_index;
1091
1092                 qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
1093                                                  qp->qpn, &qp->sq.db);
1094                 if (qp->sq.db_index < 0){
1095                         mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1096                         return qp->sq.db_index;
1097                 }
1098
1099         }
1100
1101         return ret;
1102 }
1103
1104 static void mthca_free_memfree(struct mthca_dev *dev,
1105                                struct mthca_qp *qp)
1106 {
1107         if (mthca_is_memfree(dev)) {
1108                 mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
1109                 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1110         }
1111 }
1112
1113 static int mthca_alloc_qp_common(struct mthca_dev *dev,
1114                                  struct mthca_pd *pd,
1115                                  struct mthca_cq *send_cq,
1116                                  struct mthca_cq *recv_cq,
1117                                  enum ib_sig_type send_policy,
1118                                  struct mthca_qp *qp)
1119 {
1120         int ret;
1121         int i;
1122
1123         atomic_set(&qp->refcount, 1);
1124         init_waitqueue_head(&qp->wait);
1125         KeInitializeMutex(&qp->mutex, 0);
1126
1127         qp->state        = IBQPS_RESET;
1128         qp->atomic_rd_en = 0;
1129         qp->resp_depth   = 0;
1130         qp->sq_policy    = send_policy;
1131         mthca_wq_init(&qp->sq);
1132         mthca_wq_init(&qp->rq);
1133
1134         UNREFERENCED_PARAMETER(send_cq);
1135         UNREFERENCED_PARAMETER(recv_cq);
1136         
1137         ret = mthca_map_memfree(dev, qp);
1138         if (ret)
1139                 return ret;
1140
1141         ret = mthca_alloc_wqe_buf(dev, pd, qp);
1142         if (ret) {
1143                 mthca_unmap_memfree(dev, qp);
1144                 return ret;
1145         }
1146
1147         mthca_adjust_qp_caps(dev, qp);
1148
1149         /*
1150          * If this is a userspace QP, we're done now.  The doorbells
1151          * will be allocated and buffers will be initialized in
1152          * userspace.
1153          */
1154         if (pd->ibpd.ucontext)
1155                 return 0;
1156
1157         ret = mthca_alloc_memfree(dev, qp);
1158         if (ret) {
1159                 mthca_free_wqe_buf(dev, qp);
1160                 mthca_unmap_memfree(dev, qp);
1161                 return ret;
1162         }
1163
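        /*
         * Mem-free HCAs expect the WQE chains to be pre-linked: each
         * receive WQE's next segment points at the following WQE (wrapping
         * at rq.max), ee_nds carries the WQE size in 16-byte units, and
         * every scatter entry is stamped with MTHCA_INVAL_LKEY so unused
         * entries are harmless; send WQEs are chained the same way,
         * offset by send_wqe_offset.
         */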
1164         if (mthca_is_memfree(dev)) {
1165                 struct mthca_next_seg *next;
1166                 struct mthca_data_seg *scatter;
1167                 int size = (sizeof (struct mthca_next_seg) +
1168                             qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;
1169
1170                 for (i = 0; i < qp->rq.max; ++i) {
1171                         next = get_recv_wqe(qp, i);
1172                         next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<
1173                                                    qp->rq.wqe_shift);
1174                         next->ee_nds = cl_hton32(size);
1175
1176                         for (scatter = (void *) (next + 1);
1177                              (void *) scatter < (void *) ((u8*)next + (1 << qp->rq.wqe_shift));
1178                              ++scatter)
1179                                 scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);
1180                 }
1181
1182                 for (i = 0; i < qp->sq.max; ++i) {
1183                         next = get_send_wqe(qp, i);
1184                         next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<
1185                                                     qp->sq.wqe_shift) +
1186                                                    qp->send_wqe_offset);
1187                 }
1188         }
1189
1190         qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
1191         qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
1192
1193         return 0;
1194 }
1195
1196 static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
1197         struct mthca_qp *qp)
1198 {
1199         int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);
1200
1201         /* Sanity check QP size before proceeding */
1202         if (cap->max_send_wr     > (u32)dev->limits.max_wqes ||
1203             cap->max_recv_wr     > (u32)dev->limits.max_wqes ||
1204             cap->max_send_sge    > (u32)dev->limits.max_sg   ||
1205             cap->max_recv_sge    > (u32)dev->limits.max_sg   ||
1206             cap->max_inline_data > (u32)mthca_max_inline_data(max_data_size))
1207                 return -EINVAL;
1208
1209         /*
1210          * For MLX transport we need 2 extra S/G entries:
1211          * one for the header and one for the checksum at the end
1212          */
1213         if (qp->transport == MLX && cap->max_recv_sge + 2 > (u32)dev->limits.max_sg)
1214                 return -EINVAL;
1215
1216         if (mthca_is_memfree(dev)) {
1217                 qp->rq.max = cap->max_recv_wr ?
1218                         roundup_pow_of_two(cap->max_recv_wr) : 0;
1219                 qp->sq.max = cap->max_send_wr ?
1220                         roundup_pow_of_two(cap->max_send_wr) : 0;
1221         } else {
1222                 qp->rq.max = cap->max_recv_wr;
1223                 qp->sq.max = cap->max_send_wr;
1224         }
1225
1226         qp->rq.max_gs = cap->max_recv_sge;
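        /*
         * Inline data shares the send WQE's gather space: the request plus
         * the 4-byte inline header is rounded up to 16-byte chunks and
         * divided by the 16-byte data segment size, e.g. 60 bytes of
         * inline data costs ALIGN(60 + 4, 16) / 16 = 4 gather entries.
         */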
1227         qp->sq.max_gs = MAX(cap->max_send_sge,
1228                               ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
1229                                     MTHCA_INLINE_CHUNK_SIZE) /
1230                               (int)sizeof (struct mthca_data_seg));
1231
1232         return 0;
1233 }
1234
1235 int mthca_alloc_qp(struct mthca_dev *dev,
1236                    struct mthca_pd *pd,
1237                    struct mthca_cq *send_cq,
1238                    struct mthca_cq *recv_cq,
1239                    enum ib_qp_type_t type,
1240                    enum ib_sig_type send_policy,
1241                    struct ib_qp_cap *cap,
1242                    struct mthca_qp *qp)
1243 {
1244         int err;
1245         SPIN_LOCK_PREP(lh);
1246
1247         switch (type) {
1248         case IB_QPT_RELIABLE_CONN: qp->transport = RC; break;
1249         case IB_QPT_UNRELIABLE_CONN: qp->transport = UC; break;
1250         case IB_QPT_UNRELIABLE_DGRM: qp->transport = UD; break;
1251         default: return -EINVAL;
1252         }
1253
1254         err = mthca_set_qp_size(dev, cap, qp);
1255         if (err)
1256                 return err;
1257
1258         qp->qpn = mthca_alloc(&dev->qp_table.alloc);
1259         if (qp->qpn == -1)
1260                 return -ENOMEM;
1261
1262         err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1263                                     send_policy, qp);
1264         if (err) {
1265                 mthca_free(&dev->qp_table.alloc, qp->qpn);
1266                 return err;
1267         }
1268
1269         spin_lock_irq(&dev->qp_table.lock, &lh);
1270         mthca_array_set(&dev->qp_table.qp,
1271                         qp->qpn & (dev->limits.num_qps - 1), qp);
1272         spin_unlock_irq(&lh);
1273
1274         return 0;
1275 }
1276
1277 int mthca_alloc_sqp(struct mthca_dev *dev,
1278                     struct mthca_pd *pd,
1279                     struct mthca_cq *send_cq,
1280                     struct mthca_cq *recv_cq,
1281                     enum ib_sig_type send_policy,
1282                     struct ib_qp_cap *cap,
1283                     int qpn,
1284                     int port,
1285                     struct mthca_sqp *sqp)
1286 {
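        /*
         * Special QPs live in a reserved range: sqp_start + 0/1 are QP0
         * for ports 1/2 and sqp_start + 2/3 are QP1 (cf. is_sqp()/is_qp0()),
         * so the real QP number is qpn * 2 + sqp_start + (port - 1).
         */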
1287         u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
1288         int err;
1289         SPIN_LOCK_PREP(lhs);
1290         SPIN_LOCK_PREP(lhr);
1291         SPIN_LOCK_PREP(lht);
1292
1293         err = mthca_set_qp_size(dev, cap, &sqp->qp);
1294         if (err)
1295                 return err;
1296
1297         alloc_dma_zmem_map(dev, 
1298                 sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE, 
1299                 PCI_DMA_BIDIRECTIONAL,
1300                 &sqp->sg);
1301         if (!sqp->sg.page)
1302                 return -ENOMEM;
1303
1304         spin_lock_irq(&dev->qp_table.lock, &lht);
1305         if (mthca_array_get(&dev->qp_table.qp, mqpn))
1306                 err = -EBUSY;
1307         else
1308                 mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
1309         spin_unlock_irq(&lht);
1310
1311         if (err)
1312                 goto err_out;
1313
1314         sqp->port = port;
1315         sqp->qp.qpn       = mqpn;
1316         sqp->qp.transport = MLX;
1317
1318         err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1319                                     send_policy, &sqp->qp);
1320         if (err)
1321                 goto err_out_free;
1322
1323         atomic_inc(&pd->sqp_count);
1324
1325         return 0;
1326
1327  err_out_free:
1328         /*
1329          * Lock CQs here, so that CQ polling code can do QP lookup
1330          * without taking a lock.
1331          */
1332         spin_lock_irq(&send_cq->lock, &lhs);
1333         if (send_cq != recv_cq)
1334                 spin_lock(&recv_cq->lock, &lhr);
1335
1336         spin_lock(&dev->qp_table.lock, &lht);
1337         mthca_array_clear(&dev->qp_table.qp, mqpn);
1338         spin_unlock(&lht);
1339
1340         if (send_cq != recv_cq)
1341                 spin_unlock(&lhr);
1342         spin_unlock_irq(&lhs);
1343
1344  err_out:
1345         free_dma_mem_map(dev, &sqp->sg, PCI_DMA_BIDIRECTIONAL);
1346
1347         return err;
1348 }
1349
1350 void mthca_free_qp(struct mthca_dev *dev,
1351                    struct mthca_qp *qp)
1352 {
1353         u8 status;
1354         struct mthca_cq *send_cq;
1355         struct mthca_cq *recv_cq;
1356         SPIN_LOCK_PREP(lhs);
1357         SPIN_LOCK_PREP(lhr);
1358         SPIN_LOCK_PREP(lht);
1359
1360         send_cq = to_mcq(qp->ibqp.send_cq);
1361         recv_cq = to_mcq(qp->ibqp.recv_cq);
1362
1363         /*
1364          * Lock CQs here, so that CQ polling code can do QP lookup
1365          * without taking a lock.
1366          */
1367         spin_lock_irq(&send_cq->lock, &lhs);
1368         if (send_cq != recv_cq)
1369                 spin_lock(&recv_cq->lock, &lhr);
1370
1371         spin_lock(&dev->qp_table.lock, &lht);
1372         mthca_array_clear(&dev->qp_table.qp,
1373                           qp->qpn & (dev->limits.num_qps - 1));
1374         spin_unlock(&lht);
1375
1376         if (send_cq != recv_cq)
1377                 spin_unlock(&lhr);
1378         spin_unlock_irq(&lhs);
1379
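        /*
         * Release our reference and wait for any CQ polling code that
         * looked this QP up to drop its references before the QP
         * resources are torn down.
         */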
1380         atomic_dec(&qp->refcount);
1381         wait_event(&qp->wait, !atomic_read(&qp->refcount));
1382
1383         if (qp->state != IBQPS_RESET) {
1384                 mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
1385         }
1386
1387         /*
1388          * If this is a userspace QP, the buffers, MR, CQs and so on
1389          * will be cleaned up in userspace, so all we have to do is
1390          * unref the mem-free tables and free the QPN in our table.
1391          */
1392         if (!qp->ibqp.ucontext) {
1393                 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
1394                                qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1395                 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
1396                         mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn,
1397                                        qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1398
1399                 mthca_free_memfree(dev, qp);
1400                 mthca_free_wqe_buf(dev, qp);
1401         }
1402
1403         mthca_unmap_memfree(dev, qp);
1404
1405         if (is_sqp(dev, qp)) {
1406                 atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
1407                 free_dma_mem_map(dev, &to_msqp(qp)->sg, PCI_DMA_BIDIRECTIONAL);
1408         } else
1409                 mthca_free(&dev->qp_table.alloc, qp->qpn);
1410 }
1411
1412 static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)
1413 {
1414
1415         enum mthca_wr_opcode opcode;
1416
1417         switch (wr->wr_type) {
1418                 case WR_SEND: 
1419                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;
1420                         break;
1421                 case WR_RDMA_WRITE:     
1422                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;
1423                         break;
1424                 case WR_RDMA_READ:      opcode = MTHCA_OPCODE_RDMA_READ; break;
1425                 case WR_COMPARE_SWAP:   opcode = MTHCA_OPCODE_ATOMIC_CS; break;
1426                 case WR_FETCH_ADD:      opcode = MTHCA_OPCODE_ATOMIC_FA; break;
1427                 default:                opcode = MTHCA_OPCODE_INVALID; break;
1428         }
1429         return opcode;
1430 }
1431
1432 /* Create UD header for an MLX send and build a data segment for it */
1433 static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1434                             int ind, struct _ib_send_wr *wr,
1435                             struct mthca_mlx_seg *mlx,
1436                             struct mthca_data_seg *data)
1437 {
1438         enum mthca_wr_opcode opcode = conv_ibal_wr_opcode(wr);
1439         int header_size;
1440         int err;
1441         u16 pkey;
1442         CPU_2_BE64_PREP;
1443
1444         if (!wr->dgrm.ud.h_av) {
1445                 HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_AV, 
1446                         ("absent AV in send wr %p\n", wr));
1447                 return -EINVAL;
1448         }
1449                 
1450         ib_ud_header_init(256, /* assume a MAD */
1451                 mthca_ah_grh_present(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)),
1452                 &sqp->ud_header);
1453
1454         err = mthca_read_ah(dev, to_mah((struct ib_ah *)wr->dgrm.ud.h_av), &sqp->ud_header);
1455         if (err){
1456                 HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_AV, ("read av error %p\n",
1457                         to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av));
1458                 return err;
1459         }
1460         mlx->flags &= ~cl_hton32(MTHCA_NEXT_SOLICIT | 1);
1461         mlx->flags |= cl_hton32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
1462                                   (sqp->ud_header.lrh.destination_lid ==
1463                                    IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
1464                                   (sqp->ud_header.lrh.service_level << 8));
1465         mlx->rlid = sqp->ud_header.lrh.destination_lid;
1466         mlx->vcrc = 0;
1467
1468         switch (opcode) {
1469         case MTHCA_OPCODE_SEND:
1470                 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1471                 sqp->ud_header.immediate_present = 0;
1472                 break;
1473         case MTHCA_OPCODE_SEND_IMM:
1474                 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
1475                 sqp->ud_header.immediate_present = 1;
1476                 sqp->ud_header.immediate_data = wr->immediate_data;
1477                 break;
1478         default:
1479                 return -EINVAL;
1480         }
1481
1482         sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
1483         if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
1484                 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
1485         sqp->ud_header.bth.solicited_event = (u8)!!(wr->send_opt & IB_SEND_OPT_SOLICITED);
1486         if (!sqp->qp.ibqp.qp_num)
1487                 ib_get_cached_pkey(&dev->ib_dev, (u8)sqp->port,
1488                                    sqp->pkey_index, &pkey);
1489         else
1490                 ib_get_cached_pkey(&dev->ib_dev, (u8)sqp->port,
1491                                    wr->dgrm.ud.pkey_index, &pkey);
1492         sqp->ud_header.bth.pkey = cl_hton16(pkey);
1493         sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;
1494         sqp->ud_header.bth.psn = cl_hton32((sqp->send_psn++) & ((1 << 24) - 1));
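        /*
         * If the high-order bit of the WR Q_Key is set, use the QP's own
         * Q_Key instead; the constant below tests that bit on the
         * network-byte-order value.
         */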
1495         sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ?
1496                                                cl_hton32(sqp->qkey) : wr->dgrm.ud.remote_qkey;
1497         sqp->ud_header.deth.source_qpn = cl_hton32(sqp->qp.ibqp.qp_num);
1498
1499         header_size = ib_ud_header_pack(&sqp->ud_header,
1500                                         (u8*)sqp->sg.page +
1501                                         ind * MTHCA_UD_HEADER_SIZE);
1502
1503         data->byte_count = cl_hton32(header_size);
1504         data->lkey       = cl_hton32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
1505         data->addr       = CPU_2_BE64(sqp->sg.dma_address +
1506                                        ind * MTHCA_UD_HEADER_SIZE);
1507
1508         return 0;
1509 }
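/*
 * For MLX transport the complete send WQE built by the post-send routines
 * is: the control (next/mlx) segment, the data segment filled in here
 * (pointing at the UD header packed into sqp->sg), one data segment per
 * caller SGE, and a trailing inline segment reserving room for the ICRC.
 */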
1510
1511 static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
1512                                     struct ib_cq *ib_cq)
1513 {
1514         unsigned cur;
1515         struct mthca_cq *cq;
1516         SPIN_LOCK_PREP(lh);
1517
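        /*
         * head and tail are free-running unsigned counters, so head - tail
         * yields the number of outstanding WQEs even across 32-bit
         * wraparound; e.g. head = 2 (after wrap) and tail = 0xfffffffe
         * still give cur = 4.
         */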
1518         cur = wq->head - wq->tail;
1519         if (likely((int)cur + nreq < wq->max))
1520                 return 0;
1521
1522         cq = to_mcq(ib_cq);
1523         spin_lock_dpc(&cq->lock, &lh);
1524         cur = wq->head - wq->tail;
1525         spin_unlock_dpc(&lh);
1526
1527         return (int)cur + nreq >= wq->max;
1528 }
1529
1530 int mthca_tavor_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr,
1531                           struct _ib_send_wr **bad_wr)
1532 {
1533         struct mthca_dev *dev = to_mdev(ibqp->device);
1534         struct mthca_qp *qp = to_mqp(ibqp);
1535         u8 *wqe;
1536         u8 *prev_wqe;
1537         int err = 0;
1538         int nreq;
1539         int i;
1540         int size;
1541         int size0 = 0;
1542         u32 f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
1543         int ind;
1544         u8 op0 = 0;
1545         enum mthca_wr_opcode opcode;
1546         SPIN_LOCK_PREP(lh);   
1547
1548         spin_lock_irqsave(&qp->sq.lock, &lh);
1549     
1550         /* XXX check that state is OK to post send */
1551
1552         ind = qp->sq.next_ind;
1553
1554         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
1555                 if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1556                         HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("SQ %06x full (%u head, %u tail,"
1557                                         " %d max, %d nreq)\n", qp->qpn,
1558                                         qp->sq.head, qp->sq.tail,
1559                                         qp->sq.max, nreq));
1560                         err = -ENOMEM;
1561                         if (bad_wr)
1562                                 *bad_wr = wr;
1563                         goto out;
1564                 }
1565
1566                 wqe = get_send_wqe(qp, ind);
1567                 prev_wqe = qp->sq.last;
1568                 qp->sq.last = wqe;
1569                 opcode = conv_ibal_wr_opcode(wr);
1570
1571                 ((struct mthca_next_seg *) wqe)->nda_op = 0;
1572                 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
1573                 ((struct mthca_next_seg *) wqe)->flags =
1574                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
1575                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
1576                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
1577                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
1578                         cl_hton32(1);
1579                 if (opcode == MTHCA_OPCODE_SEND_IMM||
1580                     opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
1581                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
1582
1583                 wqe += sizeof (struct mthca_next_seg);
1584                 size = sizeof (struct mthca_next_seg) / 16;
1585
1586                 switch (qp->transport) {
1587                 case RC:
1588                         switch (opcode) {
1589                         case MTHCA_OPCODE_ATOMIC_CS:
1590                         case MTHCA_OPCODE_ATOMIC_FA:
1591                                 ((struct mthca_raddr_seg *) wqe)->raddr =
1592                                         cl_hton64(wr->remote_ops.vaddr);
1593                                 ((struct mthca_raddr_seg *) wqe)->rkey =
1594                                         wr->remote_ops.rkey;
1595                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1596
1597                                 wqe += sizeof (struct mthca_raddr_seg);
1598
1599                                 if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
1600                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
1601                                                 cl_hton64(wr->remote_ops.atomic2);
1602                                         ((struct mthca_atomic_seg *) wqe)->compare =
1603                                                 cl_hton64(wr->remote_ops.atomic1);
1604                                 } else {
1605                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
1606                                                 cl_hton64(wr->remote_ops.atomic1);
1607                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;
1608                                 }
1609
1610                                 wqe += sizeof (struct mthca_atomic_seg);
1611                                 size += (sizeof (struct mthca_raddr_seg) +
1612                                         sizeof (struct mthca_atomic_seg)) / 16 ;
1613                                 break;
1614
1615                         case MTHCA_OPCODE_RDMA_READ:
1616                         case MTHCA_OPCODE_RDMA_WRITE:
1617                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
1618                                 ((struct mthca_raddr_seg *) wqe)->raddr =
1619                                         cl_hton64(wr->remote_ops.vaddr);
1620                                 ((struct mthca_raddr_seg *) wqe)->rkey =
1621                                         wr->remote_ops.rkey;
1622                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1623                                 wqe += sizeof (struct mthca_raddr_seg);
1624                                 size += sizeof (struct mthca_raddr_seg) / 16;
1625                                 break;
1626
1627                         default:
1628                                 /* No extra segments required for sends */
1629                                 break;
1630                         }
1631
1632                         break;
1633
1634                 case UC:
1635                         switch (opcode) {
1636                         case MTHCA_OPCODE_RDMA_WRITE:
1637                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
1638                                 ((struct mthca_raddr_seg *) wqe)->raddr =
1639                                         cl_hton64(wr->remote_ops.vaddr);
1640                                 ((struct mthca_raddr_seg *) wqe)->rkey =
1641                                         wr->remote_ops.rkey;
1642                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1643                                 wqe += sizeof (struct mthca_raddr_seg);
1644                                 size += sizeof (struct mthca_raddr_seg) / 16;
1645                                 break;
1646
1647                         default:
1648                                 /* No extra segments required for sends */
1649                                 break;
1650                         }
1651
1652                         break;
1653
1654                 case UD:
1655                         ((struct mthca_tavor_ud_seg *) wqe)->lkey =
1656                                 cl_hton32(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->key);
1657                         ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
1658                                 cl_hton64(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->avdma);
1659                         ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
1660                         ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
1661
1662                         wqe += sizeof (struct mthca_tavor_ud_seg);
1663                         size += sizeof (struct mthca_tavor_ud_seg) / 16;
1664                         break;
1665
1666                 case MLX:
1667                         err = build_mlx_header(dev, to_msqp(qp), ind, wr,
1668                                                (void*)(wqe - sizeof (struct mthca_next_seg)),
1669                                                (void*)wqe);
1670                         if (err) {
1671                                 if (bad_wr)
1672                                         *bad_wr = wr;
1673                                 goto out;
1674                         }
1675                         wqe += sizeof (struct mthca_data_seg);
1676                         size += sizeof (struct mthca_data_seg) / 16;
1677                         break;
1678                 }
1679
1680                 if ((int)wr->num_ds > qp->sq.max_gs) {
1681                         HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("SQ %06x too many gathers\n",qp->qpn));
1682                         err = -EINVAL;
1683                         if (bad_wr)
1684                                 *bad_wr = wr;
1685                         goto out;
1686                 }
1687                 if (wr->send_opt & IB_SEND_OPT_INLINE) {
1688                         if (wr->num_ds) {
1689                                 struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
1690                                 uint32_t s = 0;
1691
1692                                 wqe += sizeof *seg;
1693                                 for (i = 0; i < (int)wr->num_ds; ++i) {
1694                                         struct _ib_local_ds *sge = &wr->ds_array[i];
1695
1696                                         s += sge->length;
1697
1698                                         if (s > (uint32_t)qp->max_inline_data) {
1699                                                 err = -EINVAL;
1700                                                 if (bad_wr)
1701                                                         *bad_wr = wr;
1702                                                 goto out;
1703                                         }
1704
1705                                         memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr,
1706                                                sge->length);
1707                                         wqe += sge->length;
1708                                 }
1709
1710                                 seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
1711                                 size += align(s + sizeof *seg, 16) / 16;
1712                         }
1713                 } else {
1714 
1715                         for (i = 0; i < (int)wr->num_ds; ++i) {
1716                                 ((struct mthca_data_seg *) wqe)->byte_count =
1717                                         cl_hton32(wr->ds_array[i].length);
1718                                 ((struct mthca_data_seg *) wqe)->lkey =
1719                                         cl_hton32(wr->ds_array[i].lkey);
1720                                 ((struct mthca_data_seg *) wqe)->addr =
1721                                         cl_hton64(wr->ds_array[i].vaddr);
1722                                 wqe += sizeof (struct mthca_data_seg);
1723                                 size += sizeof (struct mthca_data_seg) / 16;
1724                                 HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_QP, ("SQ %06x [%02x]  lkey 0x%08x vaddr 0x%I64x 0x%x\n",
1725                                         qp->qpn, i, (wr->ds_array[i].lkey), (wr->ds_array[i].vaddr), wr->ds_array[i].length));
1726                         }
1727                 }
1728
1729                 /* Add one more inline data segment for ICRC */
1730                 if (qp->transport == MLX) {
1731                         ((struct mthca_data_seg *) wqe)->byte_count =
1732                                 cl_hton32((unsigned long)((1 << 31) | 4));
1733                         ((u32 *) wqe)[1] = 0;
1734                         wqe += sizeof (struct mthca_data_seg);
1735                         size += sizeof (struct mthca_data_seg) / 16;
1736                 }
1737
1738                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
1739
1740                 if (opcode == MTHCA_OPCODE_INVALID) {
1741                         HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("SQ %06x opcode invalid\n",qp->qpn));
1742                         err = -EINVAL;
1743                         if (bad_wr)
1744                                 *bad_wr = wr;
1745                         goto out;
1746                 }
1747
1748                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
1749                         cl_hton32(((ind << qp->sq.wqe_shift) +  
1750                         qp->send_wqe_offset) |opcode);
1751                 wmb();
1752                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1753                         cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
1754                                 ((wr->send_opt & IB_SEND_OPT_FENCE) ?
1755                                 MTHCA_NEXT_FENCE : 0));
1756
1757                 if (!size0) {
1758                         size0 = size;
1759                         op0   = opcode;
1760                 }
1761
1762                 dump_wqe( TRACE_LEVEL_VERBOSE, (u32*)qp->sq.last,qp);
1763
1764                 ++ind;
1765                 if (unlikely(ind >= qp->sq.max))
1766                         ind -= qp->sq.max;
1767         }
1768
1769 out:
1770         if (likely(nreq)) {
1771                 __be32 doorbell[2];
1772
1773                 doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
1774                                            qp->send_wqe_offset) | f0 | op0);
1775                 doorbell[1] = cl_hton32((qp->qpn << 8) | size0);
1776
1777                 wmb();
1778
1779                 mthca_write64(doorbell,
1780                               dev->kar + MTHCA_SEND_DOORBELL,
1781                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1782         }
1783
1784         qp->sq.next_ind = ind;
1785         qp->sq.head    += nreq;
1786         
1787         spin_unlock_irqrestore(&lh);
1788         return err;
1789 }
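/*
 * The Tavor send doorbell rung above encodes, in its first word, the offset
 * of the first new WQE ((next_ind << wqe_shift) + send_wqe_offset) together
 * with the fence flag and the opcode of that WQE, and in its second word the
 * QP number and that first WQE's size in 16-byte chunks.
 */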
1790
1791 int mthca_tavor_post_receive(struct ib_qp *ibqp, struct _ib_recv_wr *wr,
1792                              struct _ib_recv_wr **bad_wr)
1793 {
1794         struct mthca_dev *dev = to_mdev(ibqp->device);
1795         struct mthca_qp *qp = to_mqp(ibqp);
1796         __be32 doorbell[2];
1797         int err = 0;
1798         int nreq;
1799         int i;
1800         int size;
1801         int size0 = 0;
1802         int ind;
1803         u8 *wqe;
1804         u8 *prev_wqe;
1805         SPIN_LOCK_PREP(lh);
1806
1807         spin_lock_irqsave(&qp->rq.lock, &lh);
1808
1809         /* XXX check that state is OK to post receive */
1810
1811         ind = qp->rq.next_ind;
1812
1813         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
1814                 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
1815                         nreq = 0;
1816
1817                         doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
1818                         doorbell[1] = cl_hton32(qp->qpn << 8);
1819
1820                         wmb();
1821
1822                         mthca_write64(doorbell, dev->kar + MTHCA_RECV_DOORBELL,
1823                       MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1824
1825                         qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
1826                         size0 = 0;
1827                 }
1828                 if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1829                         HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("RQ %06x full (%u head, %u tail,"
1830                                         " %d max, %d nreq)\n", qp->qpn,
1831                                         qp->rq.head, qp->rq.tail,
1832                                         qp->rq.max, nreq));
1833                         err = -ENOMEM;
1834                         if (bad_wr)
1835                                 *bad_wr = wr;
1836                         goto out;
1837                 }
1838
1839                 wqe = get_recv_wqe(qp, ind);
1840                 prev_wqe = qp->rq.last;
1841                 qp->rq.last = wqe;
1842
1843                 ((struct mthca_next_seg *) wqe)->nda_op = 0;
1844                 ((struct mthca_next_seg *) wqe)->ee_nds =
1845                         cl_hton32(MTHCA_NEXT_DBD);
1846                 ((struct mthca_next_seg *) wqe)->flags = 0;
1847
1848                 wqe += sizeof (struct mthca_next_seg);
1849                 size = sizeof (struct mthca_next_seg) / 16;
1850
1851                 if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
1852                         HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("RQ %06x too many gathers\n",qp->qpn));
1853                         err = -EINVAL;
1854                         if (bad_wr)
1855                                 *bad_wr = wr;
1856                         goto out;
1857                 }
1858
1859                 for (i = 0; i < (int)wr->num_ds; ++i) {
1860                         ((struct mthca_data_seg *) wqe)->byte_count =
1861                                 cl_hton32(wr->ds_array[i].length);
1862                         ((struct mthca_data_seg *) wqe)->lkey =
1863                                 cl_hton32(wr->ds_array[i].lkey);
1864                         ((struct mthca_data_seg *) wqe)->addr =
1865                                 cl_hton64(wr->ds_array[i].vaddr);
1866                         wqe += sizeof (struct mthca_data_seg);
1867                         size += sizeof (struct mthca_data_seg) / 16;
1868 //                      HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("RQ %06x [%02x]  lkey 0x%08x vaddr 0x%I64x 0x %x 0x%08x\n",i,qp->qpn,
1869 //                              (wr->ds_array[i].lkey),(wr->ds_array[i].vaddr),wr->ds_array[i].length, wr->wr_id));
1870                 }
1871
1872                 qp->wrid[ind] = wr->wr_id;
1873
1874                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
1875                         cl_hton32((ind << qp->rq.wqe_shift) | 1);
1876                 wmb();
1877                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1878                         cl_hton32(MTHCA_NEXT_DBD | size);
1879
1880                 if (!size0)
1881                         size0 = size;
1882
1883                 dump_wqe(TRACE_LEVEL_VERBOSE, (u32*)qp->rq.last, qp);
1884                 
1885                 ++ind;
1886                 if (unlikely(ind >= qp->rq.max))
1887                         ind -= qp->rq.max;
1888         }
1889
1890 out:
1891         if (likely(nreq)) {
1892                 doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
1893                 doorbell[1] = cl_hton32((qp->qpn << 8) | (nreq & 255));
1894
1895                 wmb();
1896
1897                 mthca_write64(doorbell, dev->kar + MTHCA_RECV_DOORBELL,
1898               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1899         }
1900
1901         qp->rq.next_ind = ind;
1902         qp->rq.head    += nreq;
1903
1904         spin_unlock_irqrestore(&lh);
1905         return err;
1906 }
1907
1908 int mthca_arbel_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr,
1909                           struct _ib_send_wr **bad_wr)
1910 {
1911         struct mthca_dev *dev = to_mdev(ibqp->device);
1912         struct mthca_qp *qp = to_mqp(ibqp);
1913         __be32 doorbell[2];
1914         u8 *wqe;
1915         u8 *prev_wqe;
1916         int err = 0;
1917         int nreq;
1918         int i;
1919         int size;
1920         int size0 = 0;
1921         u32 f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
1922         int ind;
1923         u8 op0 = 0;
1924         enum mthca_wr_opcode opcode;
1925         SPIN_LOCK_PREP(lh);
1926
1927         spin_lock_irqsave(&qp->sq.lock, &lh);
1928
1929         /* XXX check that state is OK to post send */
1930
1931         ind = qp->sq.head & (qp->sq.max - 1);
1932
1933         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
1934                 if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
1935                         nreq = 0;
1936                         doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
1937                                 ((qp->sq.head & 0xffff) << 8) |f0 | op0);
1938                         doorbell[1] = cl_hton32((qp->qpn << 8) | size0);
1939                         qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
1940                         size0 = 0;
1941                         f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0;
1942
1943                         /*
1944                          * Make sure that descriptors are written before
1945                          * doorbell record.
1946                          */
1947                         wmb();
1948                         *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
1949
1950                         /*
1951                          * Make sure doorbell record is written before we
1952                          * write MMIO send doorbell.
1953                          */
1954                         wmb();
1955                         mthca_write64(doorbell, dev->kar + MTHCA_SEND_DOORBELL,
1956                                 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1957                 }
1958
1959                 if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1960                         HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("SQ %06x full (%u head, %u tail,"
1961                                         " %d max, %d nreq)\n", qp->qpn,
1962                                         qp->sq.head, qp->sq.tail,
1963                                         qp->sq.max, nreq));
1964                         err = -ENOMEM;
1965                         if (bad_wr)
1966                                 *bad_wr = wr;
1967                         goto out;
1968                 }
1969
1970                 wqe = get_send_wqe(qp, ind);
1971                 prev_wqe = qp->sq.last;
1972                 qp->sq.last = wqe;
1973                 opcode = conv_ibal_wr_opcode(wr);
1974
1975                 ((struct mthca_next_seg *) wqe)->flags =
1976                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
1977                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
1978                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
1979                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
1980                         cl_hton32(1);
1981                 if (opcode == MTHCA_OPCODE_SEND_IMM||
1982                         opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
1983                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
1984
1985                 wqe += sizeof (struct mthca_next_seg);
1986                 size = sizeof (struct mthca_next_seg) / 16;
1987
1988                 switch (qp->transport) {
1989                 case RC:
1990                         switch (opcode) {
1991                         case MTHCA_OPCODE_ATOMIC_CS:
1992                         case MTHCA_OPCODE_ATOMIC_FA:
1993                                 ((struct mthca_raddr_seg *) wqe)->raddr =
1994                                         cl_hton64(wr->remote_ops.vaddr);
1995                                 ((struct mthca_raddr_seg *) wqe)->rkey =
1996                                         wr->remote_ops.rkey;
1997                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1998
1999                                 wqe += sizeof (struct mthca_raddr_seg);
2000
2001                                         if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
2002                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
2003                                                 cl_hton64(wr->remote_ops.atomic2);
2004                                         ((struct mthca_atomic_seg *) wqe)->compare =
2005                                                 cl_hton64(wr->remote_ops.atomic1);
2006                                 } else {
2007                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
2008                                                 cl_hton64(wr->remote_ops.atomic1);
2009                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;
2010                                 }
2011
2012                                 wqe += sizeof (struct mthca_atomic_seg);
2013                                 size += (sizeof (struct mthca_raddr_seg) +
2014                                         sizeof (struct mthca_atomic_seg)) / 16 ;
2015                                 break;
2016
2017                         case MTHCA_OPCODE_RDMA_READ:
2018                         case MTHCA_OPCODE_RDMA_WRITE:
2019                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
2020                                 ((struct mthca_raddr_seg *) wqe)->raddr =
2021                                         cl_hton64(wr->remote_ops.vaddr);
2022                                 ((struct mthca_raddr_seg *) wqe)->rkey =
2023                                         wr->remote_ops.rkey;
2024                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
2025                                 wqe += sizeof (struct mthca_raddr_seg);
2026                                 size += sizeof (struct mthca_raddr_seg) / 16;
2027                                 break;
2028
2029                         default:
2030                                 /* No extra segments required for sends */
2031                                 break;
2032                         }
2033
2034                         break;
2035
2036                 case UC:
2037                         switch (opcode) {
2038                         case MTHCA_OPCODE_RDMA_WRITE:
2039                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
2040                                 ((struct mthca_raddr_seg *) wqe)->raddr =
2041                                         cl_hton64(wr->remote_ops.vaddr);
2042                                 ((struct mthca_raddr_seg *) wqe)->rkey =
2043                                         wr->remote_ops.rkey;
2044                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
2045                                 wqe += sizeof (struct mthca_raddr_seg);
2046                                 size += sizeof (struct mthca_raddr_seg) / 16;
2047                                 break;
2048
2049                         default:
2050                                 /* No extra segments required for sends */
2051                                 break;
2052                         }
2053
2054                         break;
2055
2056                 case UD:
2057                         memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
2058                                to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, MTHCA_AV_SIZE);
2059                         ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
2060                         ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
2061
2062                         wqe += sizeof (struct mthca_arbel_ud_seg);
2063                         size += sizeof (struct mthca_arbel_ud_seg) / 16;
2064                         break;
2065
2066                 case MLX:
2067                         err = build_mlx_header(dev, to_msqp(qp), ind, wr,
2068                                                (void*)(wqe - sizeof (struct mthca_next_seg)),
2069                                                (void*)wqe);
2070                         if (err) {
2071                                 if (bad_wr)
2072                                         *bad_wr = wr;
2073                                 goto out;
2074                         }
2075                         wqe += sizeof (struct mthca_data_seg);
2076                         size += sizeof (struct mthca_data_seg) / 16;
2077                         break;
2078                 }
2079
2080                 if ((int)wr->num_ds > qp->sq.max_gs) {
2081                         HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_QP, ("SQ %06x too many gathers\n", qp->qpn));
2082                         err = -EINVAL;
2083                         if (bad_wr)
2084                                 *bad_wr = wr;
2085                         goto out;
2086                 }
2087                 if (wr->send_opt & IB_SEND_OPT_INLINE) {
2088                         if (wr->num_ds) {
2089                                 struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe;
2090                                 uint32_t s = 0;
2091
2092                                 wqe += sizeof *seg;
2093                                 for (i = 0; i < (int)wr->num_ds; ++i) {
2094                                         struct _ib_local_ds *sge = &wr->ds_array[i];
2095
2096                                         s += sge->length;
2097
2098                                         if (s > (uint32_t)qp->max_inline_data) {
2099                                                 err = -EINVAL;
2100                                                 if (bad_wr)
2101                                                         *bad_wr = wr;
2102                                                 goto out;
2103                                         }
2104
2105                                         memcpy(wqe, (void *) (uintptr_t) sge->vaddr,
2106                                                sge->length);
2107                                         wqe += sge->length;
2108                                 }
2109
2110                                 seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s);
2111                                 size += align(s + sizeof *seg, 16) / 16;
2112                         }
2113                 } else {
2114                         for (i = 0; i < (int)wr->num_ds; ++i) {
2115                                 ((struct mthca_data_seg *) wqe)->byte_count =
2116                                         cl_hton32(wr->ds_array[i].length);
2117                                 ((struct mthca_data_seg *) wqe)->lkey =
2118                                         cl_hton32(wr->ds_array[i].lkey);
2119                                 ((struct mthca_data_seg *) wqe)->addr =
2120                                         cl_hton64(wr->ds_array[i].vaddr);
2121                                 wqe += sizeof (struct mthca_data_seg);
2122                                 size += sizeof (struct mthca_data_seg) / 16;
2123                         }
2124                 }
2125
2126                 /* Add one more inline data segment for ICRC */
2127                 if (qp->transport == MLX) {
2128                         ((struct mthca_data_seg *) wqe)->byte_count =
2129                                 cl_hton32((unsigned long)((1 << 31) | 4));
2130                         ((u32 *) wqe)[1] = 0;
2131                         wqe += sizeof (struct mthca_data_seg);
2132                         size += sizeof (struct mthca_data_seg) / 16;
2133                 }
2134
2135                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
2136
2137                 if (opcode == MTHCA_OPCODE_INVALID) {
2138                         HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("SQ %06x opcode invalid\n",qp->qpn));
2139                         err = -EINVAL;
2140                         if (bad_wr)
2141                                 *bad_wr = wr;
2142                         goto out;
2143                 }
2144
2145                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
2146                         cl_hton32(((ind << qp->sq.wqe_shift) +
2147                         qp->send_wqe_offset) |opcode);
2148                 wmb();
2149                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
2150                         cl_hton32(MTHCA_NEXT_DBD | size |
2151                         ((wr->send_opt & IB_SEND_OPT_FENCE) ?
2152                         MTHCA_NEXT_FENCE : 0));
2153                 
2154                 if (!size0) {
2155                         size0 = size;
2156                         op0   = opcode;
2157                 }
2158
2159                 ++ind;
2160                 if (unlikely(ind >= qp->sq.max))
2161                         ind -= qp->sq.max;
2162         }
2163
2164 out:
2165         if (likely(nreq)) {
2166                 doorbell[0] = cl_hton32((nreq << 24) |
2167                         ((qp->sq.head & 0xffff) << 8) |f0 | op0);
2168                 doorbell[1] = cl_hton32((qp->qpn << 8) | size0);
2169                 qp->sq.head += nreq;
2170
2171                 /*
2172                  * Make sure that descriptors are written before
2173                  * doorbell record.
2174                  */
2175                 wmb();
2176                 *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
2177
2178                 /*
2179                  * Make sure doorbell record is written before we
2180                  * write MMIO send doorbell.
2181                  */
2182                 wmb();
2183                 mthca_write64(doorbell,
2184                               dev->kar + MTHCA_SEND_DOORBELL,
2185                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
2186         }
2187
2188         spin_unlock_irqrestore(&lh);
2189         return err;
2190 }
2191
2192 int mthca_arbel_post_receive(struct ib_qp *ibqp, struct _ib_recv_wr *wr,
2193                              struct _ib_recv_wr **bad_wr)
2194 {
2195         struct mthca_qp *qp = to_mqp(ibqp);
2196         int err = 0;
2197         int nreq;
2198         int ind;
2199         int i;
2200         u8 *wqe;
2201         SPIN_LOCK_PREP(lh);
2202
2203         spin_lock_irqsave(&qp->rq.lock, &lh);
2204
2205         /* XXX check that state is OK to post receive */
2206
2207         ind = qp->rq.head & (qp->rq.max - 1);
2208
2209         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
2210                 if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2211                         HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("RQ %06x full (%u head, %u tail,"
2212                                         " %d max, %d nreq)\n", qp->qpn,
2213                                         qp->rq.head, qp->rq.tail,
2214                                         qp->rq.max, nreq));
2215                         err = -ENOMEM;
2216                         if (bad_wr)
2217                                 *bad_wr = wr;
2218                         goto out;
2219                 }
2220
2221                 wqe = get_recv_wqe(qp, ind);
2222
2223                 ((struct mthca_next_seg *) wqe)->flags = 0;
2224
2225                 wqe += sizeof (struct mthca_next_seg);
2226
2227                 if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
2228                         HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_QP, ("RQ %06x too many scatter entries\n", qp->qpn));
2229                         err = -EINVAL;
2230                         if (bad_wr)
2231                                 *bad_wr = wr;
2232                         goto out;
2233                 }
2234
2235                 for (i = 0; i < (int)wr->num_ds; ++i) {
2236                         ((struct mthca_data_seg *) wqe)->byte_count =
2237                                 cl_hton32(wr->ds_array[i].length);
2238                         ((struct mthca_data_seg *) wqe)->lkey =
2239                                 cl_hton32(wr->ds_array[i].lkey);
2240                         ((struct mthca_data_seg *) wqe)->addr =
2241                                 cl_hton64(wr->ds_array[i].vaddr);
2242                         wqe += sizeof (struct mthca_data_seg);
2243                 }
2244
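                /*
                 * Terminate a short scatter list with a zero-length segment
                 * carrying the invalid L_Key so the HCA stops at it.
                 */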
2245                 if (i < qp->rq.max_gs) {
2246                         ((struct mthca_data_seg *) wqe)->byte_count = 0;
2247                         ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);
2248                         ((struct mthca_data_seg *) wqe)->addr = 0;
2249                 }
2250
2251                 qp->wrid[ind] = wr->wr_id;
2252
2253                 ++ind;
2254                 if (unlikely(ind >= qp->rq.max))
2255                         ind -= qp->rq.max;
2256         }
2257 out:
2258         if (likely(nreq)) {
2259                 qp->rq.head += nreq;
2260
2261                 /*
2262                  * Make sure that descriptors are written before
2263                  * doorbell record.
2264                  */
2265                 wmb();
2266                 *qp->rq.db = cl_hton32(qp->rq.head & 0xffff);
2267         }
2268
2269         spin_unlock_irqrestore(&lh);
2270         return err;
2271 }
2272
2273 void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
2274                        int index, int *dbd, __be32 *new_wqe)
2275 {
2276         struct mthca_next_seg *next;
2277
2278         UNREFERENCED_PARAMETER(dev);
2279         
2280         /*
2281          * For SRQs, all WQEs generate a CQE, so we're always at the
2282          * end of the doorbell chain.
2283          */
2284         if (qp->ibqp.srq) {
2285                 *new_wqe = 0;
2286                 return;
2287         }
2288
2289         if (is_send)
2290                 next = get_send_wqe(qp, index);
2291         else
2292                 next = get_recv_wqe(qp, index);
2293
2294         *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));
2295         if (next->ee_nds & cl_hton32(0x3f))
2296                 *new_wqe = (next->nda_op & cl_hton32((unsigned long)~0x3f)) |
2297                         (next->ee_nds & cl_hton32(0x3f));
2298         else
2299                 *new_wqe = 0;
2300 }
2301
2302 int mthca_init_qp_table(struct mthca_dev *dev)
2303 {
2304         int err;
2305         u8 status;
2306         int i;
2307
2308         spin_lock_init(&dev->qp_table.lock);
2309         fill_state_table();
2310
2311         /*
2312          * We reserve 2 extra QPs per port for the special QPs.  The
2313          * special QP for port 1 has to be even, so round up.
2314          */
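        /*
         * Example with an assumed reserved_qps of 17: sqp_start rounds up
         * to 18, QP0 is configured at QPN 18 and QP1 at QPN 20, and the
         * per-port special QPs occupy QPNs 18..21.
         */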
2315         dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
2316         err = mthca_alloc_init(&dev->qp_table.alloc,
2317                                dev->limits.num_qps,
2318                                (1 << 24) - 1,
2319                                dev->qp_table.sqp_start +
2320                                MTHCA_MAX_PORTS * 2);
2321         if (err)
2322                 return err;
2323
2324         err = mthca_array_init(&dev->qp_table.qp,
2325                                dev->limits.num_qps);
2326         if (err) {
2327                 mthca_alloc_cleanup(&dev->qp_table.alloc);
2328                 return err;
2329         }
2330
2331         for (i = 0; i < 2; ++i) {
2332                 err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_QP1 : IB_QPT_QP0,
2333                                             dev->qp_table.sqp_start + i * 2,
2334                                             &status);
2335                 if (err)
2336                         goto err_out;
2337                 if (status) {
2338                         HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("CONF_SPECIAL_QP returned "
2339                                    "status %02x, aborting.\n",
2340                                    status));
2341                         err = -EINVAL;
2342                         goto err_out;
2343                 }
2344         }
2345         return 0;
2346
2347  err_out:
2348         mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP1, 0, &status);
2349         mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP0, 0, &status);
2350
2351         mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
2352         mthca_alloc_cleanup(&dev->qp_table.alloc);
2353
2354         return err;
2355 }
2356
2357 void mthca_cleanup_qp_table(struct mthca_dev *dev)
2358 {
2359         u8 status;
2360
2361         mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP1, 0, &status);
2362         mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP0, 0, &status);
2363
2364         mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
2365         mthca_alloc_cleanup(&dev->qp_table.alloc);
2366 }
2367
2368
2369