1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Cisco Systems. All rights reserved.
4  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5  * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  *
35  * $Id: mthca_qp.c 3047 2005-08-10 03:59:35Z roland $
36  */
37
38 #include <ib_verbs.h>
39 #include <ib_cache.h>
40 #include <ib_pack.h>
41
42 #include "mthca_dev.h"
43 #if defined(EVENT_TRACING)
44 #ifdef offsetof
45 #undef offsetof
46 #endif
47 #include "mthca_qp.tmh"
48 #endif
49 #include "mthca_cmd.h"
50 #include "mthca_memfree.h"
51 #include "mthca_wqe.h"
52
53
54 #ifdef ALLOC_PRAGMA
55 #pragma alloc_text (PAGE, mthca_init_qp_table)
56 #pragma alloc_text (PAGE, mthca_cleanup_qp_table)
57 #endif
58
59 enum {
60         MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
61         MTHCA_ACK_REQ_FREQ       = 10,
62         MTHCA_FLIGHT_LIMIT       = 9,
63         MTHCA_UD_HEADER_SIZE     = 72, /* largest UD header possible */
64         MTHCA_INLINE_HEADER_SIZE = 4,  /* data segment overhead for inline */
65         MTHCA_INLINE_CHUNK_SIZE  = 16  /* inline data segment chunk */
66 };
67
68 enum {
69         MTHCA_QP_STATE_RST  = 0,
70         MTHCA_QP_STATE_INIT = 1,
71         MTHCA_QP_STATE_RTR  = 2,
72         MTHCA_QP_STATE_RTS  = 3,
73         MTHCA_QP_STATE_SQE  = 4,
74         MTHCA_QP_STATE_SQD  = 5,
75         MTHCA_QP_STATE_ERR  = 6,
76         MTHCA_QP_STATE_DRAINING = 7
77 };
78
79 enum {
80         MTHCA_QP_ST_RC  = 0x0,
81         MTHCA_QP_ST_UC  = 0x1,
82         MTHCA_QP_ST_RD  = 0x2,
83         MTHCA_QP_ST_UD  = 0x3,
84         MTHCA_QP_ST_MLX = 0x7
85 };
86
87 enum {
88         MTHCA_QP_PM_MIGRATED = 0x3,
89         MTHCA_QP_PM_ARMED    = 0x0,
90         MTHCA_QP_PM_REARM    = 0x1
91 };
92
93 enum {
94         /* qp_context flags */
95         MTHCA_QP_BIT_DE  = 1 <<  8,
96         /* params1 */
97         MTHCA_QP_BIT_SRE = 1 << 15,
98         MTHCA_QP_BIT_SWE = 1 << 14,
99         MTHCA_QP_BIT_SAE = 1 << 13,
100         MTHCA_QP_BIT_SIC = 1 <<  4,
101         MTHCA_QP_BIT_SSC = 1 <<  3,
102         /* params2 */
103         MTHCA_QP_BIT_RRE = 1 << 15,
104         MTHCA_QP_BIT_RWE = 1 << 14,
105         MTHCA_QP_BIT_RAE = 1 << 13,
106         MTHCA_QP_BIT_RIC = 1 <<  4,
107         MTHCA_QP_BIT_RSC = 1 <<  3
108 };
109
110 #pragma pack(push,1)
111 struct mthca_qp_path {
112         __be32 port_pkey;
113         u8     rnr_retry;
114         u8     g_mylmc;
115         __be16 rlid;
116         u8     ackto;
117         u8     mgid_index;
118         u8     static_rate;
119         u8     hop_limit;
120         __be32 sl_tclass_flowlabel;
121         u8     rgid[16];
122 } ;
123
124 struct mthca_qp_context {
125         __be32 flags;
126         __be32 tavor_sched_queue; /* Reserved on Arbel */
127         u8     mtu_msgmax;
128         u8     rq_size_stride;  /* Reserved on Tavor */
129         u8     sq_size_stride;  /* Reserved on Tavor */
130         u8     rlkey_arbel_sched_queue; /* Reserved on Tavor */
131         __be32 usr_page;
132         __be32 local_qpn;
133         __be32 remote_qpn;
134         u32    reserved1[2];
135         struct mthca_qp_path pri_path;
136         struct mthca_qp_path alt_path;
137         __be32 rdd;
138         __be32 pd;
139         __be32 wqe_base;
140         __be32 wqe_lkey;
141         __be32 params1;
142         __be32 reserved2;
143         __be32 next_send_psn;
144         __be32 cqn_snd;
145         __be32 snd_wqe_base_l;  /* Next send WQE on Tavor */
146         __be32 snd_db_index;    /* (debugging only entries) */
147         __be32 last_acked_psn;
148         __be32 ssn;
149         __be32 params2;
150         __be32 rnr_nextrecvpsn;
151         __be32 ra_buff_indx;
152         __be32 cqn_rcv;
153         __be32 rcv_wqe_base_l;  /* Next recv WQE on Tavor */
154         __be32 rcv_db_index;    /* (debugging only entries) */
155         __be32 qkey;
156         __be32 srqn;
157         __be32 rmsn;
158         __be16 rq_wqe_counter;  /* reserved on Tavor */
159         __be16 sq_wqe_counter;  /* reserved on Tavor */
160         u32    reserved3[18];
161 } ;
162
163 struct mthca_qp_param {
164         __be32 opt_param_mask;
165         u32    reserved1;
166         struct mthca_qp_context context;
167         u32    reserved2[62];
168 } ;
169 #pragma pack(pop)
170
171 enum {
172         MTHCA_QP_OPTPAR_ALT_ADDR_PATH     = 1 << 0,
173         MTHCA_QP_OPTPAR_RRE               = 1 << 1,
174         MTHCA_QP_OPTPAR_RAE               = 1 << 2,
175         MTHCA_QP_OPTPAR_RWE               = 1 << 3,
176         MTHCA_QP_OPTPAR_PKEY_INDEX        = 1 << 4,
177         MTHCA_QP_OPTPAR_Q_KEY             = 1 << 5,
178         MTHCA_QP_OPTPAR_RNR_TIMEOUT       = 1 << 6,
179         MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
180         MTHCA_QP_OPTPAR_SRA_MAX           = 1 << 8,
181         MTHCA_QP_OPTPAR_RRA_MAX           = 1 << 9,
182         MTHCA_QP_OPTPAR_PM_STATE          = 1 << 10,
183         MTHCA_QP_OPTPAR_PORT_NUM          = 1 << 11,
184         MTHCA_QP_OPTPAR_RETRY_COUNT       = 1 << 12,
185         MTHCA_QP_OPTPAR_ALT_RNR_RETRY     = 1 << 13,
186         MTHCA_QP_OPTPAR_ACK_TIMEOUT       = 1 << 14,
187         MTHCA_QP_OPTPAR_RNR_RETRY         = 1 << 15,
188         MTHCA_QP_OPTPAR_SCHED_QUEUE       = 1 << 16
189 };
190
191 static const u8 mthca_opcode[] = {
192         MTHCA_OPCODE_RDMA_WRITE,
193         MTHCA_OPCODE_RDMA_WRITE_IMM,
194         MTHCA_OPCODE_SEND,
195         MTHCA_OPCODE_SEND_IMM,
196         MTHCA_OPCODE_RDMA_READ,
197         MTHCA_OPCODE_ATOMIC_CS,
198         MTHCA_OPCODE_ATOMIC_FA
199 };
200
201
202 enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS };
203
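/*
 * state_table[cur][next] describes each legal QP state transition: the
 * firmware transition command (trans) plus, per transport, the attribute
 * bits that are required (req_param) and permitted (opt_param) for that
 * transition.  Entries left zeroed by fill_state_table() mark illegal
 * transitions; this relies on MTHCA_TRANS_INVALID being 0, which is how
 * mthca_modify_qp() tests for them below.
 */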
204 static struct _state_table {
205         int trans;
206         u32 req_param[NUM_TRANS];
207         u32 opt_param[NUM_TRANS];
208 } state_table[IBQPS_ERR + 1][IBQPS_ERR + 1]= {0};
209
210 static void fill_state_table()
211 {
212         struct _state_table *t;
213         RtlZeroMemory( state_table, sizeof(state_table) );
214
215         /* IBQPS_RESET */       
216         t = &state_table[IBQPS_RESET][0];
217         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
218         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
219
220         t[IBQPS_INIT].trans                                             = MTHCA_TRANS_RST2INIT;
221         t[IBQPS_INIT].req_param[UD]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_QKEY;
222         t[IBQPS_INIT].req_param[UC]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS;
223         t[IBQPS_INIT].req_param[RC]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS;
224         t[IBQPS_INIT].req_param[MLX]    = IB_QP_PKEY_INDEX |IB_QP_QKEY;
225         t[IBQPS_INIT].opt_param[MLX]    = IB_QP_PORT;
226
227         /* IBQPS_INIT */        
228         t = &state_table[IBQPS_INIT][0];
229         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
230         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
231
232         t[IBQPS_INIT].trans                                             = MTHCA_TRANS_INIT2INIT;
233         t[IBQPS_INIT].opt_param[UD]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_QKEY;
234         t[IBQPS_INIT].opt_param[UC]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS;
235         t[IBQPS_INIT].opt_param[RC]     = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS;
236         t[IBQPS_INIT].opt_param[MLX]    = IB_QP_PKEY_INDEX |IB_QP_QKEY;
237
238         t[IBQPS_RTR].trans                                              = MTHCA_TRANS_INIT2RTR;
239         t[IBQPS_RTR].req_param[UC]      = 
240                 IB_QP_AV |IB_QP_PATH_MTU |IB_QP_DEST_QPN |IB_QP_RQ_PSN;
241         t[IBQPS_RTR].req_param[RC]      = 
242                 IB_QP_AV |IB_QP_PATH_MTU |IB_QP_DEST_QPN |IB_QP_RQ_PSN |IB_QP_MAX_DEST_RD_ATOMIC |IB_QP_MIN_RNR_TIMER;
243         t[IBQPS_RTR].opt_param[UD]      = IB_QP_PKEY_INDEX |IB_QP_QKEY;
244         t[IBQPS_RTR].opt_param[UC]      = IB_QP_PKEY_INDEX |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS;
245         t[IBQPS_RTR].opt_param[RC]      = IB_QP_PKEY_INDEX |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS;
246         t[IBQPS_RTR].opt_param[MLX]     = IB_QP_PKEY_INDEX |IB_QP_QKEY;
247
248 /* IBQPS_RTR */ 
249         t = &state_table[IBQPS_RTR][0];
250         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
251         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
252
253         t[IBQPS_RTS].trans                                              = MTHCA_TRANS_RTR2RTS;
254         t[IBQPS_RTS].req_param[UD]      = IB_QP_SQ_PSN;
255         t[IBQPS_RTS].req_param[UC]      = IB_QP_SQ_PSN;
256         t[IBQPS_RTS].req_param[RC]      = 
257                 IB_QP_TIMEOUT |IB_QP_RETRY_CNT |IB_QP_RNR_RETRY |IB_QP_SQ_PSN |IB_QP_MAX_QP_RD_ATOMIC;
258         t[IBQPS_RTS].req_param[MLX]     = IB_QP_SQ_PSN;
259         t[IBQPS_RTS].opt_param[UD]      = IB_QP_CUR_STATE |IB_QP_QKEY;
260         t[IBQPS_RTS].opt_param[UC]      = 
261                 IB_QP_CUR_STATE |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PATH_MIG_STATE;
262         t[IBQPS_RTS].opt_param[RC]      =       IB_QP_CUR_STATE |IB_QP_ALT_PATH |
263                 IB_QP_ACCESS_FLAGS |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE;
264         t[IBQPS_RTS].opt_param[MLX]     = IB_QP_CUR_STATE |IB_QP_QKEY;
265
266         /* IBQPS_RTS */ 
267         t = &state_table[IBQPS_RTS][0];
268         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
269         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
270
271         t[IBQPS_RTS].trans                                              = MTHCA_TRANS_RTS2RTS;
272         t[IBQPS_RTS].opt_param[UD]      = IB_QP_CUR_STATE |IB_QP_QKEY;
273         t[IBQPS_RTS].opt_param[UC]      = IB_QP_ACCESS_FLAGS |IB_QP_ALT_PATH |IB_QP_PATH_MIG_STATE;
274         t[IBQPS_RTS].opt_param[RC]      =       IB_QP_ACCESS_FLAGS |
275                 IB_QP_ALT_PATH |IB_QP_PATH_MIG_STATE |IB_QP_MIN_RNR_TIMER;
276         t[IBQPS_RTS].opt_param[MLX]     = IB_QP_CUR_STATE |IB_QP_QKEY;
277
278         t[IBQPS_SQD].trans                                              = MTHCA_TRANS_RTS2SQD;
279         t[IBQPS_SQD].opt_param[UD]      = IB_QP_EN_SQD_ASYNC_NOTIFY;
280         t[IBQPS_SQD].opt_param[UC]      = IB_QP_EN_SQD_ASYNC_NOTIFY;
281         t[IBQPS_SQD].opt_param[RC]      =       IB_QP_EN_SQD_ASYNC_NOTIFY;
282         t[IBQPS_SQD].opt_param[MLX]     = IB_QP_EN_SQD_ASYNC_NOTIFY;
283
284         /* IBQPS_SQD */ 
285         t = &state_table[IBQPS_SQD][0];
286         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
287         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
288
289         t[IBQPS_RTS].trans                                              = MTHCA_TRANS_SQD2RTS;
290         t[IBQPS_RTS].opt_param[UD]      = IB_QP_CUR_STATE |IB_QP_QKEY;
291         t[IBQPS_RTS].opt_param[UC]      = IB_QP_CUR_STATE |
292                 IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PATH_MIG_STATE;
293         t[IBQPS_RTS].opt_param[RC]      =       IB_QP_CUR_STATE |IB_QP_ALT_PATH |
294                 IB_QP_ACCESS_FLAGS |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE;
295         t[IBQPS_RTS].opt_param[MLX]     = IB_QP_CUR_STATE |IB_QP_QKEY;
296
297         t[IBQPS_SQD].trans                                              = MTHCA_TRANS_SQD2SQD;
298         t[IBQPS_SQD].opt_param[UD]      = IB_QP_PKEY_INDEX |IB_QP_QKEY;
299         t[IBQPS_SQD].opt_param[UC]      = IB_QP_AV |    IB_QP_CUR_STATE |
300                 IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_PATH_MIG_STATE;
301         t[IBQPS_SQD].opt_param[RC]      =       IB_QP_AV |IB_QP_TIMEOUT |IB_QP_RETRY_CNT |IB_QP_RNR_RETRY |
302                 IB_QP_MAX_QP_RD_ATOMIC |IB_QP_MAX_DEST_RD_ATOMIC |IB_QP_CUR_STATE |IB_QP_ALT_PATH |
303                 IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE;
304         t[IBQPS_SQD].opt_param[MLX]     = IB_QP_PKEY_INDEX |IB_QP_QKEY;
305
306         /* IBQPS_SQE */ 
307         t = &state_table[IBQPS_SQE][0];
308         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
309         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
310
311         t[IBQPS_RTS].trans                                              = MTHCA_TRANS_SQERR2RTS;
312         t[IBQPS_RTS].opt_param[UD]      = IB_QP_CUR_STATE |IB_QP_QKEY;
313         t[IBQPS_RTS].opt_param[UC]      = IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS;
314 //      t[IBQPS_RTS].opt_param[RC]      =       IB_QP_CUR_STATE |IB_QP_MIN_RNR_TIMER;
315         t[IBQPS_RTS].opt_param[MLX]     = IB_QP_CUR_STATE |IB_QP_QKEY;
316
317         /* IBQPS_ERR */ 
318         t = &state_table[IBQPS_ERR][0];
319         t[IBQPS_RESET].trans                                    = MTHCA_TRANS_ANY2RST;
320         t[IBQPS_ERR].trans                                              = MTHCA_TRANS_ANY2ERR;
321
322 };
323
324
325 static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp)
326 {
327         return qp->qpn >= (u32)dev->qp_table.sqp_start &&
328                 qp->qpn <= (u32)dev->qp_table.sqp_start + 3;
329 }
330
331 static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp)
332 {
333         return qp->qpn >= (u32)dev->qp_table.sqp_start &&
334                 qp->qpn <= (u32)(dev->qp_table.sqp_start + 1);
335 }
336
337
338 static void dump_wqe(u32 print_lvl, u32 *wqe_ptr , struct mthca_qp *qp_ptr)
339 {
340         __be32 *wqe = wqe_ptr;
341
342         UNREFERENCED_PARAMETER(qp_ptr);
343
344         (void) wqe;     /* avoid warning if mthca_dbg compiled away... */
345         HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents  QPN 0x%06x \n",qp_ptr->qpn));
346         HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0
347                 , cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3])));
348         HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4
349                 , cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7])));
350         HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8
351                 , cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11])));
352         HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12
353                 , cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15])));
354
355 }
356
357
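/*
 * WQEs live either in a single physically contiguous ("direct") buffer
 * or spread across a page list.  Either way WQE n starts at byte offset
 * (n << wqe_shift); in the page-list case that offset is split into a
 * page index and an offset within the page.
 */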
358 static void *get_recv_wqe(struct mthca_qp *qp, int n)
359 {
360         if (qp->is_direct)
361                 return (u8*)qp->queue.direct.page + (n << qp->rq.wqe_shift);
362         else
363                 return (u8*)qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].page +
364                         ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1));
365 }
366
367 static void *get_send_wqe(struct mthca_qp *qp, int n)
368 {
369         if (qp->is_direct)
370                 return (u8*)qp->queue.direct.page + qp->send_wqe_offset +
371                         (n << qp->sq.wqe_shift);
372         else
373                 return (u8*)qp->queue.page_list[(qp->send_wqe_offset +
374                                             (n << qp->sq.wqe_shift)) >>
375                                            PAGE_SHIFT].page +
376                         ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) &
377                          (PAGE_SIZE - 1));
378 }
379
380 static void mthca_wq_init(struct mthca_wq *wq)
381 {       
382         spin_lock_init(&wq->lock);      
383         wq->next_ind  = 0;      
384         wq->last_comp = wq->max - 1;    
385         wq->head      = 0;      
386         wq->tail      = 0;      
387 }
388
389 void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
390                     enum ib_event_type event_type)
391 {
392         struct mthca_qp *qp;
393         struct ib_event event;
394         SPIN_LOCK_PREP(lh);
395
396         spin_lock(&dev->qp_table.lock, &lh);
397         qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
398         if (qp)
399                 atomic_inc(&qp->refcount);
400         spin_unlock(&lh);
401
402         if (!qp) {
403                 HCA_PRINT(TRACE_LEVEL_WARNING  ,HCA_DBG_QP  ,("Async event for bogus QP %06x\n", qpn));
404                 return;
405         }
406
407         event.device      = &dev->ib_dev;
408         event.event       = event_type;
409         event.element.qp  = &qp->ibqp;
410         if (qp->ibqp.event_handler)
411                 qp->ibqp.event_handler(&event, qp->ibqp.qp_context);
412
413         if (atomic_dec_and_test(&qp->refcount))
414                 wake_up(&qp->wait);
415 }
416
417 static int to_mthca_state(enum ib_qp_state ib_state)
418 {
419         switch (ib_state) {
420         case IBQPS_RESET: return MTHCA_QP_STATE_RST;
421         case IBQPS_INIT:  return MTHCA_QP_STATE_INIT;
422         case IBQPS_RTR:   return MTHCA_QP_STATE_RTR;
423         case IBQPS_RTS:   return MTHCA_QP_STATE_RTS;
424         case IBQPS_SQD:   return MTHCA_QP_STATE_SQD;
425         case IBQPS_SQE:   return MTHCA_QP_STATE_SQE;
426         case IBQPS_ERR:   return MTHCA_QP_STATE_ERR;
427         default:                return -1;
428         }
429 }
430
431 static int to_mthca_st(int transport)
432 {
433         switch (transport) {
434         case RC:  return MTHCA_QP_ST_RC;
435         case UC:  return MTHCA_QP_ST_UC;
436         case UD:  return MTHCA_QP_ST_UD;
437         case RD:  return MTHCA_QP_ST_RD;
438         case MLX: return MTHCA_QP_ST_MLX;
439         default:  return -1;
440         }
441 }
442
443 static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
444                         int attr_mask)
445 {
446         if (attr_mask & IB_QP_PKEY_INDEX)
447                 sqp->pkey_index = attr->pkey_index;
448         if (attr_mask & IB_QP_QKEY)
449                 sqp->qkey = attr->qkey;
450         if (attr_mask & IB_QP_SQ_PSN)
451                 sqp->send_psn = attr->sq_psn;
452 }
453
454 static void init_port(struct mthca_dev *dev, int port)
455 {
456         int err;
457         u8 status;
458         struct mthca_init_ib_param param;
459
460         RtlZeroMemory(&param, sizeof param);
461
462         param.port_width    = dev->limits.port_width_cap;
463         param.vl_cap    = dev->limits.vl_cap;
464         param.mtu_cap   = dev->limits.mtu_cap;
465         param.gid_cap   = (u16)dev->limits.gid_table_len;
466         param.pkey_cap  = (u16)dev->limits.pkey_table_len;
467
468         err = mthca_INIT_IB(dev, &param, port, &status);
469         if (err)
470                 HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP  ,("INIT_IB failed, return code %d.\n", err));
471         if (status)
472                 HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP  ,("INIT_IB returned status %02x.\n", status));
473 }
474
475
476 static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
477                                   int attr_mask)
478 {
479         u8 dest_rd_atomic;
480         u32 access_flags;
481         u32 hw_access_flags = 0;
482
483         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
484                 dest_rd_atomic = attr->max_dest_rd_atomic;
485         else
486                 dest_rd_atomic = qp->resp_depth;
487
488         if (attr_mask & IB_QP_ACCESS_FLAGS)
489                 access_flags = attr->qp_access_flags;
490         else
491                 access_flags = qp->atomic_rd_en;
492
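        /*
         * With no responder resources this QP cannot accept RDMA reads
         * or atomics, so keep only the remote-write permission.
         */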
493         if (!dest_rd_atomic)
494                 access_flags &= MTHCA_ACCESS_REMOTE_WRITE;
495
496         if (access_flags & MTHCA_ACCESS_REMOTE_READ)
497                 hw_access_flags |= MTHCA_QP_BIT_RRE;
498         if (access_flags & MTHCA_ACCESS_REMOTE_ATOMIC)
499                 hw_access_flags |= MTHCA_QP_BIT_RAE;
500         if (access_flags & MTHCA_ACCESS_REMOTE_WRITE)
501                 hw_access_flags |= MTHCA_QP_BIT_RWE;
502
503         return cl_hton32(hw_access_flags);
504 }
505
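/*
 * Validate the requested transition against state_table, build the new
 * QP context in a firmware mailbox and execute it with MODIFY_QP.  When
 * a kernel QP is moved back to RESET, the associated CQs are cleaned and
 * the work queues are reinitialized.
 */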
506 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
507 {
508         struct mthca_dev *dev = to_mdev(ibqp->device);
509         struct mthca_qp *qp = to_mqp(ibqp);
510         enum ib_qp_state cur_state, new_state;
511         struct mthca_mailbox *mailbox;
512         struct mthca_qp_param *qp_param;
513         struct mthca_qp_context *qp_context;
514         u32 req_param, opt_param;
515         u32 sqd_event = 0;
516         u8 status;
517         int err;
518         SPIN_LOCK_PREP(lhs);
519         SPIN_LOCK_PREP(lhr);
520
521         if (attr_mask & IB_QP_CUR_STATE) {
522                 if (attr->cur_qp_state != IBQPS_RTR &&
523                     attr->cur_qp_state != IBQPS_RTS &&
524                     attr->cur_qp_state != IBQPS_SQD &&
525                     attr->cur_qp_state != IBQPS_SQE)
526                         return -EINVAL;
527                 else
528                         cur_state = attr->cur_qp_state;
529         } else {
530                 spin_lock_irq(&qp->sq.lock, &lhs);
531                 spin_lock(&qp->rq.lock, &lhr);
532                 cur_state = qp->state;
533                 spin_unlock(&lhr);
534                 spin_unlock_irq(&lhs);
535         }
536
537         if (attr_mask & IB_QP_STATE) {
538                if (attr->qp_state < 0 || attr->qp_state > IBQPS_ERR)
539                         return -EINVAL;
540                 new_state = attr->qp_state;
541         } else
542                 new_state = cur_state;
543
544         if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
545                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Illegal QP transition "
546                           "%d->%d\n", cur_state, new_state));
547                 return -EINVAL;
548         }
549
550         req_param = state_table[cur_state][new_state].req_param[qp->transport];
551         opt_param = state_table[cur_state][new_state].opt_param[qp->transport];
552
553         if ((req_param & attr_mask) != req_param) {
554                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("QP transition "
555                           "%d->%d missing req attr 0x%08x\n",
556                           cur_state, new_state,
557                           req_param & ~attr_mask));
558                 //NB: IBAL doesn't set all the attributes, so required flags may legitimately be absent here
559                 return -EINVAL;
560         }
561
562         if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
563                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("QP transition (transport %d) "
564                           "%d->%d has extra attr 0x%08x\n",
565                           qp->transport,
566                           cur_state, new_state,
567                           attr_mask & ~(req_param | opt_param |
568                                                  IB_QP_STATE)));
569                 //NB: The old code sometimes passes flags that this table does not list as optional
570                 return -EINVAL;
571         }
572
573         if ((attr_mask & IB_QP_PKEY_INDEX) && 
574              attr->pkey_index >= dev->limits.pkey_table_len) {
575                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("PKey index (%u) too large. max is %d\n",
576                           attr->pkey_index,dev->limits.pkey_table_len-1)); 
577                 return -EINVAL;
578         }
579
580         if ((attr_mask & IB_QP_PORT) &&
581             (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
582                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Port number (%u) is invalid\n", attr->port_num));
583                 return -EINVAL;
584         }
585
586         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
587             attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
588                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Max rdma_atomic as initiator %u too large (max is %d)\n",
589                           attr->max_rd_atomic, dev->limits.max_qp_init_rdma));
590                 return -EINVAL;
591         }
592
593         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
594             attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
595                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Max rdma_atomic as responder %u too large (max %d)\n",
596                           attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift));
597                 return -EINVAL;
598         }
599
600         mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
601         if (IS_ERR(mailbox))
602                 return PTR_ERR(mailbox);
603         qp_param = mailbox->buf;
604         qp_context = &qp_param->context;
605         RtlZeroMemory(qp_param, sizeof *qp_param);
606
607         qp_context->flags      = cl_hton32((to_mthca_state(new_state) << 28) |
608                                              (to_mthca_st(qp->transport) << 16));
609         qp_context->flags     |= cl_hton32(MTHCA_QP_BIT_DE);
610         if (!(attr_mask & IB_QP_PATH_MIG_STATE))
611                 qp_context->flags |= cl_hton32(MTHCA_QP_PM_MIGRATED << 11);
612         else {
613                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PM_STATE);
614                 switch (attr->path_mig_state) {
615                 case IB_APM_MIGRATED:
616                         qp_context->flags |= cl_hton32(MTHCA_QP_PM_MIGRATED << 11);
617                         break;
618                 case IB_APM_REARM:
619                         qp_context->flags |= cl_hton32(MTHCA_QP_PM_REARM << 11);
620                         break;
621                 case IB_APM_ARMED:
622                         qp_context->flags |= cl_hton32(MTHCA_QP_PM_ARMED << 11);
623                         break;
624                 }
625         }
626
627         /* leave tavor_sched_queue as 0 */
628
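        /*
         * mtu_msgmax packs the path MTU into the top three bits and log2
         * of the maximum message size into the low five bits: MLX and UD
         * QPs are capped at 2048-byte messages (2^11), other transports
         * allow messages up to 2^31 bytes.  (Encoding inferred from the
         * values used below.)
         */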
629         if (qp->transport == MLX || qp->transport == UD)
630                 qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
631         else if (attr_mask & IB_QP_PATH_MTU)
632                 qp_context->mtu_msgmax = (u8)((attr->path_mtu << 5) | 31);
633
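        /*
         * On mem-free HCAs the queue geometry is passed in the context:
         * log2 of the number of WQEs in bits 7:3 and log2 of the WQE
         * stride minus 4 in bits 2:0 of the *_size_stride fields.
         */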
634         if (mthca_is_memfree(dev)) {
635                 if (qp->rq.max)
636                         qp_context->rq_size_stride = (u8)(long_log2(qp->rq.max) << 3);
637                 qp_context->rq_size_stride |= qp->rq.wqe_shift - 4;
638
639                 if (qp->sq.max)
640                         qp_context->sq_size_stride = (u8)(long_log2(qp->sq.max) << 3);
641                 qp_context->sq_size_stride |= qp->sq.wqe_shift - 4;
642         }
643
644         /* leave arbel_sched_queue as 0 */
645
646         if (qp->ibqp.ucontext)
647                 qp_context->usr_page =
648                         cl_hton32(to_mucontext(qp->ibqp.ucontext)->uar.index);
649         else
650                 qp_context->usr_page = cl_hton32(dev->driver_uar.index);
651         qp_context->local_qpn  = cl_hton32(qp->qpn);
652         if (attr_mask & IB_QP_DEST_QPN) {
653                 qp_context->remote_qpn = cl_hton32(attr->dest_qp_num);
654         }
655
656         if (qp->transport == MLX)
657                 qp_context->pri_path.port_pkey |=
658                         cl_hton32(to_msqp(qp)->port << 24);
659         else {
660                 if (attr_mask & IB_QP_PORT) {
661                         qp_context->pri_path.port_pkey |=
662                                 cl_hton32(attr->port_num << 24);
663                         qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PORT_NUM);
664                 }
665         }
666
667         if (attr_mask & IB_QP_PKEY_INDEX) {
668                 qp_context->pri_path.port_pkey |=
669                         cl_hton32(attr->pkey_index);
670                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PKEY_INDEX);
671         }
672
673         if (attr_mask & IB_QP_RNR_RETRY) {
674                 qp_context->pri_path.rnr_retry = attr->rnr_retry << 5;
675                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RNR_RETRY);
676         }
677
678         if (attr_mask & IB_QP_AV) {
679                 qp_context->pri_path.g_mylmc     = attr->ah_attr.src_path_bits & 0x7f;
680                 qp_context->pri_path.rlid        = cl_hton16(attr->ah_attr.dlid);
681                 qp_context->pri_path.static_rate = (u8)!!attr->ah_attr.static_rate;
682                 if (attr->ah_attr.ah_flags & IB_AH_GRH) {
683                         qp_context->pri_path.g_mylmc |= 1 << 7;
684                         qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
685                         qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
686                         qp_context->pri_path.sl_tclass_flowlabel =
687                                 cl_hton32((attr->ah_attr.sl << 28)                |
688                                             (attr->ah_attr.grh.traffic_class << 20) |
689                                             (attr->ah_attr.grh.flow_label));
690                         memcpy(qp_context->pri_path.rgid,
691                                attr->ah_attr.grh.dgid.raw, 16);
692                 } else {
693                         qp_context->pri_path.sl_tclass_flowlabel =
694                                 cl_hton32(attr->ah_attr.sl << 28);
695                 }
696                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
697         }
698
699         if (attr_mask & IB_QP_TIMEOUT) {
700                 qp_context->pri_path.ackto = attr->timeout << 3;
701                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
702         }
703
704         /* XXX alt_path */
705
706         /* leave rdd as 0 */
707         qp_context->pd         = cl_hton32(to_mpd(ibqp->pd)->pd_num);
708         /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
709         qp_context->wqe_lkey   = cl_hton32(qp->mr.ibmr.lkey);
710         qp_context->params1    = cl_hton32((unsigned long)(
711                 (MTHCA_ACK_REQ_FREQ << 28) |
712                 (MTHCA_FLIGHT_LIMIT << 24) |
713                 MTHCA_QP_BIT_SWE));
714         if (qp->sq_policy == IB_SIGNAL_ALL_WR)
715                 qp_context->params1 |= cl_hton32(MTHCA_QP_BIT_SSC);
716         if (attr_mask & IB_QP_RETRY_CNT) {
717                 qp_context->params1 |= cl_hton32(attr->retry_cnt << 16);
718                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RETRY_COUNT);
719         }
720
721         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
722                 if (attr->max_rd_atomic) {
723                         qp_context->params1 |=
724                                 cl_hton32(MTHCA_QP_BIT_SRE |
725                                             MTHCA_QP_BIT_SAE);
726                         qp_context->params1 |=
727                                 cl_hton32(fls(attr->max_rd_atomic - 1) << 21);
728                 }
729                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_SRA_MAX);
730         }
731
732         if (attr_mask & IB_QP_SQ_PSN)
733                 qp_context->next_send_psn = cl_hton32(attr->sq_psn);
734         qp_context->cqn_snd = cl_hton32(to_mcq(ibqp->send_cq)->cqn);
735
736         if (mthca_is_memfree(dev)) {
737                 qp_context->snd_wqe_base_l = cl_hton32(qp->send_wqe_offset);
738                 qp_context->snd_db_index   = cl_hton32(qp->sq.db_index);
739         }
740
741         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
742
743                 if (attr->max_dest_rd_atomic)
744                         qp_context->params2 |=
745                                 cl_hton32(fls(attr->max_dest_rd_atomic - 1) << 21);
746
747                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RRA_MAX);
748
749         }
750
751         if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
752                 qp_context->params2      |= get_hw_access_flags(qp, attr, attr_mask);
753                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RWE |
754                                                         MTHCA_QP_OPTPAR_RRE |
755                                                         MTHCA_QP_OPTPAR_RAE);
756         }
757
758         qp_context->params2 |= cl_hton32(MTHCA_QP_BIT_RSC);
759
760         if (ibqp->srq)
761                 qp_context->params2 |= cl_hton32(MTHCA_QP_BIT_RIC);
762
763         if (attr_mask & IB_QP_MIN_RNR_TIMER) {
764                 qp_context->rnr_nextrecvpsn |= cl_hton32(attr->min_rnr_timer << 24);
765                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
766         }
767         if (attr_mask & IB_QP_RQ_PSN)
768                 qp_context->rnr_nextrecvpsn |= cl_hton32(attr->rq_psn);
769
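        /*
         * Point the QP at its slice of the RDB area, which the HCA uses
         * to track incoming RDMA read and atomic requests while acting
         * as responder; each QP gets MTHCA_RDB_ENTRY_SIZE << rdb_shift
         * bytes starting at rdb_base.
         */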
770         qp_context->ra_buff_indx =
771                 cl_hton32(dev->qp_table.rdb_base +
772                             ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
773                              dev->qp_table.rdb_shift));
774
775         qp_context->cqn_rcv = cl_hton32(to_mcq(ibqp->recv_cq)->cqn);
776
777         if (mthca_is_memfree(dev))
778                 qp_context->rcv_db_index   = cl_hton32(qp->rq.db_index);
779
780         if (attr_mask & IB_QP_QKEY) {
781                 qp_context->qkey = cl_hton32(attr->qkey);
782                 qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_Q_KEY);
783         }
784
785         if (ibqp->srq)
786                 qp_context->srqn = cl_hton32(1 << 24 |
787                                                to_msrq(ibqp->srq)->srqn);
788
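        /*
         * When moving RTS->SQD with async notification enabled, ask the
         * firmware for a "send queue drained" event (the sqd_event flag
         * passed to mthca_MODIFY_QP below).
         */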
789         if (cur_state == IBQPS_RTS && new_state == IBQPS_SQD &&
790             attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY               &&
791             attr->en_sqd_async_notify)
792                 sqd_event = (u32)(1 << 31);
793
794         err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
795                               qp->qpn, 0, mailbox, sqd_event, &status);
796         if (status) {
797                 HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("modify QP %d returned status %02x.\n",
798                            state_table[cur_state][new_state].trans, status));
799                 err = -EINVAL;
800         }
801
802         if (!err) {
803                 qp->state = new_state;
804                 if (attr_mask & IB_QP_ACCESS_FLAGS)
805                         qp->atomic_rd_en = (u8)attr->qp_access_flags;
806                 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
807                         qp->resp_depth = attr->max_dest_rd_atomic;
808         }
809
810         mthca_free_mailbox(dev, mailbox);
811
812         if (is_sqp(dev, qp))
813                 store_attrs(to_msqp(qp), attr, attr_mask);
814
815         /*
816          * If we moved QP0 to RTR, bring the IB link up; if we moved
817          * QP0 to RESET or ERROR, bring the link back down.
818          */
819         if (is_qp0(dev, qp)) {
820                 if (cur_state != IBQPS_RTR &&
821                     new_state == IBQPS_RTR)
822                         init_port(dev, to_msqp(qp)->port);
823
824                 if (cur_state != IBQPS_RESET &&
825                     cur_state != IBQPS_ERR &&
826                     (new_state == IBQPS_RESET ||
827                      new_state == IBQPS_ERR))
828                         mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
829         }
830
831         /*
832          * If we moved a kernel QP to RESET, clean up all old CQ
833          * entries and reinitialize the QP.
834          */
835         if (!err && new_state == IBQPS_RESET && !qp->ibqp.ucontext) {
836                 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
837                                qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
838                 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
839                         mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn,
840                                        qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
841
842                 mthca_wq_init(&qp->sq);
843                 qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
844                 mthca_wq_init(&qp->rq);
845                 qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
846
847                 if (mthca_is_memfree(dev)) {
848                         *qp->sq.db = 0;
849                         *qp->rq.db = 0;
850                 }
851         }
852
853         return err;
854 }
855
856 static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz)
857 {
858
859         /*
860          * Calculate the maximum size of WQE s/g segments, excluding
861          * the next segment and other non-data segments.
862          */
863         int max_data_size = desc_sz - sizeof (struct mthca_next_seg);
864
865         switch (qp->transport) {
866         case MLX:
867                 max_data_size -= 2 * sizeof (struct mthca_data_seg);
868                 break;
869
870         case UD:
871                 if (mthca_is_memfree(dev))
872                         max_data_size -= sizeof (struct mthca_arbel_ud_seg);
873                 else
874                         max_data_size -= sizeof (struct mthca_tavor_ud_seg);
875                 break;
876
877         default:
878                 max_data_size -= sizeof (struct mthca_raddr_seg);
879                 break;
880         }
881         return max_data_size;
882 }
883
884 static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size)
885 {
886         /* We don't support inline data for kernel QPs (yet). */
887         return pd->ibpd.ucontext ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0;
888 }
889
890 static void mthca_adjust_qp_caps(struct mthca_dev *dev,
891                                  struct mthca_pd *pd,
892                                  struct mthca_qp *qp)
893 {
894         int max_data_size = mthca_max_data_size(dev, qp,
895                 min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift));
896
897         qp->max_inline_data = mthca_max_inline_data(pd, max_data_size);
898
899         qp->sq.max_gs = min(dev->limits.max_sg,
900                 (int)(max_data_size / sizeof (struct mthca_data_seg)));
901         qp->rq.max_gs = min(dev->limits.max_sg,
902                 (int)((min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
903                 sizeof (struct mthca_next_seg)) / sizeof (struct mthca_data_seg)));     
904 }
905
906 /*
907  * Allocate and register buffer for WQEs.  qp->rq.max, sq.max,
908  * rq.max_gs and sq.max_gs must all be assigned.
909  * mthca_alloc_wqe_buf will calculate rq.wqe_shift and
910  * sq.wqe_shift (as well as send_wqe_offset, is_direct, and
911  * queue)
912  */
913 static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
914                                struct mthca_pd *pd,
915                                struct mthca_qp *qp)
916 {
917         int size;
918         int err = -ENOMEM;
919         
920         HCA_ENTER(HCA_DBG_QP);
921         size = sizeof (struct mthca_next_seg) +
922                 qp->rq.max_gs * sizeof (struct mthca_data_seg);
923
924         if (size > dev->limits.max_desc_sz)
925                 return -EINVAL;
926
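        /*
         * Round the receive WQE size up to a power of two (64 bytes
         * minimum) so that WQEs can be located with a simple shift.
         */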
927         for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
928              qp->rq.wqe_shift++)
929                 ; /* nothing */
930
931         size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
932         switch (qp->transport) {
933                 case MLX:
934                         size += 2 * sizeof (struct mthca_data_seg);
935                         break;
936
937                 case UD:
938                         size += mthca_is_memfree(dev) ?
939                                 sizeof (struct mthca_arbel_ud_seg) :
940                                 sizeof (struct mthca_tavor_ud_seg);
941                         break;
942                 
943                 case UC:
944                         size += sizeof (struct mthca_raddr_seg);
945                         break;
946                 
947                 case RC:
948                         size += sizeof (struct mthca_raddr_seg);
949                         /*
950                          * An atomic op will require an atomic segment, a
951                          * remote address segment and one scatter entry.
952                          */
953                         size = max(size,
954                                  sizeof (struct mthca_atomic_seg) +
955                                  sizeof (struct mthca_raddr_seg) +
956                                  sizeof (struct mthca_data_seg));
957                         break;
958                         
959                 default:
960                         break;
961         }
962                 
963         /* Make sure that we have enough space for a bind request */
964         size = max(size, sizeof (struct mthca_bind_seg));
965         
966         size += sizeof (struct mthca_next_seg);
967         
968         if (size > dev->limits.max_desc_sz)
969                 return -EINVAL;
970
971         for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
972              qp->sq.wqe_shift++)
973                 ; /* nothing */
974
975         qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
976                                     1 << qp->sq.wqe_shift);
977
978         /*
979          * If this is a userspace QP, we don't actually have to
980          * allocate anything.  All we need is to calculate the WQE
981          * sizes and the send_wqe_offset, so we're done now.
982          */
983         if (pd->ibpd.ucontext)
984                 return 0;
985
986         size = (int)(LONG_PTR)NEXT_PAGE_ALIGN(qp->send_wqe_offset +
987                           (qp->sq.max << qp->sq.wqe_shift));
988
989         qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64),
990                            GFP_KERNEL);
991         if (!qp->wrid)
992                 goto err_out;
993
994         err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE,
995                               &qp->queue, &qp->is_direct, pd, 0, &qp->mr);
996         if (err)
997                 goto err_out;
998         
999         HCA_EXIT(HCA_DBG_QP);
1000         return 0;
1001
1002 err_out:
1003         kfree(qp->wrid);
1004         return err;
1005 }
1006
1007 static void mthca_free_wqe_buf(struct mthca_dev *dev,
1008                                struct mthca_qp *qp)
1009 {
1010         mthca_buf_free(dev, (int)(LONG_PTR)NEXT_PAGE_ALIGN(qp->send_wqe_offset +
1011                                        (qp->sq.max << qp->sq.wqe_shift)),
1012                        &qp->queue, qp->is_direct, &qp->mr);
1013         kfree(qp->wrid);
1014 }
1015
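/*
 * On mem-free (Arbel) HCAs the QP context, extended QP context and RDB
 * entries live in ICM backed by host memory; take a reference on the
 * relevant table chunks before the QP can be used.  This is a no-op on
 * Tavor.
 */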
1016 static int mthca_map_memfree(struct mthca_dev *dev,
1017                              struct mthca_qp *qp)
1018 {
1019         int ret;
1020
1021         if (mthca_is_memfree(dev)) {
1022                 ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
1023                 if (ret)
1024                         return ret;
1025
1026                 ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);
1027                 if (ret)
1028                         goto err_qpc;
1029
1030                 ret = mthca_table_get(dev, dev->qp_table.rdb_table,
1031                                       qp->qpn << dev->qp_table.rdb_shift);
1032                 if (ret)
1033                         goto err_eqpc;
1034
1035         }
1036
1037         return 0;
1038
1039 err_eqpc:
1040         mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1041
1042 err_qpc:
1043         mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1044
1045         return ret;
1046 }
1047
1048 static void mthca_unmap_memfree(struct mthca_dev *dev,
1049                                 struct mthca_qp *qp)
1050 {
1051         mthca_table_put(dev, dev->qp_table.rdb_table,
1052                         qp->qpn << dev->qp_table.rdb_shift);
1053         mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1054         mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1055 }
1056
1057 static int mthca_alloc_memfree(struct mthca_dev *dev,
1058                                struct mthca_qp *qp)
1059 {
1060         int ret = 0;
1061
1062         if (mthca_is_memfree(dev)) {
1063                 qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
1064                                                  qp->qpn, &qp->rq.db);
1065                 if (qp->rq.db_index < 0)
1066                         return -ENOMEM;
1067
1068                 qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
1069                                                  qp->qpn, &qp->sq.db);
1070                 if (qp->sq.db_index < 0) {
1071                         mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); ret = -ENOMEM; }
1072         }
1073
1074         return ret;
1075 }
1076
1077 static void mthca_free_memfree(struct mthca_dev *dev,
1078                                struct mthca_qp *qp)
1079 {
1080         if (mthca_is_memfree(dev)) {
1081                 mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
1082                 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1083         }
1084 }
1085
1086 static int mthca_alloc_qp_common(struct mthca_dev *dev,
1087                                  struct mthca_pd *pd,
1088                                  struct mthca_cq *send_cq,
1089                                  struct mthca_cq *recv_cq,
1090                                  enum ib_sig_type send_policy,
1091                                  struct mthca_qp *qp)
1092 {
1093         int ret;
1094         int i;
1095
1096         atomic_set(&qp->refcount, 1);
1097         init_waitqueue_head(&qp->wait);
1098         qp->state        = IBQPS_RESET;
1099         qp->atomic_rd_en = 0;
1100         qp->resp_depth   = 0;
1101         qp->sq_policy    = send_policy;
1102         mthca_wq_init(&qp->sq);
1103         mthca_wq_init(&qp->rq);
1104
1105         UNREFERENCED_PARAMETER(send_cq);
1106         UNREFERENCED_PARAMETER(recv_cq);
1107         
1108         ret = mthca_map_memfree(dev, qp);
1109         if (ret)
1110                 return ret;
1111
1112         ret = mthca_alloc_wqe_buf(dev, pd, qp);
1113         if (ret) {
1114                 mthca_unmap_memfree(dev, qp);
1115                 return ret;
1116         }
1117
1118         mthca_adjust_qp_caps(dev, pd, qp);
1119
1120         /*
1121          * If this is a userspace QP, we're done now.  The doorbells
1122          * will be allocated and buffers will be initialized in
1123          * userspace.
1124          */
1125         if (pd->ibpd.ucontext)
1126                 return 0;
1127
1128         ret = mthca_alloc_memfree(dev, qp);
1129         if (ret) {
1130                 mthca_free_wqe_buf(dev, qp);
1131                 mthca_unmap_memfree(dev, qp);
1132                 return ret;
1133         }
1134
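        /*
         * On mem-free HCAs, pre-link every receive WQE to its successor
         * (nda_op), mark all scatter entries invalid, and chain the send
         * WQEs the same way, so the hardware sees consistent empty queues
         * before the first work request is posted.
         */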
1135         if (mthca_is_memfree(dev)) {
1136                 struct mthca_next_seg *next;
1137                 struct mthca_data_seg *scatter;
1138                 int size = (sizeof (struct mthca_next_seg) +
1139                             qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;
1140
1141                 for (i = 0; i < qp->rq.max; ++i) {
1142                         next = get_recv_wqe(qp, i);
1143                         next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) <<
1144                                                    qp->rq.wqe_shift);
1145                         next->ee_nds = cl_hton32(size);
1146
1147                         for (scatter = (void *) (next + 1);
1148                              (void *) scatter < (void *) ((u8*)next + (1 << qp->rq.wqe_shift));
1149                              ++scatter)
1150                                 scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY);
1151                 }
1152
1153                 for (i = 0; i < qp->sq.max; ++i) {
1154                         next = get_send_wqe(qp, i);
1155                         next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) <<
1156                                                     qp->sq.wqe_shift) +
1157                                                    qp->send_wqe_offset);
1158                 }
1159         }
1160
1161         qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
1162         qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
1163
1164         return 0;
1165 }
1166
1167 static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
1168                                          struct mthca_pd *pd, struct mthca_qp *qp)
1169 {
1170         int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);
1171
1172         /* Sanity check QP size before proceeding */
1173         if (cap->max_send_wr     > (u32)dev->limits.max_wqes ||
1174             cap->max_recv_wr     > (u32)dev->limits.max_wqes ||
1175             cap->max_send_sge    > (u32)dev->limits.max_sg   ||
1176             cap->max_recv_sge    > (u32)dev->limits.max_sg   ||
1177             cap->max_inline_data > (u32)mthca_max_inline_data(pd, max_data_size))
1178                 return -EINVAL;
1179
1180         /*
1181          * For MLX transport we need 2 extra S/G entries:
1182          * one for the header and one for the checksum at the end
1183          */
1184         if (qp->transport == MLX && cap->max_recv_sge + 2 > (u32)dev->limits.max_sg)
1185                 return -EINVAL;
1186
1187         if (mthca_is_memfree(dev)) {
1188                 qp->rq.max = cap->max_recv_wr ?
1189                         roundup_pow_of_two(cap->max_recv_wr) : 0;
1190                 qp->sq.max = cap->max_send_wr ?
1191                         roundup_pow_of_two(cap->max_send_wr) : 0;
1192         } else {
1193                 qp->rq.max = cap->max_recv_wr;
1194                 qp->sq.max = cap->max_send_wr;
1195         }
1196
1197         qp->rq.max_gs = cap->max_recv_sge;
1198         qp->sq.max_gs = MAX(cap->max_send_sge,
1199                               ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
1200                                     MTHCA_INLINE_CHUNK_SIZE) /
1201                               (int)sizeof (struct mthca_data_seg));
1202
1203         return 0;
1204 }
1205
1206 int mthca_alloc_qp(struct mthca_dev *dev,
1207                    struct mthca_pd *pd,
1208                    struct mthca_cq *send_cq,
1209                    struct mthca_cq *recv_cq,
1210                    enum ib_qp_type_t type,
1211                    enum ib_sig_type send_policy,
1212                    struct ib_qp_cap *cap,
1213                    struct mthca_qp *qp)
1214 {
1215         int err;
1216         SPIN_LOCK_PREP(lh);
1217
1218         err = mthca_set_qp_size(dev, cap, pd, qp);
1219         if (err)
1220                 return err;
1221
1222         switch (type) {
1223         case IB_QPT_RELIABLE_CONN: qp->transport = RC; break;
1224         case IB_QPT_UNRELIABLE_CONN: qp->transport = UC; break;
1225         case IB_QPT_UNRELIABLE_DGRM: qp->transport = UD; break;
1226         default: return -EINVAL;
1227         }
1228
1229         qp->qpn = mthca_alloc(&dev->qp_table.alloc);
1230         if (qp->qpn == -1)
1231                 return -ENOMEM;
1232
1233         err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1234                                     send_policy, qp);
1235         if (err) {
1236                 mthca_free(&dev->qp_table.alloc, qp->qpn);
1237                 return err;
1238         }
1239
1240         spin_lock_irq(&dev->qp_table.lock, &lh);
1241         mthca_array_set(&dev->qp_table.qp,
1242                         qp->qpn & (dev->limits.num_qps - 1), qp);
1243         spin_unlock_irq(&lh);
1244
1245         return 0;
1246 }
1247
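/*
 * Special QPs are mapped onto consecutive real QPNs starting at
 * qp_table.sqp_start: qpn * 2 + port - 1 yields QP0 of ports 1 and 2
 * followed by QP1 of ports 1 and 2, which matches the ranges checked by
 * is_sqp() and is_qp0().
 */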
1248 int mthca_alloc_sqp(struct mthca_dev *dev,
1249                     struct mthca_pd *pd,
1250                     struct mthca_cq *send_cq,
1251                     struct mthca_cq *recv_cq,
1252                     enum ib_sig_type send_policy,
1253                     struct ib_qp_cap *cap,
1254                     int qpn,
1255                     int port,
1256                     struct mthca_sqp *sqp)
1257 {
1258         u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
1259         int err;
1260         SPIN_LOCK_PREP(lhs);
1261         SPIN_LOCK_PREP(lhr);
1262         SPIN_LOCK_PREP(lht);
1263
1264         err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
1265         if (err)
1266                 return err;
1267
1268         alloc_dma_zmem_map(dev, 
1269                 sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE, 
1270                 PCI_DMA_BIDIRECTIONAL,
1271                 &sqp->sg);
1272         if (!sqp->sg.page)
1273                 return -ENOMEM;
1274
1275         spin_lock_irq(&dev->qp_table.lock, &lht);
1276         if (mthca_array_get(&dev->qp_table.qp, mqpn))
1277                 err = -EBUSY;
1278         else
1279                 mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
1280         spin_unlock_irq(&lht);
1281
1282         if (err)
1283                 goto err_out;
1284
1285         sqp->port = port;
1286         sqp->qp.qpn       = mqpn;
1287         sqp->qp.transport = MLX;
1288
1289         err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1290                                     send_policy, &sqp->qp);
1291         if (err)
1292                 goto err_out_free;
1293
1294         atomic_inc(&pd->sqp_count);
1295
1296         return 0;
1297
1298  err_out_free:
1299         /*
1300          * Lock CQs here, so that CQ polling code can do QP lookup
1301          * without taking a lock.
1302          */
1303         spin_lock_irq(&send_cq->lock, &lhs);
1304         if (send_cq != recv_cq)
1305                 spin_lock(&recv_cq->lock, &lhr);
1306
1307         spin_lock(&dev->qp_table.lock, &lht);
1308         mthca_array_clear(&dev->qp_table.qp, mqpn);
1309         spin_unlock(&lht);
1310
1311         if (send_cq != recv_cq)
1312                 spin_unlock(&lhr);
1313         spin_unlock_irq(&lhs);
1314
1315  err_out:
1316         free_dma_mem_map(dev, &sqp->sg, PCI_DMA_BIDIRECTIONAL);
1317
1318         return err;
1319 }
1320
1321 void mthca_free_qp(struct mthca_dev *dev,
1322                    struct mthca_qp *qp)
1323 {
1324         u8 status;
1325         struct mthca_cq *send_cq;
1326         struct mthca_cq *recv_cq;
1327         SPIN_LOCK_PREP(lhs);
1328         SPIN_LOCK_PREP(lhr);
1329         SPIN_LOCK_PREP(lht);
1330
1331         send_cq = to_mcq(qp->ibqp.send_cq);
1332         recv_cq = to_mcq(qp->ibqp.recv_cq);
1333
1334         /*
1335          * Lock CQs here, so that CQ polling code can do QP lookup
1336          * without taking a lock.
1337          */
1338         spin_lock_irq(&send_cq->lock, &lhs);
1339         if (send_cq != recv_cq)
1340                 spin_lock(&recv_cq->lock, &lhr);
1341
1342         spin_lock(&dev->qp_table.lock, &lht);
1343         mthca_array_clear(&dev->qp_table.qp,
1344                           qp->qpn & (dev->limits.num_qps - 1));
1345         spin_unlock(&lht);
1346
1347         if (send_cq != recv_cq)
1348                 spin_unlock(&lhr);
1349         spin_unlock_irq(&lhs);
1350
1351         atomic_dec(&qp->refcount);
1352         wait_event(&qp->wait, !atomic_read(&qp->refcount));
1353
1354         if (qp->state != IBQPS_RESET)
1355                 mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
1356
1357         /*
1358          * If this is a userspace QP, the buffers, MR, CQs and so on
1359          * will be cleaned up in userspace, so all we have to do is
1360          * unref the mem-free tables and free the QPN in our table.
1361          */
1362         if (!qp->ibqp.ucontext) {
1363                 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
1364                                qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1365                 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
1366                         mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn,
1367                                        qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1368
1369                 mthca_free_memfree(dev, qp);
1370                 mthca_free_wqe_buf(dev, qp);
1371         }
1372
1373         mthca_unmap_memfree(dev, qp);
1374
1375         if (is_sqp(dev, qp)) {
1376                 atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
1377                 free_dma_mem_map(dev, &to_msqp(qp)->sg, PCI_DMA_BIDIRECTIONAL);
1378         } else
1379                 mthca_free(&dev->qp_table.alloc, qp->qpn);
1380 }
1381
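/*
 * Translate an IBAL work request type (together with the
 * immediate-data send option) into the corresponding mthca hardware
 * opcode.  Unknown types map to MTHCA_OPCODE_INVALID so that the post
 * verbs can reject the work request.
 */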
1382 static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr)
1383 {
1384
1385         enum mthca_wr_opcode opcode;
1386
1387         switch (wr->wr_type) {
1388                 case WR_SEND: 
1389                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND;
1390                         break;
1391                 case WR_RDMA_WRITE:     
1392                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE;
1393                         break;
1394                 case WR_RDMA_READ:              opcode = MTHCA_OPCODE_RDMA_READ; break;
1395                 case WR_COMPARE_SWAP:           opcode = MTHCA_OPCODE_ATOMIC_CS; break;
1396                 case WR_FETCH_ADD:              opcode = MTHCA_OPCODE_ATOMIC_FA; break;
1397                 default:                        opcode = MTHCA_OPCODE_INVALID; break;
1398         }
1399         return opcode;
1400 }
1401
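/*
 * The MLX transport is used only by the special QPs.  The hardware
 * does not build UD headers for them, so the driver packs the LRH,
 * BTH and DETH into this send WQE's slot of sqp->sg and points the
 * first data segment of the WQE at that header.
 */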
1402 /* Create UD header for an MLX send and build a data segment for it */
1403 static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1404                             int ind, struct _ib_send_wr *wr,
1405                             struct mthca_mlx_seg *mlx,
1406                             struct mthca_data_seg *data)
1407 {
1408         enum mthca_wr_opcode opcode = conv_ibal_wr_opcode(wr);
1409         int header_size;
1410         int err;
1411         u16 pkey;
1412         CPU_2_BE64_PREP;
1413
1414         ib_ud_header_init(256, /* assume a MAD */
1415                 mthca_ah_grh_present(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)),
1416                 &sqp->ud_header);
1417
1418         err = mthca_read_ah(dev, to_mah((struct ib_ah *)wr->dgrm.ud.h_av), &sqp->ud_header);
1419         if (err) {
1420                 HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_AV, ("read av error %p\n",
1421                         to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av));
1422                 return err;
1423         }
1424         mlx->flags &= ~cl_hton32(MTHCA_NEXT_SOLICIT | 1);
1425         mlx->flags |= cl_hton32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
1426                                   (sqp->ud_header.lrh.destination_lid ==
1427                                    IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
1428                                   (sqp->ud_header.lrh.service_level << 8));
1429         mlx->rlid = sqp->ud_header.lrh.destination_lid;
1430         mlx->vcrc = 0;
1431
1432         switch (opcode) {
1433         case MTHCA_OPCODE_SEND:
1434                 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1435                 sqp->ud_header.immediate_present = 0;
1436                 break;
1437         case MTHCA_OPCODE_SEND_IMM:
1438                 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
1439                 sqp->ud_header.immediate_present = 1;
1440                 sqp->ud_header.immediate_data = wr->immediate_data;
1441                 break;
1442         default:
1443                 return -EINVAL;
1444         }
1445
1446         sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
1447         if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
1448                 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
1449         sqp->ud_header.bth.solicited_event = (u8)!!(wr->send_opt & IB_SEND_OPT_SOLICITED);
1450         if (!sqp->qp.ibqp.qp_num)
1451                 ib_get_cached_pkey(&dev->ib_dev, (u8)sqp->port,
1452                                    sqp->pkey_index, &pkey);
1453         else
1454                 ib_get_cached_pkey(&dev->ib_dev, (u8)sqp->port,
1455                                    wr->dgrm.ud.pkey_index, &pkey);
1456         sqp->ud_header.bth.pkey = cl_hton16(pkey);
1457         sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;
1458         sqp->ud_header.bth.psn = cl_hton32((sqp->send_psn++) & ((1 << 24) - 1));
1459         sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ?
1460                                                cl_hton32(sqp->qkey) : wr->dgrm.ud.remote_qkey;
1461         sqp->ud_header.deth.source_qpn = cl_hton32(sqp->qp.ibqp.qp_num);
1462
1463         header_size = ib_ud_header_pack(&sqp->ud_header,
1464                                         (u8*)sqp->sg.page +
1465                                         ind * MTHCA_UD_HEADER_SIZE);
1466
1467         data->byte_count = cl_hton32(header_size);
1468         data->lkey       = cl_hton32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
1469         data->addr       = CPU_2_BE64(sqp->sg.dma_address +
1470                                        ind * MTHCA_UD_HEADER_SIZE);
1471
1472         return 0;
1473 }
1474
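/*
 * Check whether posting nreq more WQEs would overflow the work queue.
 * The fast path compares head and tail without locking; if the queue
 * looks full, the snapshot is retaken under the CQ lock, since
 * completion processing advances wq->tail while holding that lock.
 */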
1475 static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
1476                                     struct ib_cq *ib_cq)
1477 {
1478         unsigned cur;
1479         struct mthca_cq *cq;
1480         SPIN_LOCK_PREP(lh);
1481
1482         cur = wq->head - wq->tail;
1483         if (likely((int)cur + nreq < wq->max))
1484                 return 0;
1485
1486         cq = to_mcq(ib_cq);
1487         spin_lock(&cq->lock, &lh);
1488         cur = wq->head - wq->tail;
1489         spin_unlock(&lh);
1490
1491         return (int)cur + nreq >= wq->max;
1492 }
1493
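/*
 * Post a list of send work requests on a Tavor QP.  Each WQE is built
 * in place and linked into the hardware's chain by patching the
 * previous WQE's nda_op/ee_nds words (sizes are counted in 16-byte
 * units); one MMIO send doorbell covering the whole chain is written
 * at the end.
 */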
1494 int mthca_tavor_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr,
1495                           struct _ib_send_wr **bad_wr)
1496 {
1497         struct mthca_dev *dev = to_mdev(ibqp->device);
1498         struct mthca_qp *qp = to_mqp(ibqp);
1499         u8 *wqe;
1500         u8 *prev_wqe;
1501         int err = 0;
1502         int nreq;
1503         int i;
1504         int size;
1505         int size0 = 0;
1506         u32 f0 = 0;
1507         int ind;
1508         u8 op0 = 0;
1509         enum mthca_wr_opcode opcode;
1510         SPIN_LOCK_PREP(lh);
1511
1512         spin_lock_irqsave(&qp->sq.lock, &lh);
1513
1514         /* XXX check that state is OK to post send */
1515
1516         ind = qp->sq.next_ind;
1517
1518         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
1519                 if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1520                         HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("SQ %06x full (%u head, %u tail,"
1521                                         " %d max, %d nreq)\n", qp->qpn,
1522                                         qp->sq.head, qp->sq.tail,
1523                                         qp->sq.max, nreq));
1524                         err = -ENOMEM;
1525                         *bad_wr = wr;
1526                         goto out;
1527                 }
1528
1529                 wqe = get_send_wqe(qp, ind);
1530                 prev_wqe = qp->sq.last;
1531                 qp->sq.last = wqe;
1532                 opcode = conv_ibal_wr_opcode(wr);
1533
1534                 ((struct mthca_next_seg *) wqe)->nda_op = 0;
1535                 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
1536                 ((struct mthca_next_seg *) wqe)->flags =
1537                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
1538                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
1539                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
1540                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
1541                         cl_hton32(1);
1542                 if (opcode == MTHCA_OPCODE_SEND_IMM ||
1543                     opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
1544                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
1545
1546                 wqe += sizeof (struct mthca_next_seg);
1547                 size = sizeof (struct mthca_next_seg) / 16;
1548
1549                 switch (qp->transport) {
1550                 case RC:
1551                         switch (opcode) {
1552                         case MTHCA_OPCODE_ATOMIC_CS:
1553                         case MTHCA_OPCODE_ATOMIC_FA:
1554                                 ((struct mthca_raddr_seg *) wqe)->raddr =
1555                                         cl_hton64(wr->remote_ops.vaddr);
1556                                 ((struct mthca_raddr_seg *) wqe)->rkey =
1557                                         cl_hton32(wr->remote_ops.rkey);
1558                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1559
1560                                 wqe += sizeof (struct mthca_raddr_seg);
1561
1562                                 if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
1563                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
1564                                                 cl_hton64(wr->remote_ops.atomic2);
1565                                         ((struct mthca_atomic_seg *) wqe)->compare =
1566                                                 cl_hton64(wr->remote_ops.atomic1);
1567                                 } else {
1568                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
1569                                                 cl_hton64(wr->remote_ops.atomic1);
1570                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;
1571                                 }
1572
1573                                 wqe += sizeof (struct mthca_atomic_seg);
1574                                 size += (sizeof (struct mthca_raddr_seg) +
1575                                         sizeof (struct mthca_atomic_seg)) / 16 ;
1576                                 break;
1577
1578                         case MTHCA_OPCODE_RDMA_READ:
1579                         case MTHCA_OPCODE_RDMA_WRITE:
1580                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
1581                                 ((struct mthca_raddr_seg *) wqe)->raddr =
1582                                         cl_hton64(wr->remote_ops.vaddr);
1583                                 ((struct mthca_raddr_seg *) wqe)->rkey =
1584                                         cl_hton32(wr->remote_ops.rkey);
1585                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1586                                 wqe += sizeof (struct mthca_raddr_seg);
1587                                 size += sizeof (struct mthca_raddr_seg) / 16;
1588                                 break;
1589
1590                         default:
1591                                 /* No extra segments required for sends */
1592                                 break;
1593                         }
1594
1595                         break;
1596
1597                 case UC:
1598                         switch (opcode) {
1599                         case MTHCA_OPCODE_RDMA_WRITE:
1600                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
1601                                 ((struct mthca_raddr_seg *) wqe)->raddr =
1602                                         cl_hton64(wr->remote_ops.vaddr);
1603                                 ((struct mthca_raddr_seg *) wqe)->rkey =
1604                                         cl_hton32(wr->remote_ops.rkey);
1605                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1606                                 wqe += sizeof (struct mthca_raddr_seg);
1607                                 size += sizeof (struct mthca_raddr_seg) / 16;
1608                                 break;
1609
1610                         default:
1611                                 /* No extra segments required for sends */
1612                                 break;
1613                         }
1614
1615                         break;
1616
1617                 case UD:
1618                         ((struct mthca_tavor_ud_seg *) wqe)->lkey =
1619                                 cl_hton32(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->key);
1620                         ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
1621                                 cl_hton64(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->avdma);
1622                         ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
1623                         ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
1624
1625                         wqe += sizeof (struct mthca_tavor_ud_seg);
1626                         size += sizeof (struct mthca_tavor_ud_seg) / 16;
1627                         break;
1628
1629                 case MLX:
1630                         err = build_mlx_header(dev, to_msqp(qp), ind, wr,
1631                                                (void*)(wqe - sizeof (struct mthca_next_seg)),
1632                                                (void*)wqe);
1633                         if (err) {
1634                                 *bad_wr = wr;
1635                                 goto out;
1636                         }
1637                         wqe += sizeof (struct mthca_data_seg);
1638                         size += sizeof (struct mthca_data_seg) / 16;
1639                         break;
1640                 }
1641
1642                 if ((int)wr->num_ds > qp->sq.max_gs) {
1643                         HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("SQ %06x too many gathers\n",qp->qpn));
1644                         err = -EINVAL;
1645                         *bad_wr = wr;
1646                         goto out;
1647                 }
1648
1649                 for (i = 0; i < (int)wr->num_ds; ++i) {
1650                         ((struct mthca_data_seg *) wqe)->byte_count =
1651                                 cl_hton32(wr->ds_array[i].length);
1652                         ((struct mthca_data_seg *) wqe)->lkey =
1653                                 cl_hton32(wr->ds_array[i].lkey);
1654                         ((struct mthca_data_seg *) wqe)->addr =
1655                                 cl_hton64(wr->ds_array[i].vaddr);
1656                         wqe += sizeof (struct mthca_data_seg);
1657                         size += sizeof (struct mthca_data_seg) / 16;
1658                         HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_QP ,("SQ %06x [%02x]  lkey 0x%08x vaddr 0x%I64x 0x%x\n",qp->qpn,i,
1659                                 (wr->ds_array[i].lkey),(wr->ds_array[i].vaddr),wr->ds_array[i].length));
1660                 }
1661
1662                 /* Add one more inline data segment for ICRC */
1663                 if (qp->transport == MLX) {
1664                         ((struct mthca_data_seg *) wqe)->byte_count =
1665                                 cl_hton32((unsigned long)((1 << 31) | 4));
1666                         ((u32 *) wqe)[1] = 0;
1667                         wqe += sizeof (struct mthca_data_seg);
1668                         size += sizeof (struct mthca_data_seg) / 16;
1669                 }
1670
1671                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
1672
1673                 if (opcode == MTHCA_OPCODE_INVALID) {
1674                         HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("SQ %06x opcode invalid\n",qp->qpn));
1675                         err = -EINVAL;
1676                         *bad_wr = wr;
1677                         goto out;
1678                 }
1679
1680                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
1681                         cl_hton32(((ind << qp->sq.wqe_shift) +  
1682                         qp->send_wqe_offset) |opcode);
1683                 wmb();
1684                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1685                         cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
1686                                 ((wr->send_opt & IB_SEND_OPT_FENCE) ?
1687                                 MTHCA_NEXT_FENCE : 0));
1688
1689                 if (!size0) {
1690                         size0 = size;
1691                         op0   = opcode;
1692                 }
1693
1694                 dump_wqe(TRACE_LEVEL_VERBOSE, (u32*)qp->sq.last, qp);
1695
1696                 ++ind;
1697                 if (unlikely(ind >= qp->sq.max))
1698                         ind -= qp->sq.max;
1699         }
1700
1701 out:
1702         if (likely(nreq)) {
1703                 __be32 doorbell[2];
1704
1705                 doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) +
1706                                            qp->send_wqe_offset) | f0 | op0);
1707                 doorbell[1] = cl_hton32((qp->qpn << 8) | size0);
1708
1709                 wmb();
1710
1711                 mthca_write64(doorbell,
1712                               dev->kar + MTHCA_SEND_DOORBELL,
1713                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1714         }
1715
1716         qp->sq.next_ind = ind;
1717         qp->sq.head    += nreq;
1718
1719         spin_unlock_irqrestore(&lh);
1720         return err;
1721 }
1722
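/*
 * Post a list of receive work requests on a Tavor QP.  WQEs are linked
 * the same way as on the send side; the receive doorbell is rung after
 * every MTHCA_TAVOR_MAX_WQES_PER_RECV_DB WQEs and once more for the
 * remainder when the loop finishes.
 */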
1723 int mthca_tavor_post_receive(struct ib_qp *ibqp, struct _ib_recv_wr *wr,
1724                              struct _ib_recv_wr **bad_wr)
1725 {
1726         struct mthca_dev *dev = to_mdev(ibqp->device);
1727         struct mthca_qp *qp = to_mqp(ibqp);
1728         __be32 doorbell[2];
1729         int err = 0;
1730         int nreq;
1731         int i;
1732         int size;
1733         int size0 = 0;
1734         int ind;
1735         u8 *wqe;
1736         u8 *prev_wqe;
1737         SPIN_LOCK_PREP(lh);
1738
1739         spin_lock_irqsave(&qp->rq.lock, &lh);
1740
1741         /* XXX check that state is OK to post receive */
1742
1743         ind = qp->rq.next_ind;
1744
1745         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
1746                 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
1747                         nreq = 0;
1748
1749                         doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
1750                         doorbell[1] = cl_hton32(qp->qpn << 8);
1751
1752                         wmb();
1753
1754                         mthca_write64(doorbell, dev->kar + MTHCA_RECEIVE_DOORBELL,
1755                                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1756
                             qp->rq.next_ind = ind;
1757                         qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
1758                         size0 = 0;
1759                 }
1760                 if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1761                         HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("RQ %06x full (%u head, %u tail,"
1762                                         " %d max, %d nreq)\n", qp->qpn,
1763                                         qp->rq.head, qp->rq.tail,
1764                                         qp->rq.max, nreq));
1765                         err = -ENOMEM;
1766                         *bad_wr = wr;
1767                         goto out;
1768                 }
1769
1770                 wqe = get_recv_wqe(qp, ind);
1771                 prev_wqe = qp->rq.last;
1772                 qp->rq.last = wqe;
1773
1774                 ((struct mthca_next_seg *) wqe)->nda_op = 0;
1775                 ((struct mthca_next_seg *) wqe)->ee_nds =
1776                         cl_hton32(MTHCA_NEXT_DBD);
1777                 ((struct mthca_next_seg *) wqe)->flags = 0;
1778
1779                 wqe += sizeof (struct mthca_next_seg);
1780                 size = sizeof (struct mthca_next_seg) / 16;
1781
1782                 if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
1783                         HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("RQ %06x too many gathers\n",qp->qpn));
1784                         err = -EINVAL;
1785                         *bad_wr = wr;
1786                         goto out;
1787                 }
1788
1789                 for (i = 0; i < (int)wr->num_ds; ++i) {
1790                         ((struct mthca_data_seg *) wqe)->byte_count =
1791                                 cl_hton32(wr->ds_array[i].length);
1792                         ((struct mthca_data_seg *) wqe)->lkey =
1793                                 cl_hton32(wr->ds_array[i].lkey);
1794                         ((struct mthca_data_seg *) wqe)->addr =
1795                                 cl_hton64(wr->ds_array[i].vaddr);
1796                         wqe += sizeof (struct mthca_data_seg);
1797                         size += sizeof (struct mthca_data_seg) / 16;
1798 //                      HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("RQ %06x [%02x]  lkey 0x%08x vaddr 0x%I64x 0x %x 0x%08x\n",i,qp->qpn,
1799 //                              (wr->ds_array[i].lkey),(wr->ds_array[i].vaddr),wr->ds_array[i].length, wr->wr_id));
1800                 }
1801
1802                 qp->wrid[ind] = wr->wr_id;
1803
1804                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
1805                         cl_hton32((ind << qp->rq.wqe_shift) | 1);
1806                 wmb();
1807                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1808                         cl_hton32(MTHCA_NEXT_DBD | size);
1809
1810                 if (!size0)
1811                         size0 = size;
1812
1813                 dump_wqe(TRACE_LEVEL_VERBOSE, (u32*)qp->rq.last, qp);
1814                 
1815                 ++ind;
1816                 if (unlikely(ind >= qp->rq.max))
1817                         ind -= qp->rq.max;
1818         }
1819
1820 out:
1821         if (likely(nreq)) {
1822                 doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
1823                 doorbell[1] = cl_hton32((qp->qpn << 8) | nreq);
1824
1825                 wmb();
1826
1827                 mthca_write64(doorbell, dev->kar + MTHCA_RECEIVE_DOORBELL,
1828                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1829         }
1830
1831         qp->rq.next_ind = ind;
1832         qp->rq.head    += nreq;
1833
1834         spin_unlock_irqrestore(&lh);
1835         return err;
1836 }
1837
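/*
 * Post a list of send work requests on a mem-free (Arbel) QP.  The
 * descriptors are written first, then the doorbell record in memory
 * (*qp->sq.db), and only then the MMIO send doorbell; the doorbell is
 * rung every MTHCA_ARBEL_MAX_WQES_PER_SEND_DB WQEs and once at the
 * end.
 */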
1838 int mthca_arbel_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr,
1839                           struct _ib_send_wr **bad_wr)
1840 {
1841         struct mthca_dev *dev = to_mdev(ibqp->device);
1842         struct mthca_qp *qp = to_mqp(ibqp);
1843         __be32 doorbell[2];
1844         u8 *wqe;
1845         u8 *prev_wqe;
1846         int err = 0;
1847         int nreq;
1848         int i;
1849         int size;
1850         int size0 = 0;
1851         u32 f0 = 0;
1852         int ind;
1853         u8 op0 = 0;
1854         enum mthca_wr_opcode opcode;
1855         SPIN_LOCK_PREP(lh);
1856
1857         spin_lock_irqsave(&qp->sq.lock, &lh);
1858
1859         /* XXX check that state is OK to post send */
1860
1861         ind = qp->sq.head & (qp->sq.max - 1);
1862
1863         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
1864                 if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
1865                         nreq = 0;
1866                         doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
1867                                 ((qp->sq.head & 0xffff) << 8) |f0 | op0);
1868                         doorbell[1] = cl_hton32((qp->qpn << 8) | size0);
1869                         qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
1870                         size0 = 0;
1871
1872                         /*
1873                          * Make sure that descriptors are written before
1874                          * doorbell record.
1875                          */
1876                         wmb();
1877                         *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
1878
1879                         /*
1880                          * Make sure doorbell record is written before we
1881                          * write MMIO send doorbell.
1882                          */
1883                         wmb();
1884                         mthca_write64(doorbell, dev->kar + MTHCA_SEND_DOORBELL,
1885                                 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1886                 }
1887
1888                 if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1889                         HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("SQ %06x full (%u head, %u tail,"
1890                                         " %d max, %d nreq)\n", qp->qpn,
1891                                         qp->sq.head, qp->sq.tail,
1892                                         qp->sq.max, nreq));
1893                         err = -ENOMEM;
1894                         *bad_wr = wr;
1895                         goto out;
1896                 }
1897
1898                 wqe = get_send_wqe(qp, ind);
1899                 prev_wqe = qp->sq.last;
1900                 qp->sq.last = wqe;
1901                 opcode = conv_ibal_wr_opcode(wr);
1902
1903                 ((struct mthca_next_seg *) wqe)->flags =
1904                         ((wr->send_opt & IB_SEND_OPT_SIGNALED) ?
1905                          cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) |
1906                         ((wr->send_opt & IB_SEND_OPT_SOLICITED) ?
1907                          cl_hton32(MTHCA_NEXT_SOLICIT) : 0)   |
1908                         cl_hton32(1);
1909                 if (opcode == MTHCA_OPCODE_SEND_IMM ||
1910                         opcode == MTHCA_OPCODE_RDMA_WRITE_IMM)
1911                         ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data;
1912
1913                 wqe += sizeof (struct mthca_next_seg);
1914                 size = sizeof (struct mthca_next_seg) / 16;
1915
1916                 switch (qp->transport) {
1917                 case RC:
1918                         switch (opcode) {
1919                         case MTHCA_OPCODE_ATOMIC_CS:
1920                         case MTHCA_OPCODE_ATOMIC_FA:
1921                                 ((struct mthca_raddr_seg *) wqe)->raddr =
1922                                         cl_hton64(wr->remote_ops.vaddr);
1923                                 ((struct mthca_raddr_seg *) wqe)->rkey =
1924                                         cl_hton32(wr->remote_ops.rkey);
1925                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1926
1927                                 wqe += sizeof (struct mthca_raddr_seg);
1928
1929                                         if (opcode == MTHCA_OPCODE_ATOMIC_CS) {
1930                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
1931                                                 cl_hton64(wr->remote_ops.atomic2);
1932                                         ((struct mthca_atomic_seg *) wqe)->compare =
1933                                                 cl_hton64(wr->remote_ops.atomic1);
1934                                 } else {
1935                                         ((struct mthca_atomic_seg *) wqe)->swap_add =
1936                                                 cl_hton64(wr->remote_ops.atomic1);
1937                                         ((struct mthca_atomic_seg *) wqe)->compare = 0;
1938                                 }
1939
1940                                 wqe += sizeof (struct mthca_atomic_seg);
1941                                 size += (sizeof (struct mthca_raddr_seg) +
1942                                         sizeof (struct mthca_atomic_seg)) / 16 ;
1943                                 break;
1944
1945                         case MTHCA_OPCODE_RDMA_READ:
1946                         case MTHCA_OPCODE_RDMA_WRITE:
1947                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
1948                                 ((struct mthca_raddr_seg *) wqe)->raddr =
1949                                         cl_hton64(wr->remote_ops.vaddr);
1950                                 ((struct mthca_raddr_seg *) wqe)->rkey =
1951                                         cl_hton32(wr->remote_ops.rkey);
1952                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1953                                 wqe += sizeof (struct mthca_raddr_seg);
1954                                 size += sizeof (struct mthca_raddr_seg) / 16;
1955                                 break;
1956
1957                         default:
1958                                 /* No extra segments required for sends */
1959                                 break;
1960                         }
1961
1962                         break;
1963
1964                 case UC:
1965                         switch (opcode) {
1966                         case MTHCA_OPCODE_RDMA_WRITE:
1967                         case MTHCA_OPCODE_RDMA_WRITE_IMM:
1968                                 ((struct mthca_raddr_seg *) wqe)->raddr =
1969                                         cl_hton64(wr->remote_ops.vaddr);
1970                                 ((struct mthca_raddr_seg *) wqe)->rkey =
1971                                         cl_hton32(wr->remote_ops.rkey);
1972                                 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1973                                 wqe += sizeof (struct mthca_raddr_seg);
1974                                 size += sizeof (struct mthca_raddr_seg) / 16;
1975                                 break;
1976
1977                         default:
1978                                 /* No extra segments required for sends */
1979                                 break;
1980                         }
1981
1982                         break;
1983
1984                 case UD:
1985                         memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
1986                                to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, MTHCA_AV_SIZE);
1987                         ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp;
1988                         ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey;
1989
1990                         wqe += sizeof (struct mthca_arbel_ud_seg);
1991                         size += sizeof (struct mthca_arbel_ud_seg) / 16;
1992                         break;
1993
1994                 case MLX:
1995                         err = build_mlx_header(dev, to_msqp(qp), ind, wr,
1996                                                (void*)(wqe - sizeof (struct mthca_next_seg)),
1997                                                (void*)wqe);
1998                         if (err) {
1999                                 *bad_wr = wr;
2000                                 goto out;
2001                         }
2002                         wqe += sizeof (struct mthca_data_seg);
2003                         size += sizeof (struct mthca_data_seg) / 16;
2004                         break;
2005                 }
2006
2007                 if ((int)wr->num_ds > qp->sq.max_gs) {
2008                         HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("SQ %06x full too many gathers\n",qp->qpn));
2009                         err = -EINVAL;
2010                         *bad_wr = wr;
2011                         goto out;
2012                 }
2013
2014                 for (i = 0; i < (int)wr->num_ds; ++i) {
2015                         ((struct mthca_data_seg *) wqe)->byte_count =
2016                                 cl_hton32(wr->ds_array[i].length);
2017                         ((struct mthca_data_seg *) wqe)->lkey =
2018                                 cl_hton32(wr->ds_array[i].lkey);
2019                         ((struct mthca_data_seg *) wqe)->addr =
2020                                 cl_hton64(wr->ds_array[i].vaddr);
2021                         wqe += sizeof (struct mthca_data_seg);
2022                         size += sizeof (struct mthca_data_seg) / 16;
2023                 }
2024
2025                 /* Add one more inline data segment for ICRC */
2026                 if (qp->transport == MLX) {
2027                         ((struct mthca_data_seg *) wqe)->byte_count =
2028                                 cl_hton32((unsigned long)((1 << 31) | 4));
2029                         ((u32 *) wqe)[1] = 0;
2030                         wqe += sizeof (struct mthca_data_seg);
2031                         size += sizeof (struct mthca_data_seg) / 16;
2032                 }
2033
2034                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
2035
2036                 if (opcode == MTHCA_OPCODE_INVALID) {
2037                         HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("SQ %06x opcode invalid\n",qp->qpn));
2038                         err = -EINVAL;
2039                         *bad_wr = wr;
2040                         goto out;
2041                 }
2042
2043                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
2044                         cl_hton32(((ind << qp->sq.wqe_shift) +
2045                         qp->send_wqe_offset) |opcode);
2046                 wmb();
2047                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
2048                         cl_hton32(MTHCA_NEXT_DBD | size |
2049                         ((wr->send_opt & IB_SEND_OPT_FENCE) ?
2050                         MTHCA_NEXT_FENCE : 0));
2051                 
2052                 if (!size0) {
2053                         size0 = size;
2054                         op0   = opcode;
2055                 }
2056
2057                 ++ind;
2058                 if (unlikely(ind >= qp->sq.max))
2059                         ind -= qp->sq.max;
2060         }
2061
2062 out:
2063         if (likely(nreq)) {
2064                 doorbell[0] = cl_hton32((nreq << 24) |
2065                         ((qp->sq.head & 0xffff) << 8) |f0 | op0);
2066                 doorbell[1] = cl_hton32((qp->qpn << 8) | size0);
2067                 qp->sq.head += nreq;
2068
2069                 /*
2070                  * Make sure that descriptors are written before
2071                  * doorbell record.
2072                  */
2073                 wmb();
2074                 *qp->sq.db = cl_hton32(qp->sq.head & 0xffff);
2075
2076                 /*
2077                  * Make sure doorbell record is written before we
2078                  * write MMIO send doorbell.
2079                  */
2080                 wmb();
2081                 mthca_write64(doorbell,
2082                               dev->kar + MTHCA_SEND_DOORBELL,
2083                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
2084         }
2085
2086         spin_unlock_irqrestore(&lh);
2087         return err;
2088 }
2089
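/*
 * Post a list of receive work requests on a mem-free (Arbel) QP.  No
 * MMIO doorbell is needed: once the descriptors are written, updating
 * the doorbell record (*qp->rq.db) is enough.  A scatter list shorter
 * than max_gs is terminated with an MTHCA_INVAL_LKEY entry.
 */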
2090 int mthca_arbel_post_receive(struct ib_qp *ibqp, struct _ib_recv_wr *wr,
2091                              struct _ib_recv_wr **bad_wr)
2092 {
2093         struct mthca_qp *qp = to_mqp(ibqp);
2094         int err = 0;
2095         int nreq;
2096         int ind;
2097         int i;
2098         u8 *wqe;
2099         SPIN_LOCK_PREP(lh);
2100
2101         spin_lock_irqsave(&qp->rq.lock, &lh);
2102
2103         /* XXX check that state is OK to post receive */
2104
2105         ind = qp->rq.head & (qp->rq.max - 1);
2106
2107         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
2108                 if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2109                         HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("RQ %06x full (%u head, %u tail,"
2110                                         " %d max, %d nreq)\n", qp->qpn,
2111                                         qp->rq.head, qp->rq.tail,
2112                                         qp->rq.max, nreq));
2113                         err = -ENOMEM;
2114                         *bad_wr = wr;
2115                         goto out;
2116                 }
2117
2118                 wqe = get_recv_wqe(qp, ind);
2119
2120                 ((struct mthca_next_seg *) wqe)->flags = 0;
2121
2122                 wqe += sizeof (struct mthca_next_seg);
2123
2124                 if (unlikely((int)wr->num_ds > qp->rq.max_gs)) {
2125                         HCA_PRINT(TRACE_LEVEL_ERROR  ,HCA_DBG_QP ,("RQ %06x full too many scatter\n",qp->qpn));
2126                         err = -EINVAL;
2127                         *bad_wr = wr;
2128                         goto out;
2129                 }
2130
2131                 for (i = 0; i < (int)wr->num_ds; ++i) {
2132                         ((struct mthca_data_seg *) wqe)->byte_count =
2133                                 cl_hton32(wr->ds_array[i].length);
2134                         ((struct mthca_data_seg *) wqe)->lkey =
2135                                 cl_hton32(wr->ds_array[i].lkey);
2136                         ((struct mthca_data_seg *) wqe)->addr =
2137                                 cl_hton64(wr->ds_array[i].vaddr);
2138                         wqe += sizeof (struct mthca_data_seg);
2139                 }
2140
2141                 if (i < qp->rq.max_gs) {
2142                         ((struct mthca_data_seg *) wqe)->byte_count = 0;
2143                         ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY);
2144                         ((struct mthca_data_seg *) wqe)->addr = 0;
2145                 }
2146
2147                 qp->wrid[ind] = wr->wr_id;
2148
2149                 ++ind;
2150                 if (unlikely(ind >= qp->rq.max))
2151                         ind -= qp->rq.max;
2152         }
2153 out:
2154         if (likely(nreq)) {
2155                 qp->rq.head += nreq;
2156
2157                 /*
2158                  * Make sure that descriptors are written before
2159                  * doorbell record.
2160                  */
2161                 wmb();
2162                 *qp->rq.db = cl_hton32(qp->rq.head & 0xffff);
2163         }
2164
2165         spin_unlock_irqrestore(&lh);
2166         return err;
2167 }
2168
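/*
 * Intended for the CQ error-handling path: for a WQE that completed in
 * error, report whether it owned a doorbell (DBD bit) and, if it links
 * to a following descriptor, return that link in *new_wqe so the
 * doorbell chain can be kept intact.
 */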
2169 void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
2170                        int index, int *dbd, __be32 *new_wqe)
2171 {
2172         struct mthca_next_seg *next;
2173
2174         UNREFERENCED_PARAMETER(dev);
2175         
2176         /*
2177          * For SRQs, all WQEs generate a CQE, so we're always at the
2178          * end of the doorbell chain.
2179          */
2180         if (qp->ibqp.srq) {
2181                 *new_wqe = 0;
2182                 return;
2183         }
2184
2185         if (is_send)
2186                 next = get_send_wqe(qp, index);
2187         else
2188                 next = get_recv_wqe(qp, index);
2189
2190         *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD));
2191         if (next->ee_nds & cl_hton32(0x3f))
2192                 *new_wqe = (next->nda_op & cl_hton32((unsigned long)~0x3f)) |
2193                         (next->ee_nds & cl_hton32(0x3f));
2194         else
2195                 *new_wqe = 0;
2196 }
2197
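/*
 * Set up the QP table: a QPN allocator covering everything above the
 * reserved QPs and the MTHCA_MAX_PORTS * 2 special QP slots, a lookup
 * array indexed by QPN, and firmware configuration of QP0/QP1 via
 * CONF_SPECIAL_QP.
 */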
2198 int mthca_init_qp_table(struct mthca_dev *dev)
2199 {
2200         int err;
2201         u8 status;
2202         int i;
2203
2204         spin_lock_init(&dev->qp_table.lock);
2205         fill_state_table();
2206
2207         /*
2208          * We reserve 2 extra QPs per port for the special QPs.  The
2209          * special QP for port 1 has to be even, so round up.
2210          */
2211         dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
2212         err = mthca_alloc_init(&dev->qp_table.alloc,
2213                                dev->limits.num_qps,
2214                                (1 << 24) - 1,
2215                                dev->qp_table.sqp_start +
2216                                MTHCA_MAX_PORTS * 2);
2217         if (err)
2218                 return err;
2219
2220         err = mthca_array_init(&dev->qp_table.qp,
2221                                dev->limits.num_qps);
2222         if (err) {
2223                 mthca_alloc_cleanup(&dev->qp_table.alloc);
2224                 return err;
2225         }
2226
2227         for (i = 0; i < 2; ++i) {
2228                 err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_QP1 : IB_QPT_QP0,
2229                                             dev->qp_table.sqp_start + i * 2,
2230                                             &status);
2231                 if (err)
2232                         goto err_out;
2233                 if (status) {
2234                         HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("CONF_SPECIAL_QP returned "
2235                                    "status %02x, aborting.\n",
2236                                    status));
2237                         err = -EINVAL;
2238                         goto err_out;
2239                 }
2240         }
2241         return 0;
2242
2243  err_out:
2244         mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP1, 0, &status);
2245         mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP0, 0, &status);
2246
2247         mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
2248         mthca_alloc_cleanup(&dev->qp_table.alloc);
2249
2250         return err;
2251 }
2252
2253 void mthca_cleanup_qp_table(struct mthca_dev *dev)
2254 {
2255         u8 status;
2256
2257         mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP1, 0, &status);
2258         mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP0, 0, &status);
2259
2260         mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
2261         mthca_alloc_cleanup(&dev->qp_table.alloc);
2262 }
2263