hw/mlx4/kernel/bus/ib/qp.c
1 /*\r
2  * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.\r
3  *\r
4  * This software is available to you under a choice of one of two\r
5  * licenses.  You may choose to be licensed under the terms of the GNU\r
6  * General Public License (GPL) Version 2, available from the file\r
7  * COPYING in the main directory of this source tree, or the\r
8  * OpenIB.org BSD license below:\r
9  *\r
10  *     Redistribution and use in source and binary forms, with or\r
11  *     without modification, are permitted provided that the following\r
12  *     conditions are met:\r
13  *\r
14  *      - Redistributions of source code must retain the above\r
15  *        copyright notice, this list of conditions and the following\r
16  *        disclaimer.\r
17  *\r
18  *      - Redistributions in binary form must reproduce the above\r
19  *        copyright notice, this list of conditions and the following\r
20  *        disclaimer in the documentation and/or other materials\r
21  *        provided with the distribution.\r
22  *\r
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
30  * SOFTWARE.\r
31  */\r
32 \r
33 #include "mlx4_ib.h"\r
34 #include "ib_cache.h"\r
35 #include "ib_pack.h"\r
36 #include "qp.h"\r
37 #include "user.h"\r
38 \r
39 enum {\r
40         MLX4_IB_ACK_REQ_FREQ    = 8,\r
41 };\r
42 \r
43 enum {\r
44         MLX4_IB_DEFAULT_SCHED_QUEUE     = 0x83,\r
45         MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f\r
46 };\r
47 \r
48 enum {\r
49         /*\r
50          * Largest possible UD header: send with GRH and immediate data.\r
51          */\r
52         MLX4_IB_UD_HEADER_SIZE          = 72\r
53 };\r
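/*
 * Rough breakdown of the 72-byte worst case above (standard IBA header
 * sizes, not spelled out in the original source): LRH (8) + GRH (40) +
 * BTH (12) + DETH (8) + immediate data (4) = 72 bytes.
 */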
54 \r
55 struct mlx4_ib_sqp {\r
56         struct mlx4_ib_qp       qp;\r
57         int                     pkey_index;\r
58         u32                     qkey;\r
59         u32                     send_psn;\r
60         struct ib_ud_header     ud_header;\r
61         u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];\r
62 };\r
63 \r
64 enum {\r
65         MLX4_IB_MIN_SQ_STRIDE = 6\r
66 };\r
67 \r
68 static const __be32 mlx4_ib_opcode[] = {\r
69         __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),         /*      [IB_WR_RDMA_WRITE]                      */\r
70         __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),     /*      [IB_WR_RDMA_WRITE_WITH_IMM] */\r
71         __constant_cpu_to_be32(MLX4_OPCODE_SEND),                       /*      [IB_WR_SEND]                            */\r
72         __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),           /*      [IB_WR_SEND_WITH_IMM]           */\r
73         __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),          /*      [IB_WR_RDMA_READ]                       */\r
74         __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),          /*      [IB_WR_ATOMIC_CMP_AND_SWP]      */\r
75         __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),          /*      [IB_WR_ATOMIC_FETCH_AND_ADD]*/\r
76         __constant_cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6))                              /* [IB_WR_LSO]                                  */\r
77 };\r
78 \r
79 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)\r
80 {\r
81         return container_of(mqp, struct mlx4_ib_sqp, qp);\r
82 }\r
83 \r
84 static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)\r
85 {\r
86         return qp->mqp.qpn >= dev->dev->caps.sqp_start &&\r
87                 qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;\r
88 }\r
89 \r
90 static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)\r
91 {\r
92         return qp->mqp.qpn >= dev->dev->caps.sqp_start &&\r
93                 qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;\r
94 }\r
95 \r
96 static void *get_wqe(struct mlx4_ib_qp *qp, int offset)\r
97 {\r
98         if (qp->buf.nbufs == 1)\r
99                 return qp->buf.u.direct.buf + offset;\r
100         else\r
101                 return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +\r
102                         (offset & (PAGE_SIZE - 1));\r
103 }\r
104 \r
105 static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)\r
106 {\r
107         return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));\r
108 }\r
109 \r
110 static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)\r
111 {\r
112         return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));\r
113 }\r
114 \r
115 /*\r
116  * Stamp a SQ WQE so that it is invalid if prefetched by marking the\r
117  * first four bytes of every 64 byte chunk with 0xffffffff, except for\r
118  * the very first chunk of the WQE.\r
119  */\r
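/*
 * Worked example (illustrative only): with qp->sq.wqe_shift == 7 the WQE
 * is 128 bytes, i.e. 32 dwords.  The loop below starts at dword index 16
 * (byte offset 64) and stamps every 16th dword, so only the second
 * 64-byte chunk is set to 0xffffffff; the first chunk, which holds the
 * control segment, is left untouched.
 */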
120 static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)\r
121 {\r
122         u32 *wqe = get_send_wqe(qp, n);\r
123         int i;\r
124 \r
125         for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)\r
126                 wqe[i] = 0xffffffff;\r
127 }\r
128 \r
129 static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)\r
130 {\r
131         ib_event_rec_t event;\r
132         struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;\r
133 \r
134         if (type == MLX4_EVENT_TYPE_PATH_MIG)\r
135                 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;\r
136 \r
137         switch (type) {\r
138         case MLX4_EVENT_TYPE_PATH_MIG:\r
139                 event.type = IB_EVENT_PATH_MIG;\r
140                 break;\r
141         case MLX4_EVENT_TYPE_COMM_EST:\r
142                 event.type = IB_EVENT_COMM_EST;\r
143                 break;\r
144         case MLX4_EVENT_TYPE_SQ_DRAINED:\r
145                 event.type = IB_EVENT_SQ_DRAINED;\r
146                 break;\r
147         case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:\r
148                 event.type = IB_EVENT_QP_LAST_WQE_REACHED;\r
149                 break;\r
150         case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:\r
151                 event.type = IB_EVENT_QP_FATAL;\r
152                 break;\r
153         case MLX4_EVENT_TYPE_PATH_MIG_FAILED:\r
154                 event.type = IB_EVENT_PATH_MIG_ERR;\r
155                 break;\r
156         case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:\r
157                 event.type = IB_EVENT_QP_REQ_ERR;\r
158                 break;\r
159         case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:\r
160                 event.type = IB_EVENT_QP_ACCESS_ERR;\r
161                 break;\r
162         default:\r
163                 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "\r
164                        "on QP %06x\n", type, qp->qpn);\r
165                 return;\r
166         }\r
167 \r
168         event.context = ibqp->qp_context;\r
169         ibqp->event_handler(&event);\r
170 }\r
171 \r
172 static int send_wqe_overhead(enum ib_qp_type type, u32 flags)\r
173 {\r
174         /*\r
175          * UD WQEs must have a datagram segment.\r
176          * RC and UC WQEs might have a remote address segment.\r
177          * MLX WQEs need two extra inline data segments (for the UD\r
178          * header and space for the ICRC).\r
179          */\r
180         switch (type) {\r
181         case IB_QPT_UD:\r
182                 return sizeof (struct mlx4_wqe_ctrl_seg)  +\r
183                         sizeof (struct mlx4_wqe_datagram_seg) +\r
184                         ((flags & MLX4_IB_QP_LSO) ? 64 : 0);\r
185         case IB_QPT_UC:\r
186                 return sizeof (struct mlx4_wqe_ctrl_seg) +\r
187                         sizeof (struct mlx4_wqe_raddr_seg);\r
188         case IB_QPT_RC:\r
189                 return sizeof (struct mlx4_wqe_ctrl_seg) +\r
190                         sizeof (struct mlx4_wqe_atomic_seg) +\r
191                         sizeof (struct mlx4_wqe_raddr_seg);\r
192         case IB_QPT_SMI:\r
193         case IB_QPT_GSI:\r
194                 return sizeof (struct mlx4_wqe_ctrl_seg) +\r
195                         ALIGN(MLX4_IB_UD_HEADER_SIZE +\r
196                               DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,\r
197                                            MLX4_INLINE_ALIGN) *\r
198                               sizeof (struct mlx4_wqe_inline_seg),\r
199                               sizeof (struct mlx4_wqe_data_seg)) +\r
200                         ALIGN(4 +\r
201                               sizeof (struct mlx4_wqe_inline_seg),\r
202                               sizeof (struct mlx4_wqe_data_seg));\r
203         default:\r
204                 return sizeof (struct mlx4_wqe_ctrl_seg);\r
205         }\r
206 }\r
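/*
 * Illustrative arithmetic for the SMI/GSI case above, assuming the usual
 * mlx4 segment sizes (16-byte ctrl and data segments, 4-byte inline
 * segment header) and MLX4_INLINE_ALIGN == 64: the 72-byte UD header
 * spans DIV_ROUND_UP(72, 64) == 2 inline segments, so 72 + 2 * 4 = 80
 * bytes, already a multiple of 16; the ICRC placeholder needs
 * ALIGN(4 + 4, 16) = 16 bytes.  Total overhead: 16 + 80 + 16 = 112 bytes.
 */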
207 \r
208 static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,\r
209                        int is_user, int has_srq, struct mlx4_ib_qp *qp)\r
210 {\r
211         /* Sanity check RQ size before proceeding */\r
212         if ((int)cap->max_recv_wr  > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||\r
213             (int)cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))\r
214                 return -EINVAL;\r
215 \r
216         if (has_srq) {\r
217                 /* QPs attached to an SRQ should have no RQ */\r
218                 if (cap->max_recv_wr)\r
219                         return -EINVAL;\r
220 \r
221                 qp->rq.wqe_cnt = qp->rq.max_gs = 0;\r
222         } else {\r
223                 /* HW requires >= 1 RQ entry with >= 1 gather entry */\r
224                 if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))\r
225                         return -EINVAL;\r
226 \r
227                 qp->rq.wqe_cnt   = roundup_pow_of_two(max(1U, cap->max_recv_wr));\r
228                 qp->rq.max_gs    = roundup_pow_of_two(max(1U, cap->max_recv_sge));\r
229                 qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));\r
230         }\r
231 \r
232         /* leave userspace return values as they were, so as not to break ABI */\r
233         if (is_user) {\r
234                 cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;\r
235                 cap->max_recv_sge = qp->rq.max_gs;\r
236         } else {\r
237                 cap->max_recv_wr  = qp->rq.max_post =\r
238                         min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);\r
239                 cap->max_recv_sge = min(qp->rq.max_gs,\r
240                                         min(dev->dev->caps.max_sq_sg,\r
241                                         dev->dev->caps.max_rq_sg));\r
242         }\r
243         /* We don't support inline sends for kernel QPs (yet) */\r
244 \r
245         return 0;\r
246 }\r
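/*
 * Example of the rounding above (kernel QP, no SRQ), assuming a 16-byte
 * mlx4_wqe_data_seg: max_recv_wr = 100, max_recv_sge = 3 gives
 * wqe_cnt = 128, max_gs = 4 and wqe_shift = ilog2(4 * 16) = 6, i.e.
 * 64-byte receive WQEs.
 */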
247 \r
248 static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,\r
249                               enum ib_qp_type type, struct mlx4_ib_qp *qp)\r
250 {\r
251         /* Sanity check SQ size before proceeding */\r
252         if ((int)cap->max_send_wr       > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE  ||\r
253             (int)cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||\r
254             (int)cap->max_inline_data + send_wqe_overhead(type, qp->flags) +\r
255             (int)sizeof(struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)\r
256                 return -EINVAL;\r
257 \r
258         /*\r
259          * For MLX transport we need 2 extra S/G entries:\r
260          * one for the header and one for the checksum at the end\r
261          */\r
262         if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&\r
263             (int)cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)\r
264                 return -EINVAL;\r
265 \r
266         qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *\r
267                                                         sizeof (struct mlx4_wqe_data_seg),\r
268                                                         cap->max_inline_data +\r
269                                                         sizeof (struct mlx4_wqe_inline_seg)) +\r
270                                                     send_wqe_overhead(type,qp->flags)));\r
271         qp->sq.wqe_shift = max(MLX4_IB_SQ_MIN_WQE_SHIFT, qp->sq.wqe_shift);\r
272         qp->sq.max_gs    = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type,qp->flags)) /\r
273                 sizeof (struct mlx4_wqe_data_seg);\r
274 \r
275         /*\r
276          * We need to leave 2 KB + 1 WQE of headroom in the SQ to\r
277          * allow HW to prefetch.\r
278          */\r
279         qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift);\r
280         qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);\r
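        /*
         * Example (illustrative, assuming the headroom macro implements the
         * "2 KB + 1 WQE" rule described above): with 64-byte send WQEs
         * (wqe_shift == 6) the headroom is 2048 / 64 + 1 = 33 spare WQEs,
         * so max_send_wr = 100 ends up as
         * wqe_cnt = roundup_pow_of_two(133) = 256.
         */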
281 \r
282         qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +\r
283                 (qp->sq.wqe_cnt << qp->sq.wqe_shift);\r
284         if (qp->rq.wqe_shift > qp->sq.wqe_shift) {\r
285                 qp->rq.offset = 0;\r
286                 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;\r
287         } else {\r
288                 qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;\r
289                 qp->sq.offset = 0;\r
290         }\r
291 \r
292         cap->max_send_wr = qp->sq.max_post =\r
293                 min(qp->sq.wqe_cnt - qp->sq_spare_wqes,\r
294                         dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE);\r
295         cap->max_send_sge = min(qp->sq.max_gs,\r
296                                 min(dev->dev->caps.max_sq_sg,\r
297                                         dev->dev->caps.max_rq_sg));\r
298         /* We don't support inline sends for kernel QPs (yet) */\r
299         cap->max_inline_data = 0;\r
300 \r
301         return 0;\r
302 }\r
303 \r
304 static int set_user_sq_size(struct mlx4_ib_dev *dev,\r
305                             struct mlx4_ib_qp *qp,\r
306                             struct mlx4_ib_create_qp *ucmd)\r
307 {\r
308         /* Sanity check SQ size before proceeding */\r
309         if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes       ||\r
310             ucmd->log_sq_stride >\r
311                 ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||\r
312             ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)\r
313                 return -EINVAL;\r
314 \r
315         qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;\r
316         qp->sq.wqe_shift = ucmd->log_sq_stride;\r
317 \r
318         qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +\r
319                 (qp->sq.wqe_cnt << qp->sq.wqe_shift);\r
320 \r
321         return 0;\r
322 }\r
323 \r
324 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,\r
325                             struct ib_qp_init_attr *init_attr,\r
326                             struct ib_udata *udata, u32 sqpn, struct mlx4_ib_qp *qp)\r
327 {\r
328         int err;\r
329 \r
330         mutex_init(&qp->mutex);\r
331         spin_lock_init(&qp->sq.lock);\r
332         spin_lock_init(&qp->rq.lock);\r
333 \r
334         qp->state        = XIB_QPS_RESET;\r
335         qp->atomic_rd_en = 0;\r
336         qp->resp_depth   = 0;\r
337 \r
338         qp->rq.head         = 0;\r
339         qp->rq.tail         = 0;\r
340         qp->sq.head         = 0;\r
341         qp->sq.tail         = 0;\r
342 \r
343         err = set_rq_size(dev, &init_attr->cap, !!pd->p_uctx, !!init_attr->srq, qp);\r
344         if (err)\r
345                 goto err;\r
346 \r
347         if (pd->p_uctx) {\r
348                 struct mlx4_ib_create_qp ucmd;\r
349 \r
350                 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {\r
351                         err = -EFAULT;\r
352                         goto err;\r
353                 }\r
354 \r
355                 qp->sq_no_prefetch = ucmd.sq_no_prefetch;\r
356 \r
357                 err = set_user_sq_size(dev, qp, &ucmd);\r
358                 if (err)\r
359                         goto err;\r
360 \r
361                 qp->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,\r
362                                        qp->buf_size, 0, FALSE);\r
363                 if (IS_ERR(qp->umem)) {\r
364                         err = PTR_ERR(qp->umem);\r
365                         goto err;\r
366                 }\r
367 \r
368                 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),\r
369                                     ilog2(qp->umem->page_size), &qp->mtt);\r
370                 if (err)\r
371                         goto err_buf;\r
372 \r
373                 err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);\r
374                 if (err)\r
375                         goto err_mtt;\r
376 \r
377                 if (!init_attr->srq) {\r
378                         err = mlx4_ib_db_map_user(to_mucontext(pd->p_uctx),\r
379                                                   ucmd.db_addr, &qp->db);\r
380                         if (err)\r
381                                 goto err_mtt;\r
382                 }\r
383         } else {\r
384                 qp->sq_no_prefetch = 0;\r
385                 \r
386                 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)\r
387                         qp->flags |= MLX4_IB_QP_LSO;\r
388                 \r
389                 err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);\r
390                 if (err)\r
391                         goto err;\r
392 \r
393                 if (!init_attr->srq) {\r
394                         err = mlx4_ib_db_alloc(dev, &qp->db, 0);\r
395                         if (err)\r
396                                 goto err;\r
397 \r
398                         *qp->db.db = 0;\r
399                 }\r
400 \r
401                 if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {\r
402                         err = -ENOMEM;\r
403                         goto err_db;\r
404                 }\r
405 \r
406                 err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,\r
407                                     &qp->mtt);\r
408                 if (err)\r
409                         goto err_buf;\r
410 \r
411                 err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);\r
412                 if (err)\r
413                         goto err_mtt;\r
414 \r
415                 if (qp->sq.wqe_cnt) {\r
416                         qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);\r
417                         if (!qp->sq.wrid) {\r
418                                 err = -ENOMEM;\r
419                                 goto err_wrid;\r
420                         }\r
421                 }                       \r
422 \r
423                 if (qp->rq.wqe_cnt) {\r
424                         qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);\r
425                         if (!qp->rq.wrid) {\r
426                                 err = -ENOMEM;\r
427                                 goto err_wrid;\r
428                         }\r
429                 }\r
430         }\r
431 \r
        if (!sqpn) {
                /* regular QP: reserve a range of one QPN for it */
                err = mlx4_qp_reserve_range(dev->dev, 1, 1, &sqpn);
                if (err)
                        goto err_wrid;

                err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
                if (err) {
                        /* don't leak the QPN reserved above; err_wrid does not free it */
                        mlx4_qp_release_range(dev->dev, sqpn, 1);
                        goto err_wrid;
                }
        } else {
                /* special QP: the QPN is fixed, no range was reserved */
                err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
                if (err)
                        goto err_wrid;
        }
445 \r
446         /*\r
447          * Hardware wants QPN written in big-endian order (after\r
448          * shifting) for send doorbell.  Precompute this value to save\r
449          * a little bit when posting sends.\r
450          */\r
451         qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);\r
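        /*
         * Example (illustrative): for qpn 0x000123, qpn << 8 is 0x00012300
         * and swab32() turns it into 0x00230100 on a little-endian host,
         * i.e. the shifted QPN in big-endian byte order, ready to be
         * written to the send doorbell.
         */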
452 \r
453         if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)\r
454                 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);\r
455         else\r
456                 qp->sq_signal_bits = 0;\r
457 \r
458         qp->mqp.event = mlx4_ib_qp_event;\r
459 \r
460         return 0;\r
461 \r
462 err_wrid:\r
463         if (pd->p_uctx) {\r
464                 if (!init_attr->srq)\r
465                         mlx4_ib_db_unmap_user(to_mucontext(pd->p_uctx),\r
466                                               &qp->db);\r
467         } else {\r
468                 if (qp->sq.wrid)\r
469                         kfree(qp->sq.wrid);\r
470                 if (qp->rq.wrid)\r
471                         kfree(qp->rq.wrid);\r
472         }\r
473 \r
474 err_mtt:\r
475         mlx4_mtt_cleanup(dev->dev, &qp->mtt);\r
476 \r
477 err_buf:\r
478         if (pd->p_uctx)\r
479                 ib_umem_release(qp->umem);\r
480         else\r
481                 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);\r
482 \r
483 err_db:\r
484         if (!pd->p_uctx && !init_attr->srq)\r
485                 mlx4_ib_db_free(dev, &qp->db);\r
486 \r
487 err:\r
488         return err;\r
489 }\r
490 \r
491 static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)\r
492 {\r
493         switch (state) {\r
494         case XIB_QPS_RESET:     return MLX4_QP_STATE_RST;\r
495         case XIB_QPS_INIT:      return MLX4_QP_STATE_INIT;\r
496         case XIB_QPS_RTR:       return MLX4_QP_STATE_RTR;\r
497         case XIB_QPS_RTS:       return MLX4_QP_STATE_RTS;\r
498         case XIB_QPS_SQD:       return MLX4_QP_STATE_SQD;\r
499         case XIB_QPS_SQE:       return MLX4_QP_STATE_SQER;\r
500         case XIB_QPS_ERR:       return MLX4_QP_STATE_ERR;\r
501         default:                return -1;\r
502         }\r
503 }\r
504 \r
505 static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)\r
506 {\r
507         if (send_cq == recv_cq)\r
508                 spin_lock_irq(&send_cq->lock);\r
509         else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {\r
510                 spin_lock_irq(&send_cq->lock);\r
511                 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);\r
512         } else {\r
513                 spin_lock_irq(&recv_cq->lock);\r
514                 spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);\r
515         }\r
516 }\r
517 \r
518 static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)\r
519 {\r
520         if (send_cq == recv_cq)\r
521                 spin_unlock_irq(&send_cq->lock);\r
522         else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {\r
523                 spin_unlock(&recv_cq->lock);\r
524                 spin_unlock_irq(&send_cq->lock);\r
525         } else {\r
526                 spin_unlock(&send_cq->lock);\r
527                 spin_unlock_irq(&recv_cq->lock);\r
528         }\r
529 }\r
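/*
 * Note (added for clarity): when the send and receive CQs differ, both
 * helpers above take the two locks in a fixed order, lower CQN first, so
 * that concurrent callers cannot deadlock; the unlock helper releases
 * them in the reverse order.
 */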
530 \r
531 static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,\r
532                               int is_user)\r
533 {\r
534         struct mlx4_ib_cq *send_cq, *recv_cq;\r
535 \r
536         if (qp->state != XIB_QPS_RESET)\r
537                 if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),\r
538                                    MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))\r
539                         printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",\r
540                                qp->mqp.qpn);\r
541 \r
542         send_cq = to_mcq(qp->ibqp.send_cq);\r
543         recv_cq = to_mcq(qp->ibqp.recv_cq);\r
544 \r
545         mlx4_ib_lock_cqs(send_cq, recv_cq);\r
546 \r
547         if (!is_user) {\r
548                 __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,\r
549                                  qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);\r
550                 if (send_cq != recv_cq)\r
551                         __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);\r
552         }\r
553 \r
554         mlx4_qp_remove(dev->dev, &qp->mqp);\r
555 \r
556         mlx4_ib_unlock_cqs(send_cq, recv_cq);\r
557 \r
558         mlx4_qp_free(dev->dev, &qp->mqp);\r
559 \r
560         if (!is_sqp(dev, qp))\r
561                 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);\r
562 \r
563         mlx4_mtt_cleanup(dev->dev, &qp->mtt);\r
564 \r
565         if (is_user) {\r
566                 if (!qp->ibqp.srq)\r
567                         mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.p_uctx),\r
568                                               &qp->db);\r
569                 ib_umem_release(qp->umem);\r
570         } else {\r
571                 kfree(qp->sq.wrid);\r
572                 kfree(qp->rq.wrid);\r
573                 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);\r
574                 if (!qp->ibqp.srq)\r
575                         mlx4_ib_db_free(dev, &qp->db);\r
576         }\r
577 }\r
578 \r
579 struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,\r
580                                 struct ib_qp_init_attr *init_attr,\r
581                                 struct ib_udata *udata)\r
582 {\r
583         struct mlx4_ib_dev *dev = to_mdev(pd->device);\r
584         struct mlx4_ib_sqp *sqp;\r
585         struct mlx4_ib_qp *qp;\r
586         int err;\r
587 \r
        /* TODO: consider removing this check: we only support LSO, and only for kernel UD QPs. */
589         /*if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO)\r
590                 return ERR_PTR(-EINVAL);\r
591         if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO &&\r
592                 (pd->uobject || init_attr->qp_type != IB_QPT_UD))\r
593                 return ERR_PTR(-EINVAL);*/\r
594 \r
595         if (mlx4_is_barred(pd->device->dma_device))\r
596                 return ERR_PTR(-EFAULT);\r
597 \r
598         switch (init_attr->qp_type) {\r
599         case IB_QPT_RC:\r
600         case IB_QPT_UC:\r
601         case IB_QPT_UD:\r
602         {\r
603                 qp = kzalloc(sizeof *qp, GFP_KERNEL);\r
604                 if (!qp)\r
605                         return ERR_PTR(-ENOMEM);\r
606 \r
607                 err = create_qp_common(dev, pd, init_attr, udata, 0, qp);\r
608                 if (err) {\r
609                         kfree(qp);\r
610                         return ERR_PTR(err);\r
611                 }\r
612 \r
613                 qp->ibqp.qp_num = qp->mqp.qpn;\r
614 \r
615                 break;\r
616         }\r
617         case IB_QPT_SMI:\r
618         case IB_QPT_GSI:\r
619         {\r
620                 /* Userspace is not allowed to create special QPs: */\r
621                 if (pd->p_uctx)\r
622                         return ERR_PTR(-EINVAL);\r
623 \r
624                 sqp = kzalloc(sizeof *sqp, GFP_KERNEL);\r
625                 if (!sqp)\r
626                         return ERR_PTR(-ENOMEM);\r
627 \r
628                 qp = &sqp->qp;\r
629 \r
630                 err = create_qp_common(dev, pd, init_attr, udata,\r
631                                        dev->dev->caps.sqp_start +\r
632                                        (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +\r
633                                        init_attr->port_num - 1,\r
634                                        qp);\r
635                 if (err) {\r
636                         kfree(sqp);\r
637                         return ERR_PTR(err);\r
638                 }\r
639 \r
640                 qp->port        = init_attr->port_num;\r
641                 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;\r
642 \r
643                 break;\r
644         }\r
645         default:\r
646                 /* Don't support raw QPs */\r
647                 return ERR_PTR(-EINVAL);\r
648         }\r
649 \r
650         return &qp->ibqp;\r
651 }\r
652 \r
653 int mlx4_ib_destroy_qp(struct ib_qp *qp)\r
654 {\r
655         struct mlx4_ib_dev *dev = to_mdev(qp->device);\r
656         struct mlx4_ib_qp *mqp = to_mqp(qp);\r
657 \r
658         if (!mlx4_is_barred(dev->dev) && is_qp0(dev, mqp))\r
659                 mlx4_CLOSE_PORT(dev->dev, mqp->port);\r
660 \r
661         destroy_qp_common(dev, mqp, !!qp->pd->p_uctx);\r
662 \r
663         if (is_sqp(dev, mqp))\r
664                 kfree(to_msqp(mqp));\r
665         else\r
666                 kfree(mqp);\r
667 \r
668         return 0;\r
669 }\r
670 \r
671 static int to_mlx4_st(enum ib_qp_type type)\r
672 {\r
673         switch (type) {\r
674         case IB_QPT_RC:         return MLX4_QP_ST_RC;\r
675         case IB_QPT_UC:         return MLX4_QP_ST_UC;\r
676         case IB_QPT_UD:         return MLX4_QP_ST_UD;\r
677         case IB_QPT_SMI:\r
678         case IB_QPT_GSI:        return MLX4_QP_ST_MLX;\r
679         default:                return -1;\r
680         }\r
681 }\r
682 \r
683 static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,\r
684                                    int attr_mask)\r
685 {\r
686         u8 dest_rd_atomic;\r
687         u32 access_flags;\r
688         u32 hw_access_flags = 0;\r
689 \r
690         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)\r
691                 dest_rd_atomic = attr->max_dest_rd_atomic;\r
692         else\r
693                 dest_rd_atomic = qp->resp_depth;\r
694 \r
695         if (attr_mask & IB_QP_ACCESS_FLAGS)\r
696                 access_flags = attr->qp_access_flags;\r
697         else\r
698                 access_flags = qp->atomic_rd_en;\r
699 \r
700         if (!dest_rd_atomic)\r
701                 access_flags &= IB_ACCESS_REMOTE_WRITE;\r
702 \r
703         if (access_flags & IB_ACCESS_REMOTE_READ)\r
704                 hw_access_flags |= MLX4_QP_BIT_RRE;\r
705         if (access_flags & IB_ACCESS_REMOTE_ATOMIC)\r
706                 hw_access_flags |= MLX4_QP_BIT_RAE;\r
707         if (access_flags & IB_ACCESS_REMOTE_WRITE)\r
708                 hw_access_flags |= MLX4_QP_BIT_RWE;\r
709 \r
710         return cpu_to_be32(hw_access_flags);\r
711 }\r
712 \r
713 static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,\r
714                             int attr_mask)\r
715 {\r
716         if (attr_mask & IB_QP_PKEY_INDEX)\r
717                 sqp->pkey_index = attr->pkey_index;\r
718         if (attr_mask & IB_QP_QKEY)\r
719                 sqp->qkey = attr->qkey;\r
720         if (attr_mask & IB_QP_SQ_PSN)\r
721                 sqp->send_psn = attr->sq_psn;\r
722 }\r
723 \r
724 static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)\r
725 {\r
726         path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);\r
727 }\r
728 \r
729 static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,\r
730                          struct mlx4_qp_path *path, u8 port)\r
731 {\r
732         path->grh_mylmc     = ah->src_path_bits & 0x7f;\r
733         path->rlid          = cpu_to_be16(ah->dlid);\r
734         if (ah->static_rate) {\r
735                 path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;\r
736                 while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&\r
737                        !(1 << path->static_rate & dev->dev->caps.stat_rate_support))\r
738                         --path->static_rate;\r
739         } else\r
740                 path->static_rate = 0;\r
741         path->counter_index = 0xff;\r
742 \r
743         if (ah->ah_flags & IB_AH_GRH) {\r
744                 if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {\r
745                         printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",\r
746                                ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);\r
747                         return -1;\r
748                 }\r
749 \r
750                 path->grh_mylmc |= 1 << 7;\r
751                 path->mgid_index = ah->grh.sgid_index;\r
752                 path->hop_limit  = ah->grh.hop_limit;\r
753                 path->tclass_flowlabel =\r
754                         cpu_to_be32((ah->grh.traffic_class << 20) |\r
755                                     (ah->grh.flow_label));\r
756                 memcpy(path->rgid, ah->grh.dgid.raw, 16);\r
757         }\r
758 \r
759         path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |\r
760                 ((port - 1) << 6) | ((ah->sl & 0xf) << 2);\r
761 \r
762         return 0;\r
763 }\r
764 \r
765 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,\r
766                                const struct ib_qp_attr *attr, int attr_mask,\r
767                                enum ib_qp_state cur_state, enum ib_qp_state new_state)\r
768 {\r
769         struct mlx4_ib_dev *dev = to_mdev(ibqp->device);\r
770         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
771         struct mlx4_qp_context *context;\r
772         enum mlx4_qp_optpar optpar = 0;\r
773         int sqd_event;\r
774         int err = -EINVAL;\r
775 \r
776         context = kzalloc(sizeof *context, GFP_KERNEL);\r
777         if (!context)\r
778                 return -ENOMEM;\r
779 \r
780         context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |\r
781                                      (to_mlx4_st(ibqp->qp_type) << 16));\r
782         context->flags     |= cpu_to_be32(1 << 8); /* DE? */\r
783 \r
784         if (!(attr_mask & IB_QP_PATH_MIG_STATE))\r
785                 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);\r
786         else {\r
787                 optpar |= MLX4_QP_OPTPAR_PM_STATE;\r
788                 switch (attr->path_mig_state) {\r
789                 case IB_MIG_MIGRATED:\r
790                         context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);\r
791                         break;\r
792                 case IB_MIG_REARM:\r
793                         context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);\r
794                         break;\r
795                 case IB_MIG_ARMED:\r
796                         context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);\r
797                         break;\r
798                 }\r
799         }\r
800 \r
801         if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI )\r
802                 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;\r
803         else if (ibqp->qp_type == IB_QPT_UD) {\r
804                 if (qp->flags & MLX4_IB_QP_LSO)\r
805                         context->mtu_msgmax = (u8)((IB_MTU_4096 << 5) |\r
806                                         ilog2(dev->dev->caps.max_gso_sz));\r
807                 else\r
808                         context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;\r
809         } else if (attr_mask & IB_QP_PATH_MTU) {\r
810                 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {\r
811                         printk(KERN_ERR "path MTU (%u) is invalid\n",\r
812                                 attr->path_mtu);\r
813                         goto out;\r
814                 }\r
815                 context->mtu_msgmax = (u8)((attr->path_mtu << 5) |\r
816                         ilog2(dev->dev->caps.max_msg_sz));\r
817         }\r
818 \r
819         if (qp->rq.wqe_cnt)\r
820                 context->rq_size_stride = (u8)(ilog2(qp->rq.wqe_cnt) << 3);\r
821         context->rq_size_stride |= qp->rq.wqe_shift - 4;\r
822 \r
823         if (qp->sq.wqe_cnt)\r
824                 context->sq_size_stride = (u8)(ilog2(qp->sq.wqe_cnt) << 3);\r
825         context->sq_size_stride |= qp->sq.wqe_shift - 4;\r
826 \r
827         if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT)\r
828                 context->sq_size_stride |= !!qp->sq_no_prefetch << 7;\r
829 \r
830         if (qp->ibqp.p_uctx)\r
831                 context->usr_page = cpu_to_be32(to_mucontext(ibqp->p_uctx)->uar.index);\r
832         else\r
833                 context->usr_page = cpu_to_be32(dev->priv_uar.index);\r
834 \r
835         if (attr_mask & IB_QP_DEST_QPN)\r
836                 context->remote_qpn = cpu_to_be32(attr->dest_qp_num);\r
837 \r
838         if (attr_mask & IB_QP_PORT) {\r
839                 if (cur_state == XIB_QPS_SQD && new_state == XIB_QPS_SQD &&\r
840                     !(attr_mask & IB_QP_AV)) {\r
841                         mlx4_set_sched(&context->pri_path, attr->port_num);\r
842                         optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;\r
843                 }\r
844         }\r
845 \r
846         if (attr_mask & IB_QP_PKEY_INDEX) {\r
847                 context->pri_path.pkey_index = (u8)attr->pkey_index;\r
848                 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;\r
849         }\r
850 \r
851         if (attr_mask & IB_QP_AV) {\r
852                 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,\r
853                                   attr_mask & IB_QP_PORT ? attr->port_num : qp->port))\r
854                         goto out;\r
855 \r
856                 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |\r
857                            MLX4_QP_OPTPAR_SCHED_QUEUE);\r
858         }\r
859 \r
860         if (attr_mask & IB_QP_TIMEOUT) {\r
861                 context->pri_path.ackto = attr->timeout << 3;\r
862                 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;\r
863         }\r
864 \r
865         if (attr_mask & IB_QP_ALT_PATH) {\r
866                 if (attr->alt_port_num == 0 ||\r
867                     attr->alt_port_num > dev->dev->caps.num_ports)\r
868                         goto out;\r
869 \r
870                 if (attr->alt_pkey_index >=\r
871                     dev->dev->caps.pkey_table_len[attr->alt_port_num])\r
872                         goto out;\r
873 \r
874                 if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,\r
875                                   attr->alt_port_num))\r
876                         goto out;\r
877 \r
878                 context->alt_path.pkey_index = (u8)attr->alt_pkey_index;\r
879                 context->alt_path.ackto = attr->alt_timeout << 3;\r
880                 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;\r
881         }\r
882 \r
883         context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pdn);\r
884         context->params1    = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);\r
885 \r
886         if (attr_mask & IB_QP_RNR_RETRY) {\r
887                 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);\r
888                 optpar |= MLX4_QP_OPTPAR_RNR_RETRY;\r
889         }\r
890 \r
891         if (attr_mask & IB_QP_RETRY_CNT) {\r
892                 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);\r
893                 optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;\r
894         }\r
895 \r
896         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {\r
897                 if (attr->max_rd_atomic)\r
898                         context->params1 |=\r
899                                 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);\r
900                 optpar |= MLX4_QP_OPTPAR_SRA_MAX;\r
901         }\r
902 \r
903         if (attr_mask & IB_QP_SQ_PSN)\r
904                 context->next_send_psn = cpu_to_be32(attr->sq_psn);\r
905 \r
906         context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);\r
907 \r
908         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {\r
909                 if (attr->max_dest_rd_atomic)\r
910                         context->params2 |=\r
911                                 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);\r
912                 optpar |= MLX4_QP_OPTPAR_RRA_MAX;\r
913         }\r
914 \r
915         if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {\r
916                 context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);\r
917                 optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;\r
918         }\r
919 \r
920         if (ibqp->srq)\r
921                 context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);\r
922 \r
923         if (attr_mask & IB_QP_MIN_RNR_TIMER) {\r
924                 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);\r
925                 optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;\r
926         }\r
927         if (attr_mask & IB_QP_RQ_PSN)\r
928                 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);\r
929 \r
930         context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);\r
931 \r
932         if (attr_mask & IB_QP_QKEY) {\r
933                 context->qkey = cpu_to_be32(attr->qkey);\r
934                 optpar |= MLX4_QP_OPTPAR_Q_KEY;\r
935         }\r
936 \r
937         if (ibqp->srq)\r
938                 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);\r
939 \r
940         if (!ibqp->srq && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT)\r
941                 context->db_rec_addr = cpu_to_be64(qp->db.dma.da);\r
942 \r
943         if (cur_state == XIB_QPS_INIT &&\r
944             new_state == XIB_QPS_RTR  &&\r
945             (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||\r
946              ibqp->qp_type == IB_QPT_UD)) {\r
947                 context->pri_path.sched_queue = (qp->port - 1) << 6;\r
948                 if (is_qp0(dev, qp))\r
949                         context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;\r
950                 else\r
951                         context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;\r
952         }\r
953 \r
954         if (cur_state == XIB_QPS_RTS && new_state == XIB_QPS_SQD        &&\r
955             attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)\r
956                 sqd_event = 1;\r
957         else\r
958                 sqd_event = 0;\r
959 \r
960         /*\r
961          * Before passing a kernel QP to the HW, make sure that the\r
962          * ownership bits of the send queue are set and the SQ\r
963          * headroom is stamped so that the hardware doesn't start\r
964          * processing stale work requests.\r
965          */\r
966         if (!ibqp->p_uctx && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT) {\r
967                 struct mlx4_wqe_ctrl_seg *ctrl;\r
968                 int i;\r
969 \r
970                 for (i = 0; i < qp->sq.wqe_cnt; ++i) {\r
971                         ctrl = get_send_wqe(qp, i);\r
972                         ctrl->owner_opcode = cpu_to_be32(1 << 31);\r
973 \r
974                         stamp_send_wqe(qp, i);\r
975                 }\r
976         }\r
977 \r
978         err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),\r
979                              to_mlx4_state(new_state), context, optpar,\r
980                              sqd_event, &qp->mqp);\r
981         if (err)\r
982                 goto out;\r
983 \r
984         qp->state = new_state;\r
985 \r
986         if (attr_mask & IB_QP_ACCESS_FLAGS)\r
987                 qp->atomic_rd_en = (u8)attr->qp_access_flags;\r
988         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)\r
989                 qp->resp_depth = attr->max_dest_rd_atomic;\r
990         if (attr_mask & IB_QP_PORT)\r
991                 qp->port = attr->port_num;\r
992         if (attr_mask & IB_QP_ALT_PATH)\r
993                 qp->alt_port = attr->alt_port_num;\r
994 \r
995         if (is_sqp(dev, qp))\r
996                 store_sqp_attrs(to_msqp(qp), attr, attr_mask);\r
997 \r
998         /*\r
999          * If we moved QP0 to RTR, bring the IB link up; if we moved\r
1000          * QP0 to RESET or ERROR, bring the link back down.\r
1001          */\r
1002         if (is_qp0(dev, qp)) {\r
1003                 if (cur_state != XIB_QPS_RTR && new_state == XIB_QPS_RTR)\r
1004                         if (mlx4_INIT_PORT(dev->dev, qp->port))\r
1005                                 printk(KERN_WARNING "INIT_PORT failed for port %d\n",\r
1006                                        qp->port);\r
1007 \r
1008                 if (cur_state != XIB_QPS_RESET && cur_state != XIB_QPS_ERR &&\r
1009                     (new_state == XIB_QPS_RESET || new_state == XIB_QPS_ERR))\r
1010                         mlx4_CLOSE_PORT(dev->dev, qp->port);\r
1011         }\r
1012 \r
1013         /*\r
1014          * If we moved a kernel QP to RESET, clean up all old CQ\r
1015          * entries and reinitialize the QP.\r
1016          */\r
1017         if (new_state == XIB_QPS_RESET && !ibqp->p_uctx) {\r
1018                 mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,\r
1019                                  ibqp->srq ? to_msrq(ibqp->srq): NULL);\r
1020                 if (ibqp->send_cq != ibqp->recv_cq)\r
1021                         mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);\r
1022 \r
1023                 qp->rq.head = 0;\r
1024                 qp->rq.tail = 0;\r
1025                 qp->sq.head = 0;\r
1026                 qp->sq.tail = 0;\r
1027                 if (!ibqp->srq)\r
1028                         *qp->db.db  = 0;\r
1029         }\r
1030 \r
1031 out:\r
1032         kfree(context);\r
1033         return err;\r
1034 }\r
1035 \r
1036 static struct ib_qp_attr mlx4_ib_qp_attr;\r
1037 static int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1];\r
1038 \r
void mlx4_ib_qp_init(void)
1040 {\r
1041         memset( &mlx4_ib_qp_attr, 0, sizeof(mlx4_ib_qp_attr) );\r
1042         mlx4_ib_qp_attr.port_num = 1;\r
1043 \r
1044         memset( &mlx4_ib_qp_attr_mask_table, 0, sizeof(mlx4_ib_qp_attr_mask_table) );\r
1045         mlx4_ib_qp_attr_mask_table[IB_QPT_UD]  = (IB_QP_PKEY_INDEX              |\r
1046                                 IB_QP_PORT                      |\r
1047                                 IB_QP_QKEY);\r
1048         mlx4_ib_qp_attr_mask_table[IB_QPT_UC]  = (IB_QP_PKEY_INDEX              |\r
1049                                 IB_QP_PORT                      |\r
1050                                 IB_QP_ACCESS_FLAGS);\r
1051         mlx4_ib_qp_attr_mask_table[IB_QPT_RC]  = (IB_QP_PKEY_INDEX              |\r
1052                                 IB_QP_PORT                      |\r
1053                                 IB_QP_ACCESS_FLAGS);\r
1054         mlx4_ib_qp_attr_mask_table[IB_QPT_SMI] = (IB_QP_PKEY_INDEX              |\r
1055                                 IB_QP_QKEY);\r
1056         mlx4_ib_qp_attr_mask_table[IB_QPT_GSI] = (IB_QP_PKEY_INDEX              |\r
1057                                 IB_QP_QKEY);\r
1058 }\r
1059 \r
1060 int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,\r
1061                       int attr_mask, struct ib_udata *udata)\r
1062 {\r
1063         struct mlx4_ib_dev *dev = to_mdev(ibqp->device);\r
1064         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1065         enum ib_qp_state cur_state, new_state;\r
1066         int err = -EINVAL;\r
1067 \r
1068         UNUSED_PARAM(udata);\r
1069         \r
1070         if (mlx4_is_barred(dev->dev))\r
1071                 return -EFAULT; \r
1072 \r
1073         mutex_lock(&qp->mutex);\r
1074 \r
1075         cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;\r
1076         new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;\r
1077 \r
1078         if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))\r
1079                 goto out;\r
1080 \r
1081         if ((attr_mask & IB_QP_PORT) &&\r
1082             (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {\r
1083                 goto out;\r
1084         }\r
1085 \r
1086         if (attr_mask & IB_QP_PKEY_INDEX) {\r
1087                 int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;\r
1088                 if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])\r
1089                         goto out;\r
1090         }\r
1091 \r
1092         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&\r
1093             attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {\r
1094                 goto out;\r
1095         }\r
1096 \r
1097         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&\r
1098             attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {\r
1099                 goto out;\r
1100         }\r
1101 \r
1102         if (cur_state == new_state && cur_state == XIB_QPS_RESET) {\r
1103                 err = 0;\r
1104                 goto out;\r
1105         }\r
1106 \r
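        /*
         * Note (added for clarity): the transition below goes through INIT
         * first, presumably because the adapter cannot move a QP straight
         * from RESET to ERROR; the table filled in by mlx4_ib_qp_init()
         * supplies the minimal attributes needed for the intermediate
         * RESET->INIT step.
         */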
1107         if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_ERR) {\r
1108                 err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,\r
1109                                           mlx4_ib_qp_attr_mask_table[ibqp->qp_type],\r
1110                                           XIB_QPS_RESET, XIB_QPS_INIT);\r
1111                 if (err)\r
1112                         goto out;\r
1113                 cur_state = XIB_QPS_INIT;\r
1114         }\r
1115 \r
1116         err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);\r
1117 \r
1118 out:\r
1119         mutex_unlock(&qp->mutex);\r
1120         return err;\r
1121 }\r
1122 \r
1123 static enum ib_wr_opcode to_wr_opcode(struct _ib_send_wr *wr)\r
1124 {\r
1125 \r
        enum ib_wr_opcode opcode = -1;
1127 \r
1128         switch (wr->wr_type) {\r
1129                 case WR_SEND: \r
1130                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_SEND_WITH_IMM : IB_WR_SEND;\r
1131                         break;\r
1132                 case WR_LSO:\r
1133                         opcode = IB_WR_LSO;\r
1134                         break;\r
1135                 case WR_RDMA_WRITE:     \r
1136                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_RDMA_WRITE_WITH_IMM : IB_WR_RDMA_WRITE;\r
1137                         break;\r
1138                 case WR_RDMA_READ:\r
1139                         opcode = IB_WR_RDMA_READ;\r
1140                         break;\r
1141                 case WR_COMPARE_SWAP:\r
1142                         opcode = IB_WR_ATOMIC_CMP_AND_SWP;\r
1143                         break;\r
1144                 case WR_FETCH_ADD:\r
1145                         opcode = IB_WR_ATOMIC_FETCH_AND_ADD;\r
1146                         break;\r
1147         }\r
1148         return opcode;\r
1149 }\r
1150 \r
1151 static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr,\r
1152                             void *wqe)\r
1153 {\r
1154         enum ib_wr_opcode opcode = to_wr_opcode(wr);\r
1155         struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;\r
1156         struct mlx4_wqe_mlx_seg *mlx = wqe;\r
1157         struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof *mlx);\r
1158         struct mlx4_ib_ah *ah = to_mah((struct ib_ah *)wr->dgrm.ud.h_av);\r
1159         __be16 pkey;\r
1160         int send_size;\r
1161         int header_size;\r
1162         int spc;\r
1163         u32 i;\r
1164 \r
1165         send_size = 0;\r
1166         for (i = 0; i < wr->num_ds; ++i)\r
1167                 send_size += wr->ds_array[i].length;\r
1168 \r
1169         ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);\r
1170 \r
1171         sqp->ud_header.lrh.service_level   =\r
1172                 (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);\r
1173         sqp->ud_header.lrh.destination_lid = ah->av.dlid;\r
1174         sqp->ud_header.lrh.source_lid      = cpu_to_be16(ah->av.g_slid & 0x7f);\r
1175         if (mlx4_ib_ah_grh_present(ah)) {\r
1176                 sqp->ud_header.grh.traffic_class =\r
1177                         (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff);\r
1178                 sqp->ud_header.grh.flow_label    =\r
1179                         ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);\r
1180                 sqp->ud_header.grh.hop_limit     = ah->av.hop_limit;\r
1181                 ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >> 24),\r
1182                                   ah->av.gid_index, &sqp->ud_header.grh.source_gid);\r
1183                 memcpy(sqp->ud_header.grh.destination_gid.raw,\r
1184                        ah->av.dgid, 16);\r
1185         }\r
1186 \r
1187         mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);\r
1188         mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |\r
1189                                   (sqp->ud_header.lrh.destination_lid ==\r
1190                                    XIB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |\r
1191                                   (sqp->ud_header.lrh.service_level << 8));\r
1192         mlx->rlid   = sqp->ud_header.lrh.destination_lid;\r
1193 \r
1194         switch (opcode) {\r
1195         case IB_WR_SEND:\r
1196                 sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;\r
1197                 sqp->ud_header.immediate_present = 0;\r
1198                 break;\r
1199         case IB_WR_SEND_WITH_IMM:\r
1200                 sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;\r
1201                 sqp->ud_header.immediate_present = 1;\r
1202                 sqp->ud_header.immediate_data    = wr->immediate_data;\r
1203                 break;\r
1204         default:\r
1205                 return -EINVAL;\r
1206         }\r
1207 \r
1208         sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;\r
1209         if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)\r
1210                 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;\r
1211         sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt & IB_SEND_OPT_SOLICITED));\r
1212         if (!sqp->qp.ibqp.qp_num)\r
1213                 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);\r
1214         else\r
1215                 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->dgrm.ud.pkey_index, &pkey);\r
1216         sqp->ud_header.bth.pkey = pkey;\r
1217         sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;\r
1218         sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));\r
1219         sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ?\r
1220                 cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;\r
1221         sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);\r
1222 \r
1223         header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);\r
1224 \r
1225 #if 0\r
1226         {\r
1227                 printk(KERN_ERR "built UD header of size %d:\n", header_size);\r
1228                 for (i = 0; i < header_size / 4; ++i) {\r
1229                         if (i % 8 == 0)\r
1230                                 printk("  [%02x] ", i * 4);\r
1231                         printk(" %08x",\r
1232                                be32_to_cpu(((__be32 *) sqp->header_buf)[i]));\r
1233                         if ((i + 1) % 8 == 0)\r
1234                                 printk("\n");\r
1235                 }\r
1236                 printk("\n");\r
1237         }\r
1238 #endif\r
1239 \r
1240         /*\r
1241          * Inline data segments may not cross a 64 byte boundary.  If\r
1242          * our UD header is bigger than the space available up to the\r
1243          * next 64 byte boundary in the WQE, use two inline data\r
1244          * segments to hold the UD header.\r
1245          */\r
1246         spc = MLX4_INLINE_ALIGN -\r
1247                 ((u32)(ULONG_PTR)(inl + 1) & (MLX4_INLINE_ALIGN - 1));\r
1248         if (header_size <= spc) {\r
1249                 inl->byte_count = cpu_to_be32(1 << 31 | header_size);\r
1250                 memcpy(inl + 1, sqp->header_buf, header_size);\r
1251                 i = 1;\r
1252         } else {\r
1253                 inl->byte_count = cpu_to_be32(1 << 31 | spc);\r
1254                 memcpy(inl + 1, sqp->header_buf, spc);\r
1255 \r
1256                 inl = (void*)((u8*)(inl + 1) + spc);\r
1257                 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);\r
1258                 /*\r
1259                  * Need a barrier here to make sure all the data is\r
1260                  * visible before the byte_count field is set.\r
1261                  * Otherwise the HCA prefetcher could grab the 64-byte\r
1262                  * chunk with this inline segment and get a valid (!=\r
1263                  * 0xffffffff) byte count but stale data, and end up\r
1264                  * generating a packet with bad headers.\r
1265                  *\r
1266                  * The first inline segment's byte_count field doesn't\r
1267                  * need a barrier, because it comes after a\r
1268                  * control/MLX segment and therefore is at an offset\r
1269                  * of 16 mod 64.\r
1270                  */\r
1271                 wmb();\r
1272                 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));\r
1273                 i = 2;\r
1274         }\r
1275 \r
1276         return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);\r
1277 }\r
1278 \r
1279 static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)\r
1280 {\r
1281         unsigned cur;\r
1282         struct mlx4_ib_cq *cq;\r
1283 \r
1284         cur = wq->head - wq->tail;\r
1285         if (likely((int)cur + nreq < wq->max_post))\r
1286                 return 0;\r
1287 \r
1288         cq = to_mcq(ib_cq);\r
1289         spin_lock(&cq->lock);\r
1290         cur = wq->head - wq->tail;\r
1291         spin_unlock(&cq->lock);\r
1292 \r
1293         return (int)cur + nreq >= wq->max_post;\r
1294 }\r
1295 \r
1296 static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,\r
1297                                           u64 remote_addr, __be32 rkey)\r
1298 {\r
1299         rseg->raddr    = cpu_to_be64(remote_addr);\r
1300         rseg->rkey     = rkey;\r
1301         rseg->reserved = 0;\r
1302 }\r
1303 \r
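/*
 * Build the atomic segment: for compare-and-swap, atomic1 carries the compare
 * value and atomic2 the swap value; for fetch-and-add, atomic1 carries the
 * value to add.
 */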
1304 static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, ib_send_wr_t *wr)\r
1305 {\r
1306         if (wr->wr_type == WR_COMPARE_SWAP) {\r
1307                 aseg->swap_add = wr->remote_ops.atomic2;\r
1308                 aseg->compare  = wr->remote_ops.atomic1;\r
1309         } else {\r
1310                 aseg->swap_add = wr->remote_ops.atomic1;\r
1311                 aseg->compare  = 0;\r
        }
}
1315 \r
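/*
 * Build the UD datagram segment: copy the address vector from the AH given in
 * the work request and fill in the destination QPN and Q_Key.
 */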
1316 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,\r
1317                              ib_send_wr_t *wr)\r
1318 {\r
1319         memcpy(dseg->av, &to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, sizeof (struct mlx4_av));\r
1320         dseg->dqpn = wr->dgrm.ud.remote_qp;\r
1321         dseg->qkey = wr->dgrm.ud.remote_qkey;\r
1322 }\r
1323 \r
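/*
 * Write the extra inline data segment used as an ICRC placeholder for MLX
 * (QP0/QP1) sends: the 4-byte payload is cleared and byte_count marks an
 * inline segment of length 4.
 */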
1324 static void set_mlx_icrc_seg(void *dseg)\r
1325 {\r
1326         u32 *t = dseg;\r
1327         struct mlx4_wqe_inline_seg *iseg = dseg;\r
1328 \r
1329         t[1] = 0;\r
1330 \r
1331         /*\r
1332          * Need a barrier here before writing the byte_count field to\r
1333          * make sure that all the data is visible before the\r
1334          * byte_count field is set.  Otherwise, if the segment begins\r
1335          * a new cacheline, the HCA prefetcher could grab the 64-byte\r
         * chunk and get a valid (!= 0xffffffff) byte count but
1337          * stale data, and end up sending the wrong data.\r
1338          */\r
1339         wmb();\r
1340 \r
1341         iseg->byte_count = cpu_to_be32((1 << 31) | 4);\r
1342 }\r
1343 \r
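/*
 * Write a send data segment.  The byte count is written last, after a
 * barrier, so that a prefetching HCA never sees a valid byte count together
 * with a stale address or key.
 */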
1344 static void set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg)\r
1345 {\r
1346         dseg->lkey       = cpu_to_be32(sg->lkey);\r
1347         dseg->addr       = cpu_to_be64(sg->vaddr);\r
1348 \r
1349         /*\r
1350          * Need a barrier here before writing the byte_count field to\r
1351          * make sure that all the data is visible before the\r
1352          * byte_count field is set.  Otherwise, if the segment begins\r
1353          * a new cacheline, the HCA prefetcher could grab the 64-byte\r
         * chunk and get a valid (!= 0xffffffff) byte count but
1355          * stale data, and end up sending the wrong data.\r
1356          */\r
1357         wmb();\r
1358 \r
1359         dseg->byte_count = cpu_to_be32(sg->length);\r
1360 }\r
1361 \r
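/*
 * Write a receive data segment.  Receive WQEs are only handed to the HCA by
 * the doorbell-record update in mlx4_ib_post_recv, which is preceded by its
 * own barrier, so no per-segment barrier is needed here.
 */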
1362 static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg)\r
1363 {\r
1364         dseg->byte_count = cpu_to_be32(sg->length);\r
1365         dseg->lkey       = cpu_to_be32(sg->lkey);\r
1366         dseg->addr       = cpu_to_be64(sg->vaddr);\r
1367 }\r
1368 \r
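/*
 * Build an LSO segment: copy the packet header supplied in the work request
 * into the WQE and program the MSS/header-size word so the HCA can segment
 * the payload.
 */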
1369 static int build_lso_seg(struct mlx4_lso_seg *wqe, ib_send_wr_t *wr,\r
1370                                                  struct mlx4_ib_qp *qp, unsigned *lso_seg_len)\r
{
        unsigned halign = ALIGN(sizeof *wqe + wr->dgrm.ud.hlen, 16);

        /*
         * This is a temporary limitation and will be removed in
         * a forthcoming FW release:
         */
1378         if (unlikely(halign > 64))\r
1379                 return -EINVAL;\r
1380 \r
1381         if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&\r
1382                 wr->num_ds > qp->sq.max_gs - (halign >> 4)))\r
1383                 return -EINVAL;\r
1384         *lso_seg_len = halign;\r

        /*
         * TODO: copy the header with memcpy from the physical/virtual address
         * that ipoib can provide directly with the first data segment.
         */
        memcpy(wqe->header, wr->dgrm.ud.header, wr->dgrm.ud.hlen);

1390         /* make sure LSO header is written before overwriting stamping */\r
1391         wmb();\r
1392 \r
1393         wqe->mss_hdr_size = cpu_to_be32((wr->dgrm.ud.mss - wr->dgrm.ud.hlen) << 16 |\r
1394                                                                         wr->dgrm.ud.hlen);\r
1395         \r
1396         return 0;\r
1397 }\r
1398 \r
1399 \r
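/*
 * Post a chain of send work requests.  For each WR: check for send queue
 * overflow, build the control segment, append any opcode- or QP-type-specific
 * segments (RDMA/atomic, UD datagram, LSO, MLX header), then write the data
 * segments in reverse order so the cacheline stamp is overwritten last.
 * Ownership is handed to the HCA only after a barrier, and the doorbell is
 * rung once for the whole chain.
 */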
1400 int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,\r
1401                       ib_send_wr_t **bad_wr)\r
1402 {\r
1403         enum ib_wr_opcode opcode;\r
1404         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1405         u8 *wqe /*, *wqe_start*/;\r
1406         struct mlx4_wqe_ctrl_seg *ctrl;\r
1407         struct mlx4_wqe_data_seg *dseg;\r
1408         unsigned long flags;\r
1409         int nreq;\r
1410         int err = 0;\r
1411         int ind;\r
1412         int size;\r
1413         unsigned seglen;\r
1414         int i;\r
        int j = 0;
        __be32 blh;             /* extra owner_opcode bits (large-LSO-header workaround) */
1416 \r
1417         if (mlx4_is_barred(ibqp->device->dma_device))\r
1418                 return -EFAULT;\r
1419 \r
1420         spin_lock_irqsave(&qp->sq.lock, &flags);\r
1421 \r
1422         ind = qp->sq.head;\r
1423 \r
1424         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
1425                 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {\r
1426                         err = -ENOMEM;\r
1427                         if (bad_wr)\r
1428                                 *bad_wr = wr;\r
1429                         goto out;\r
1430                 }\r
1431 \r
1432                 if (unlikely(wr->num_ds > (u32)qp->sq.max_gs)) {\r
1433                         err = -EINVAL;\r
1434                         if (bad_wr)\r
1435                                 *bad_wr = wr;\r
1436                         goto out;\r
1437                 }\r
1438 \r
1439                 /*wqe_start = */\r
1440                 wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));\r
1441                 ctrl = (void*)wqe;\r
1442                 qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;\r
                opcode = to_wr_opcode(wr);
                blh = 0;
1444 \r
1445                 ctrl->srcrb_flags =\r
1446                         (wr->send_opt & IB_SEND_OPT_SIGNALED ?\r
1447                          cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |\r
1448                         (wr->send_opt & IB_SEND_OPT_SOLICITED ?\r
1449                          cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |\r
1450                         (wr->send_opt & IB_SEND_OPT_TX_IP_CSUM ?\r
1451                          cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM) : 0) |\r
1452                         (wr->send_opt & IB_SEND_OPT_TX_TCP_UDP_CSUM ?\r
1453                          cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |\r
1454                         qp->sq_signal_bits;\r
1455 \r
1456                 if (opcode == IB_WR_SEND_WITH_IMM ||\r
1457                     opcode == IB_WR_RDMA_WRITE_WITH_IMM)\r
1458                         ctrl->imm = wr->immediate_data;\r
1459                 else\r
1460                         ctrl->imm = 0;\r
1461 \r
1462                 wqe += sizeof *ctrl;\r
1463                 size = sizeof *ctrl / 16;\r
1464 \r
1465                 switch (ibqp->qp_type) {\r
1466                 case IB_QPT_RC:\r
1467                 case IB_QPT_UC:\r
1468                         switch (opcode) {\r
1469                         case IB_WR_ATOMIC_CMP_AND_SWP:\r
1470                         case IB_WR_ATOMIC_FETCH_AND_ADD:\r
1471                                 set_raddr_seg((void*)wqe, wr->remote_ops.vaddr,\r
1472                                               wr->remote_ops.rkey);\r
1473                                 wqe  += sizeof (struct mlx4_wqe_raddr_seg);\r
1474 \r
1475                                 set_atomic_seg((void*)wqe, wr);\r
1476                                 wqe  += sizeof (struct mlx4_wqe_atomic_seg);\r
1477 \r
1478                                 size += (sizeof (struct mlx4_wqe_raddr_seg) +\r
1479                                          sizeof (struct mlx4_wqe_atomic_seg)) / 16;\r
1480 \r
1481                                 break;\r
1482 \r
1483                         case IB_WR_RDMA_READ:\r
1484                         case IB_WR_RDMA_WRITE:\r
1485                         case IB_WR_RDMA_WRITE_WITH_IMM:\r
1486                                 set_raddr_seg((void*)wqe, wr->remote_ops.vaddr,\r
1487                                               wr->remote_ops.rkey);\r
1488                                 wqe  += sizeof (struct mlx4_wqe_raddr_seg);\r
1489                                 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;\r
1490                                 break;\r
1491 \r
1492                         default:\r
1493                                 /* No extra segments required for sends */\r
1494                                 break;\r
1495                         }\r
1496                         break;\r
1497 \r
1498                 case IB_QPT_UD:\r
1499                         set_datagram_seg((void*)wqe, wr);\r
1500                         wqe  += sizeof (struct mlx4_wqe_datagram_seg);\r
1501                         size += sizeof (struct mlx4_wqe_datagram_seg) / 16;\r
1502                         if (wr->wr_type == WR_LSO) {\r
1503                                 err = build_lso_seg((struct mlx4_lso_seg *)(void *)wqe, wr, qp, &seglen);\r
                                if (unlikely(err)) {
                                        if (bad_wr)
                                                *bad_wr = wr;
                                        goto out;
                                }
1508 #define I64_CACHE_LINE          64\r
1509 #define OPCODE_INVALID_BIT      6\r
                                /*
                                 * WQE bug workaround for the LSO case: when the LSO
                                 * header is large enough to cross the first 64-byte
                                 * cacheline, the WQE must carry the "invalid opcode"
                                 * bit.  Record it in blh so that it is folded into
                                 * owner_opcode when that field is written below,
                                 * instead of being overwritten by that assignment.
                                 */
                                if (unlikely(seglen > I64_CACHE_LINE)) {
                                        blh = cpu_to_be32(1 << OPCODE_INVALID_BIT);
                                }
1514                                 wqe  += seglen;\r
1515                                 size += seglen / 16;\r
1516                                 j=1;\r
1517                         }\r
1518                         break;\r
1519 \r
1520                 case IB_QPT_SMI:\r
1521                 case IB_QPT_GSI:\r
1522                         err = build_mlx_header(to_msqp(qp), wr, ctrl);\r
1523                         if (err < 0) {\r
1524                                 if (bad_wr)\r
1525                                         *bad_wr = wr;\r
1526                                 goto out;\r
1527                         }\r
1528                         wqe  += err;\r
1529                         size += err / 16;\r
1530 \r
1531                         err = 0;\r
1532                         break;\r
1533 \r
1534                 default:\r
1535                         break;\r
1536                 }\r
1537 \r
1538                 /*\r
1539                  * Write data segments in reverse order, so as to\r
1540                  * overwrite cacheline stamp last within each\r
1541                  * cacheline.  This avoids issues with WQE\r
1542                  * prefetching.\r
1543                  */\r
1544 \r
1545                 dseg = (void*)wqe;\r
1546                 dseg += wr->num_ds - 1;\r
1547                 size += wr->num_ds * (sizeof (struct mlx4_wqe_data_seg) / 16);\r
1548 \r
1549                 /* Add one more inline data segment for ICRC for MLX sends */\r
1550                 if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||\r
1551                              qp->ibqp.qp_type == IB_QPT_GSI)) {\r
1552                         set_mlx_icrc_seg(dseg + 1);\r
1553                         size += sizeof (struct mlx4_wqe_data_seg) / 16;\r
1554                 }\r
1555 \r
1556                 for (i = wr->num_ds - 1; i >= 0; --i, --dseg)\r
1557                         set_data_seg(dseg, wr->ds_array + i);\r
1558 \r
1559                 ctrl->fence_size = (u8)((wr->send_opt & IB_SEND_OPT_FENCE ?\r
1560                                     MLX4_WQE_CTRL_FENCE : 0) | size);\r
1561 \r
1562                 /*\r
1563                  * Make sure descriptor is fully written before\r
1564                  * setting ownership bit (because HW can start\r
1565                  * executing as soon as we do).\r
1566                  */\r
1567                 wmb();\r
1568 \r
                if (opcode < 0 || opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
                        err = -EINVAL;
                        if (bad_wr)
                                *bad_wr = wr;
                        goto out;
                }
1573 \r
                ctrl->owner_opcode = mlx4_ib_opcode[opcode] | blh |
                        (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
1576 \r
1577                 /*\r
1578                  * We can improve latency by not stamping the last\r
1579                  * send queue WQE until after ringing the doorbell, so\r
1580                  * only stamp here if there are still more WQEs to post.\r
1581                  */\r
1582                 if (wr->p_next)\r
1583                         stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &\r
1584                                        (qp->sq.wqe_cnt - 1));\r
1585 \r
1586                 ++ind;\r
1587         }\r
1588 \r
1590 \r
1591 out:\r
1592 //WQE printout\r
1593 #if 0   \r
1594         if (j) {\r
1595                 u32 *ds = (u32 *) wqe_start;\r
                printk("WQE DUMP:\n");
1597                 for (j = 0; j < ctrl->fence_size*4; ++j) {\r
1598                         printk("%d %08x\n", j,be32_to_cpu(*ds));\r
1599                         ++ds;\r
1600                 }\r
1601         }\r
1602 #endif  \r
1603         if (likely(nreq)) {\r
1604                 qp->sq.head += nreq;\r
1605 \r
1606                 /*\r
1607                  * Make sure that descriptors are written before\r
1608                  * doorbell record.\r
1609                  */\r
1610                 wmb();\r
1611 \r
1612                 writel(qp->doorbell_qpn,\r
1613                        (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);\r
1614 \r
1615 #if 0\r
1616                 if (qp->mqp.qpn == 0x41)\r
1617                         cl_dbg_out( "[MLX4_BUS] mlx4_ib_post_send : qtype %d, qpn %#x, nreq %d, sq.head %#x, wqe_ix %d, db %p \n", \r
1618                                 ibqp->qp_type, qp->mqp.qpn, nreq, qp->sq.head, ind, \r
1619                                 (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL );\r
1620 #endif          \r
1621                 /*\r
1622                  * Make sure doorbells don't leak out of SQ spinlock\r
1623                  * and reach the HCA out of order.\r
1624                  */\r
1625                 mmiowb();\r
1626 \r
1627                 stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &\r
1628                                (qp->sq.wqe_cnt - 1));\r
1629         }\r
1630 \r
1631         spin_unlock_irqrestore(&qp->sq.lock, flags);\r
1632 \r
1633         return err;\r
1634 }\r
1635 \r
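/*
 * Post a chain of receive work requests: for each WR, write its scatter list
 * into the next receive WQE, terminate the list with an invalid-lkey entry if
 * it is shorter than max_gs, and finally publish the new head through the
 * doorbell record.
 */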
1636 int mlx4_ib_post_recv(struct ib_qp *ibqp, ib_recv_wr_t *wr,\r
1637                       ib_recv_wr_t **bad_wr)\r
1638 {\r
1639         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1640         struct mlx4_wqe_data_seg *scat;\r
1641         unsigned long flags;\r
1642         int err = 0;\r
1643         int nreq;\r
1644         int ind;\r
1645         int i;\r
1646 \r
1647         if (mlx4_is_barred(ibqp->device->dma_device))\r
1648                 return -EFAULT;\r
1649 \r
1650         spin_lock_irqsave(&qp->rq.lock, &flags);\r
1651 \r
1652         ind = qp->rq.head & (qp->rq.wqe_cnt - 1);\r
1653 \r
1654         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
                if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1656                         err = -ENOMEM;\r
1657                         if (bad_wr)\r
1658                                 *bad_wr = wr;\r
1659                         goto out;\r
1660                 }\r
1661 \r
1662                 if (unlikely(wr->num_ds > (u32)qp->rq.max_gs)) {\r
1663                         err = -EINVAL;\r
1664                         if (bad_wr)\r
1665                                 *bad_wr = wr;\r
1666                         goto out;\r
1667                 }\r
1668 \r
1669                 scat = get_recv_wqe(qp, ind);\r
1670 \r
1671                 for (i = 0; i < (int)wr->num_ds; ++i)\r
1672                         __set_data_seg(scat + i, wr->ds_array + i);\r
1673 \r
1674                 if (i < qp->rq.max_gs) {\r
1675                         scat[i].byte_count = 0;\r
1676                         scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);\r
1677                         scat[i].addr       = 0;\r
1678                 }\r
1679 \r
1680                 qp->rq.wrid[ind] = wr->wr_id;\r
1681 \r
1682                 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);\r
1683         }\r
1684 \r
1685 out:\r
1686         if (likely(nreq)) {\r
1687                 qp->rq.head += nreq;\r
1688 \r
1689                 /*\r
1690                  * Make sure that descriptors are written before\r
1691                  * doorbell record.\r
1692                  */\r
1693                 wmb();\r
1694 \r
1695                 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);\r
1696 \r
1697 #if 0\r
1698                 if (qp->mqp.qpn == 0x41)\r
1699                         cl_dbg_out( "[MLX4_BUS] mlx4_ib_post_recv : qtype %d, qpn %#x, nreq %d, rq.head %#x, wqe_ix %d, db_obj %p, db %p \n", \r
1700                                 ibqp->qp_type, qp->mqp.qpn, nreq, qp->rq.head, ind, &qp->db, qp->db.db );\r
1701 #endif          \r
1702         }\r
1703 \r
1704         spin_unlock_irqrestore(&qp->rq.lock, flags);\r
1705 \r
1706         return err;\r
1707 }\r
1708 \r
1709 static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)\r
1710 {\r
1711         switch (mlx4_state) {\r
1712         case MLX4_QP_STATE_RST:      return XIB_QPS_RESET;\r
1713         case MLX4_QP_STATE_INIT:     return XIB_QPS_INIT;\r
1714         case MLX4_QP_STATE_RTR:      return XIB_QPS_RTR;\r
1715         case MLX4_QP_STATE_RTS:      return XIB_QPS_RTS;\r
1716         case MLX4_QP_STATE_SQ_DRAINING:\r
1717         case MLX4_QP_STATE_SQD:      return XIB_QPS_SQD;\r
1718         case MLX4_QP_STATE_SQER:     return XIB_QPS_SQE;\r
1719         case MLX4_QP_STATE_ERR:      return XIB_QPS_ERR;\r
1720         default:                     return -1;\r
1721         }\r
1722 }\r
1723 \r
1724 static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)\r
1725 {\r
1726         switch (mlx4_mig_state) {\r
1727         case MLX4_QP_PM_ARMED:          return IB_MIG_ARMED;\r
1728         case MLX4_QP_PM_REARM:          return IB_MIG_REARM;\r
1729         case MLX4_QP_PM_MIGRATED:       return IB_MIG_MIGRATED;\r
1730         default: return -1;\r
1731         }\r
1732 }\r
1733 \r
1734 static int to_ib_qp_access_flags(int mlx4_flags)\r
1735 {\r
1736         int ib_flags = 0;\r
1737 \r
1738         if (mlx4_flags & MLX4_QP_BIT_RRE)\r
1739                 ib_flags |= IB_ACCESS_REMOTE_READ;\r
1740         if (mlx4_flags & MLX4_QP_BIT_RWE)\r
1741                 ib_flags |= IB_ACCESS_REMOTE_WRITE;\r
1742         if (mlx4_flags & MLX4_QP_BIT_RAE)\r
1743                 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;\r
1744 \r
1745         return ib_flags;\r
1746 }\r
1747 \r
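/*
 * Decode a hardware QP path into ib_ah_attr: the port comes from bit 6 of
 * sched_queue, the SL from bits 2-5, and the GRH fields are filled in only
 * when the GRH bit is set in grh_mylmc.
 */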
1748 static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,\r
1749                                 struct mlx4_qp_path *path)\r
1750 {\r
1751         memset(ib_ah_attr, 0, sizeof *ib_ah_attr);\r
1752         ib_ah_attr->port_num      = path->sched_queue & 0x40 ? 2 : 1;\r
1753 \r
1754         if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)\r
1755                 return;\r
1756 \r
1757         ib_ah_attr->dlid          = be16_to_cpu(path->rlid);\r
1758         ib_ah_attr->sl            = (path->sched_queue >> 2) & 0xf;\r
1759         ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;\r
1760         ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;\r
1761         ib_ah_attr->ah_flags      = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;\r
1762         if (ib_ah_attr->ah_flags) {\r
1763                 ib_ah_attr->grh.sgid_index = path->mgid_index;\r
1764                 ib_ah_attr->grh.hop_limit  = path->hop_limit;\r
1765                 ib_ah_attr->grh.traffic_class =\r
1766                         (u8)((be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff);\r
1767                 ib_ah_attr->grh.flow_label =\r
1768                         be32_to_cpu(path->tclass_flowlabel) & 0xfffff;\r
1769                 memcpy(ib_ah_attr->grh.dgid.raw,\r
1770                         path->rgid, sizeof ib_ah_attr->grh.dgid.raw);\r
1771         }\r
1772 }\r
1773 \r
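/*
 * Query QP attributes.  For a QP in the RESET state the answer is built
 * without touching the hardware; otherwise the QP context is read from the
 * device and translated field by field into ib_qp_attr.
 */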
1774 int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,\r
1775                      struct ib_qp_init_attr *qp_init_attr)\r
1776 {\r
1777         struct mlx4_ib_dev *dev = to_mdev(ibqp->device);\r
1778         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1779         struct mlx4_qp_context context;\r
1780         int mlx4_state;\r
1781         int err;\r
1782 \r
1783         UNUSED_PARAM(qp_attr_mask);\r
1784 \r
1785         if (mlx4_is_barred(dev->dev))\r
1786                 return -EFAULT;\r
1787         \r
1788         if (qp->state == XIB_QPS_RESET) {\r
1789                 qp_attr->qp_state = XIB_QPS_RESET;\r
1790                 goto done;\r
1791         }\r
1792 \r
1793         err = mlx4_qp_query(dev->dev, &qp->mqp, &context);\r
1794         if (err)\r
1795                 return -EINVAL;\r
1796 \r
1797         mlx4_state = be32_to_cpu(context.flags) >> 28;\r
1798 \r
1799         qp_attr->qp_state            = to_ib_qp_state(mlx4_state);\r
1800         qp_attr->path_mtu            = context.mtu_msgmax >> 5;\r
1801         qp_attr->path_mig_state      =\r
1802                 to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);\r
1803         qp_attr->qkey                = be32_to_cpu(context.qkey);\r
1804         qp_attr->rq_psn              = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;\r
1805         qp_attr->sq_psn              = be32_to_cpu(context.next_send_psn) & 0xffffff;\r
1806         qp_attr->dest_qp_num         = be32_to_cpu(context.remote_qpn) & 0xffffff;\r
1807         qp_attr->qp_access_flags     =\r
1808                 to_ib_qp_access_flags(be32_to_cpu(context.params2));\r
1809 \r
1810         if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {\r
1811                 to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);\r
1812                 to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);\r
1813                 qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;\r
1814                 qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;\r
1815         }\r
1816 \r
1817         qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;\r
1818         if (qp_attr->qp_state == XIB_QPS_INIT)\r
1819                 qp_attr->port_num = qp->port;\r
1820         else\r
1821                 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;\r
1822 \r
1823         /* qp_attr->en_sqd_async_notify is only applicable in modify qp */\r
1824         qp_attr->sq_draining = (u8)(mlx4_state == MLX4_QP_STATE_SQ_DRAINING);\r
1825 \r
1826         qp_attr->max_rd_atomic = (u8)(1 << ((be32_to_cpu(context.params1) >> 21) & 0x7));\r
1827 \r
1828         qp_attr->max_dest_rd_atomic =\r
1829                 (u8)(1 << ((be32_to_cpu(context.params2) >> 21) & 0x7));\r
1830         qp_attr->min_rnr_timer      =\r
1831                 (u8)((be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f);\r
1832         qp_attr->timeout            = context.pri_path.ackto >> 3;\r
1833         qp_attr->retry_cnt          = (u8)((be32_to_cpu(context.params1) >> 16) & 0x7);\r
1834         qp_attr->rnr_retry          = (u8)((be32_to_cpu(context.params1) >> 13) & 0x7);\r
1835         qp_attr->alt_timeout        = context.alt_path.ackto >> 3;\r
1836 \r
1837 done:\r
1838         qp_attr->cur_qp_state        = qp_attr->qp_state;\r
1839         qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;\r
1840         qp_attr->cap.max_recv_sge    = qp->rq.max_gs;\r
1841 \r
1842         if (!ibqp->p_uctx) {\r
1843                 qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;\r
1844                 qp_attr->cap.max_send_sge = qp->sq.max_gs;\r
1845         } else {\r
1846                 qp_attr->cap.max_send_wr  = 0;\r
1847                 qp_attr->cap.max_send_sge = 0;\r
1848         }\r
1849 \r
1850         /*\r
1851          * We don't support inline sends for kernel QPs (yet), and we\r
1852          * don't know what userspace's value should be.\r
1853          */\r
1854         qp_attr->cap.max_inline_data = 0;\r
1855 \r
1856         qp_init_attr->cap            = qp_attr->cap;\r
1857 \r
1858         return 0;\r
1859 }\r
1860 \r