[MLX4] bug fix: prevent simultaneous issuing of modify_qp and query_qp commands ...
hw/mlx4/kernel/bus/ib/qp.c
/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "mlx4_ib.h"
#include "ib_cache.h"
#include "ib_pack.h"
#include "qp.h"
#include "user.h"

enum {
        MLX4_IB_ACK_REQ_FREQ    = 8,
};

enum {
        MLX4_IB_DEFAULT_SCHED_QUEUE     = 0x83,
        MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
};

enum {
        /*
         * Largest possible UD header: send with GRH and immediate data.
         */
        MLX4_IB_UD_HEADER_SIZE          = 72
};

struct mlx4_ib_sqp {
        struct mlx4_ib_qp       qp;
        int                     pkey_index;
        u32                     qkey;
        u32                     send_psn;
        struct ib_ud_header     ud_header;
        u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];
};

enum {
        MLX4_IB_MIN_SQ_STRIDE = 6
};

static const __be32 mlx4_ib_opcode[] = {
        __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),         /* [IB_WR_RDMA_WRITE]           */
        __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),     /* [IB_WR_RDMA_WRITE_WITH_IMM]  */
        __constant_cpu_to_be32(MLX4_OPCODE_SEND),               /* [IB_WR_SEND]                 */
        __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),           /* [IB_WR_SEND_WITH_IMM]        */
        __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),          /* [IB_WR_RDMA_READ]            */
        __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),          /* [IB_WR_ATOMIC_CMP_AND_SWP]   */
        __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),          /* [IB_WR_ATOMIC_FETCH_AND_ADD] */
        __constant_cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6))      /* [IB_WR_LSO]                  */
};
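
/*
 * Illustrative note: the send-posting path (not part of this excerpt) is
 * expected to index the table above directly by IB work-request opcode
 * when it builds the control segment, roughly
 *
 *      ctrl->owner_opcode = mlx4_ib_opcode[to_wr_opcode(wr)] | owner_bit;
 *
 * so the array order has to track the ib_wr_opcode enumeration.
 */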

static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
        return container_of(mqp, struct mlx4_ib_sqp, qp);
}

static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
        return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
                qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
}

static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
        return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
                qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
}

static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
{
        if (qp->buf.nbufs == 1)
                return qp->buf.u.direct.buf + offset;
        else
                return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
                        (offset & (PAGE_SIZE - 1));
}

static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
{
        return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}

static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
{
        return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}

/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with 0xffffffff, except for
 * the very first chunk of the WQE.
 */
static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
{
        u32 *wqe = get_send_wqe(qp, n);
        int i;

        for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
                wqe[i] = 0xffffffff;
}
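
/*
 * Illustrative sketch of the stamping above, assuming 256-byte send WQEs
 * (sq.wqe_shift == 8, i.e. 64 dwords per WQE): the loop writes dwords
 * 16, 32 and 48, which are the first four bytes of the second, third and
 * fourth 64-byte chunks:
 *
 *      offset 0x00: ctrl segment            (left untouched)
 *      offset 0x40: 0xffffffff ...          (wqe[16])
 *      offset 0x80: 0xffffffff ...          (wqe[32])
 *      offset 0xc0: 0xffffffff ...          (wqe[48])
 *
 * A prefetching HCA that reads a stamped chunk sees an invalid pattern
 * instead of stale descriptor contents.
 */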

static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
        ib_event_rec_t event;
        struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;

        if (type == MLX4_EVENT_TYPE_PATH_MIG)
                to_mibqp(qp)->port = to_mibqp(qp)->alt_port;

        switch (type) {
        case MLX4_EVENT_TYPE_PATH_MIG:
                event.type = IB_EVENT_PATH_MIG;
                break;
        case MLX4_EVENT_TYPE_COMM_EST:
                event.type = IB_EVENT_COMM_EST;
                break;
        case MLX4_EVENT_TYPE_SQ_DRAINED:
                event.type = IB_EVENT_SQ_DRAINED;
                break;
        case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
                event.type = IB_EVENT_QP_LAST_WQE_REACHED;
                break;
        case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
                event.type = IB_EVENT_QP_FATAL;
                break;
        case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
                event.type = IB_EVENT_PATH_MIG_ERR;
                break;
        case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
                event.type = IB_EVENT_QP_REQ_ERR;
                break;
        case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
                event.type = IB_EVENT_QP_ACCESS_ERR;
                break;
        default:
                printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
                       "on QP %06x\n", type, qp->qpn);
                return;
        }

        event.context = ibqp->qp_context;
        ibqp->event_handler(&event);
}

static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
{
        /*
         * UD WQEs must have a datagram segment.
         * RC and UC WQEs might have a remote address segment.
         * MLX WQEs need two extra inline data segments (for the UD
         * header and space for the ICRC).
         */
        switch (type) {
        case IB_QPT_UD:
                return sizeof (struct mlx4_wqe_ctrl_seg) +
                        sizeof (struct mlx4_wqe_datagram_seg) +
                        ((flags & MLX4_IB_QP_LSO) ? 64 : 0);
        case IB_QPT_UC:
                return sizeof (struct mlx4_wqe_ctrl_seg) +
                        sizeof (struct mlx4_wqe_raddr_seg);
        case IB_QPT_RC:
                return sizeof (struct mlx4_wqe_ctrl_seg) +
                        sizeof (struct mlx4_wqe_atomic_seg) +
                        sizeof (struct mlx4_wqe_raddr_seg);
        case IB_QPT_SMI:
        case IB_QPT_GSI:
                return sizeof (struct mlx4_wqe_ctrl_seg) +
                        ALIGN(MLX4_IB_UD_HEADER_SIZE +
                              DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
                                           MLX4_INLINE_ALIGN) *
                              sizeof (struct mlx4_wqe_inline_seg),
                              sizeof (struct mlx4_wqe_data_seg)) +
                        ALIGN(4 +
                              sizeof (struct mlx4_wqe_inline_seg),
                              sizeof (struct mlx4_wqe_data_seg));
        default:
                return sizeof (struct mlx4_wqe_ctrl_seg);
        }
}
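
/*
 * Worked example of the overhead computed above, assuming the usual mlx4
 * segment sizes (16-byte ctrl, 48-byte datagram, 16-byte raddr and
 * atomic segments):
 *
 *      UD (no LSO): 16 + 48           = 64 bytes
 *      UC:          16 + 16           = 32 bytes
 *      RC:          16 + 16 + 16      = 48 bytes
 *
 * Whatever remains of a send WQE after this overhead is what
 * set_kernel_sq_size() below can hand out as gather entries or inline
 * data.
 */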

static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
                       int is_user, int has_srq, struct mlx4_ib_qp *qp)
{
        /* Sanity check RQ size before proceeding */
        if ((int)cap->max_recv_wr  > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
            (int)cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
                return -EINVAL;

        if (has_srq) {
                /* QPs attached to an SRQ should have no RQ */
                if (cap->max_recv_wr)
                        return -EINVAL;

                qp->rq.wqe_cnt = qp->rq.max_gs = 0;
        } else {
                /* HW requires >= 1 RQ entry with >= 1 gather entry */
                if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
                        return -EINVAL;

                qp->rq.wqe_cnt   = roundup_pow_of_two(max(1U, cap->max_recv_wr));
                qp->rq.max_gs    = roundup_pow_of_two(max(1U, cap->max_recv_sge));
                qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
        }

        /* leave userspace return values as they were, so as not to break ABI */
        if (is_user) {
                cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
                cap->max_recv_sge = qp->rq.max_gs;
        } else {
                cap->max_recv_wr  = qp->rq.max_post =
                        min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
                cap->max_recv_sge = min(qp->rq.max_gs,
                                        min(dev->dev->caps.max_sq_sg,
                                            dev->dev->caps.max_rq_sg));
        }
        /* We don't support inline sends for kernel QPs (yet) */

        return 0;
}
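
/*
 * Worked example for the receive side, with hypothetical numbers: a
 * kernel consumer asking for max_recv_wr = 100 and max_recv_sge = 3 on a
 * QP without an SRQ ends up with
 *
 *      rq.wqe_cnt   = roundup_pow_of_two(100) = 128
 *      rq.max_gs    = roundup_pow_of_two(3)   = 4
 *      rq.wqe_shift = ilog2(4 * 16)           = 6   (64-byte receive WQEs)
 *
 * assuming 16-byte scatter entries; the capped values are then reported
 * back through ib_qp_cap.
 */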

static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
                              enum ib_qp_type type, struct mlx4_ib_qp *qp)
{
        /* Sanity check SQ size before proceeding */
        if ((int)cap->max_send_wr  > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
            (int)cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
            (int)cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
            (int)sizeof(struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
                return -EINVAL;

        /*
         * For MLX transport we need 2 extra S/G entries:
         * one for the header and one for the checksum at the end
         */
        if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
            (int)cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
                return -EINVAL;

        qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
                                                        sizeof (struct mlx4_wqe_data_seg),
                                                        cap->max_inline_data +
                                                        sizeof (struct mlx4_wqe_inline_seg)) +
                                                    send_wqe_overhead(type, qp->flags)));
        qp->sq.wqe_shift = max(MLX4_IB_SQ_MIN_WQE_SHIFT, qp->sq.wqe_shift);
        qp->sq.max_gs    = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type, qp->flags)) /
                sizeof (struct mlx4_wqe_data_seg);

        /*
         * We need to leave 2 KB + 1 WQE of headroom in the SQ to
         * allow HW to prefetch.
         */
        qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift);
        qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);

        qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
                (qp->sq.wqe_cnt << qp->sq.wqe_shift);
        if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
                qp->rq.offset = 0;
                qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
        } else {
                qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
                qp->sq.offset = 0;
        }

        cap->max_send_wr = qp->sq.max_post =
                min(qp->sq.wqe_cnt - qp->sq_spare_wqes,
                    dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE);
        cap->max_send_sge = min(qp->sq.max_gs,
                                min(dev->dev->caps.max_sq_sg,
                                    dev->dev->caps.max_rq_sg));
        /* We don't support inline sends for kernel QPs (yet) */
        cap->max_inline_data = 0;

        return 0;
}
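
/*
 * Continuing the example for the send side (hypothetical numbers: RC QP,
 * max_send_sge = 4, no inline data, 16-byte data segments): the payload
 * part is max(4 * 16, 0 + 4) = 64 bytes, plus 48 bytes of RC overhead,
 * so
 *
 *      sq.wqe_shift = ilog2(roundup_pow_of_two(112)) = 7   (128-byte WQEs)
 *      sq.max_gs    = (128 - 48) / 16                = 5
 *
 * The spare WQEs reserved above are the prefetch headroom that
 * __mlx4_ib_modify_qp() stamps before handing the QP to hardware.
 */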

static int set_user_sq_size(struct mlx4_ib_dev *dev,
                            struct mlx4_ib_qp *qp,
                            struct mlx4_ib_create_qp *ucmd)
{
        /* Sanity check SQ size before proceeding */
        if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes       ||
            ucmd->log_sq_stride >
                ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
            ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
                return -EINVAL;

        qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;
        qp->sq.wqe_shift = ucmd->log_sq_stride;

        qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
                (qp->sq.wqe_cnt << qp->sq.wqe_shift);

        return 0;
}

static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                            struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, u32 sqpn, struct mlx4_ib_qp *qp)
{
        int err;
        BOOLEAN range_allocated = FALSE;

        mutex_init(&qp->mutex);
        spin_lock_init(&qp->sq.lock);
        spin_lock_init(&qp->rq.lock);

        qp->state        = XIB_QPS_RESET;
        qp->atomic_rd_en = 0;
        qp->resp_depth   = 0;

        qp->rq.head = 0;
        qp->rq.tail = 0;
        qp->sq.head = 0;
        qp->sq.tail = 0;

        err = set_rq_size(dev, &init_attr->cap, !!pd->p_uctx, !!init_attr->srq, qp);
        if (err)
                goto err;

        if (pd->p_uctx) {
                struct mlx4_ib_create_qp ucmd;

                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                        err = -EFAULT;
                        goto err;
                }

                qp->sq_no_prefetch = ucmd.sq_no_prefetch;

                err = set_user_sq_size(dev, qp, &ucmd);
                if (err)
                        goto err;

                qp->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,
                                       qp->buf_size, 0, FALSE);
                if (IS_ERR(qp->umem)) {
                        err = PTR_ERR(qp->umem);
                        goto err;
                }

                err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
                                    ilog2(qp->umem->page_size), &qp->mtt);
                if (err)
                        goto err_buf;

                err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
                if (err)
                        goto err_mtt;

                if (!init_attr->srq) {
                        err = mlx4_ib_db_map_user(to_mucontext(pd->p_uctx),
                                                  ucmd.db_addr, &qp->db);
                        if (err)
                                goto err_mtt;
                }
        } else {
                qp->sq_no_prefetch = 0;

                if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
                        qp->flags |= MLX4_IB_QP_LSO;

                err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
                if (err)
                        goto err;

                if (!init_attr->srq) {
                        err = mlx4_ib_db_alloc(dev, &qp->db, 0);
                        if (err)
                                goto err;

                        *qp->db.db = 0;
                }

                if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
                        err = -ENOMEM;
                        goto err_db;
                }

                err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
                                    &qp->mtt);
                if (err)
                        goto err_buf;

                err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
                if (err)
                        goto err_mtt;

                if (qp->sq.wqe_cnt) {
                        qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
                        if (!qp->sq.wrid) {
                                err = -ENOMEM;
                                goto err_wrid;
                        }
                }

                if (qp->rq.wqe_cnt) {
                        qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
                        if (!qp->rq.wrid) {
                                err = -ENOMEM;
                                goto err_wrid;
                        }
                }
        }

        if (!sqpn) {
                err = mlx4_qp_reserve_range(dev->dev, 1, 1, &sqpn);
                if (err)
                        goto err_wrid;
                range_allocated = TRUE;
        }

        err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
        if (err)
                goto err_range;

        /*
         * Hardware wants QPN written in big-endian order (after
         * shifting) for send doorbell.  Precompute this value to save
         * a little bit when posting sends.
         */
        qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);

        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
        else
                qp->sq_signal_bits = 0;

        qp->mqp.event = mlx4_ib_qp_event;

        return 0;

err_range:
        if (range_allocated)
                mlx4_qp_release_range(dev->dev, sqpn, 1);

err_wrid:
        if (pd->p_uctx) {
                if (!init_attr->srq)
                        mlx4_ib_db_unmap_user(to_mucontext(pd->p_uctx),
                                              &qp->db);
        } else {
                if (qp->sq.wrid)
                        kfree(qp->sq.wrid);
                if (qp->rq.wrid)
                        kfree(qp->rq.wrid);
        }

err_mtt:
        mlx4_mtt_cleanup(dev->dev, &qp->mtt);

err_buf:
        if (pd->p_uctx)
                ib_umem_release(qp->umem);
        else
                mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);

err_db:
        if (!pd->p_uctx && !init_attr->srq)
                mlx4_ib_db_free(dev, &qp->db);

err:
        return err;
}

static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
{
        switch (state) {
        case XIB_QPS_RESET:     return MLX4_QP_STATE_RST;
        case XIB_QPS_INIT:      return MLX4_QP_STATE_INIT;
        case XIB_QPS_RTR:       return MLX4_QP_STATE_RTR;
        case XIB_QPS_RTS:       return MLX4_QP_STATE_RTS;
        case XIB_QPS_SQD:       return MLX4_QP_STATE_SQD;
        case XIB_QPS_SQE:       return MLX4_QP_STATE_SQER;
        case XIB_QPS_ERR:       return MLX4_QP_STATE_ERR;
        default:                return -1;
        }
}

static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
{
        if (send_cq == recv_cq)
                spin_lock_irq(&send_cq->lock);
        else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
                spin_lock_irq(&send_cq->lock);
                spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
        } else {
                spin_lock_irq(&recv_cq->lock);
                spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
        }
}

static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
{
        if (send_cq == recv_cq)
                spin_unlock_irq(&send_cq->lock);
        else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
                spin_unlock(&recv_cq->lock);
                spin_unlock_irq(&send_cq->lock);
        } else {
                spin_unlock(&send_cq->lock);
                spin_unlock_irq(&recv_cq->lock);
        }
}
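
/*
 * Note on the two helpers above: when the send and receive CQs differ,
 * both locks are always taken in ascending CQN order and released in the
 * reverse order, so two threads destroying QPs that share the same pair
 * of CQs cannot deadlock.  For example (hypothetical CQNs 5 and 9), both
 * threads lock CQ 5 first and then CQ 9, rather than one taking 5-then-9
 * while the other takes 9-then-5.
 */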

static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
                              int is_user)
{
        struct mlx4_ib_cq *send_cq, *recv_cq;
        int zombi = 0;

        if (qp->state != XIB_QPS_RESET)
                if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
                                   MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) {
                        printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
                               qp->mqp.qpn);
                        zombi = 1;
                }

        send_cq = to_mcq(qp->ibqp.send_cq);
        recv_cq = to_mcq(qp->ibqp.recv_cq);

        mlx4_ib_lock_cqs(send_cq, recv_cq);

        if (!is_user) {
                __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
                                   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                if (send_cq != recv_cq)
                        __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
        }

        mlx4_qp_remove(dev->dev, &qp->mqp);

        mlx4_ib_unlock_cqs(send_cq, recv_cq);

        mlx4_qp_free(dev->dev, &qp->mqp);

        if (!is_sqp(dev, qp) && !zombi)
                mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);

        mlx4_mtt_cleanup(dev->dev, &qp->mtt);

        if (is_user) {
                if (!qp->ibqp.srq)
                        mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.p_uctx),
                                              &qp->db);
                ib_umem_release(qp->umem);
        } else {
                kfree(qp->sq.wrid);
                kfree(qp->rq.wrid);
                mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
                if (!qp->ibqp.srq)
                        mlx4_ib_db_free(dev, &qp->db);
        }
}

struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                                struct ib_qp_init_attr *init_attr,
                                struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_sqp *sqp;
        struct mlx4_ib_qp *qp;
        int err;

        /* TODO: suggest to remove: we only support LSO, and only for kernel UD QPs. */
        /*if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO)
                return ERR_PTR(-EINVAL);
        if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO &&
                (pd->uobject || init_attr->qp_type != IB_QPT_UD))
                return ERR_PTR(-EINVAL);*/

        if (mlx4_is_barred(pd->device->dma_device))
                return ERR_PTR(-EFAULT);

        switch (init_attr->qp_type) {
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_UD:
        {
                qp = kzalloc(sizeof *qp, GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);

                err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
                if (err) {
                        kfree(qp);
                        return ERR_PTR(err);
                }

                qp->ibqp.qp_num = qp->mqp.qpn;

                break;
        }
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
                /* Userspace is not allowed to create special QPs: */
                if (pd->p_uctx)
                        return ERR_PTR(-EINVAL);

                sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
                if (!sqp)
                        return ERR_PTR(-ENOMEM);

                qp = &sqp->qp;

                err = create_qp_common(dev, pd, init_attr, udata,
                                       dev->dev->caps.sqp_start +
                                       (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
                                       init_attr->port_num - 1,
                                       qp);
                if (err) {
                        kfree(sqp);
                        return ERR_PTR(err);
                }

                qp->port        = init_attr->port_num;
                qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;

                break;
        }
        default:
                /* Don't support raw QPs */
                return ERR_PTR(-EINVAL);
        }

        return &qp->ibqp;
}

int mlx4_ib_destroy_qp(struct ib_qp *qp)
{
        struct mlx4_ib_dev *dev = to_mdev(qp->device);
        struct mlx4_ib_qp *mqp = to_mqp(qp);

        if (!mlx4_is_barred(dev->dev) && is_qp0(dev, mqp))
                mlx4_CLOSE_PORT(dev->dev, mqp->port);

        destroy_qp_common(dev, mqp, !!qp->pd->p_uctx);

        if (is_sqp(dev, mqp))
                kfree(to_msqp(mqp));
        else
                kfree(mqp);

        return 0;
}

static int to_mlx4_st(enum ib_qp_type type)
{
        switch (type) {
        case IB_QPT_RC:         return MLX4_QP_ST_RC;
        case IB_QPT_UC:         return MLX4_QP_ST_UC;
        case IB_QPT_UD:         return MLX4_QP_ST_UD;
        case IB_QPT_SMI:
        case IB_QPT_GSI:        return MLX4_QP_ST_MLX;
        default:                return -1;
        }
}

static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
                                   int attr_mask)
{
        u8 dest_rd_atomic;
        u32 access_flags;
        u32 hw_access_flags = 0;

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                dest_rd_atomic = attr->max_dest_rd_atomic;
        else
                dest_rd_atomic = qp->resp_depth;

        if (attr_mask & IB_QP_ACCESS_FLAGS)
                access_flags = attr->qp_access_flags;
        else
                access_flags = qp->atomic_rd_en;

        if (!dest_rd_atomic)
                access_flags &= IB_ACCESS_REMOTE_WRITE;

        if (access_flags & IB_ACCESS_REMOTE_READ)
                hw_access_flags |= MLX4_QP_BIT_RRE;
        if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
                hw_access_flags |= MLX4_QP_BIT_RAE;
        if (access_flags & IB_ACCESS_REMOTE_WRITE)
                hw_access_flags |= MLX4_QP_BIT_RWE;

        return cpu_to_be32(hw_access_flags);
}

static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
                            int attr_mask)
{
        if (attr_mask & IB_QP_PKEY_INDEX)
                sqp->pkey_index = attr->pkey_index;
        if (attr_mask & IB_QP_QKEY)
                sqp->qkey = attr->qkey;
        if (attr_mask & IB_QP_SQ_PSN)
                sqp->send_psn = attr->sq_psn;
}

static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
{
        path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}

static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
                         struct mlx4_qp_path *path, u8 port)
{
        path->grh_mylmc     = ah->src_path_bits & 0x7f;
        path->rlid          = cpu_to_be16(ah->dlid);
        if (ah->static_rate) {
                path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
                while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
                       !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
                        --path->static_rate;
        } else
                path->static_rate = 0;
        path->counter_index = 0xff;

        if (ah->ah_flags & IB_AH_GRH) {
                if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
                        printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
                               ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
                        return -1;
                }

                path->grh_mylmc |= 1 << 7;
                path->mgid_index = ah->grh.sgid_index;
                path->hop_limit  = ah->grh.hop_limit;
                path->tclass_flowlabel =
                        cpu_to_be32((ah->grh.traffic_class << 20) |
                                    (ah->grh.flow_label));
                memcpy(path->rgid, ah->grh.dgid.raw, 16);
        }

        path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
                ((port - 1) << 6) | ((ah->sl & 0xf) << 2);

        return 0;
}
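
/*
 * Illustrative decoding of the sched_queue byte built above, assuming the
 * layout implied by MLX4_IB_DEFAULT_SCHED_QUEUE and mlx4_set_sched(): bit
 * 6 selects the port (port 1 -> 0, port 2 -> 1) and bits 2..5 carry the
 * IB service level, so port 2 with SL 3 yields
 *
 *      0x83 | ((2 - 1) << 6) | (3 << 2) = 0xcf
 */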
770 \r
771 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,\r
772                                const struct ib_qp_attr *attr, int attr_mask,\r
773                                enum ib_qp_state cur_state, enum ib_qp_state new_state)\r
774 {\r
775         struct mlx4_ib_dev *dev = to_mdev(ibqp->device);\r
776         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
777         struct mlx4_qp_context *context;\r
778         enum mlx4_qp_optpar optpar = 0;\r
779         int sqd_event;\r
780         int err = -EINVAL;\r
781 \r
782         context = kzalloc(sizeof *context, GFP_KERNEL);\r
783         if (!context)\r
784                 return -ENOMEM;\r
785 \r
786         context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |\r
787                                      (to_mlx4_st(ibqp->qp_type) << 16));\r
788         context->flags     |= cpu_to_be32(1 << 8); /* DE? */\r
789 \r
790         if (!(attr_mask & IB_QP_PATH_MIG_STATE))\r
791                 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);\r
792         else {\r
793                 optpar |= MLX4_QP_OPTPAR_PM_STATE;\r
794                 switch (attr->path_mig_state) {\r
795                 case IB_MIG_MIGRATED:\r
796                         context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);\r
797                         break;\r
798                 case IB_MIG_REARM:\r
799                         context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);\r
800                         break;\r
801                 case IB_MIG_ARMED:\r
802                         context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);\r
803                         break;\r
804                 }\r
805         }\r
806 \r
807         if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI )\r
808                 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;\r
809         else if (ibqp->qp_type == IB_QPT_UD) {\r
810                 if (qp->flags & MLX4_IB_QP_LSO)\r
811                         context->mtu_msgmax = (u8)((IB_MTU_4096 << 5) |\r
812                                         ilog2(dev->dev->caps.max_gso_sz));\r
813                 else\r
814                         context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;\r
815         } else if (attr_mask & IB_QP_PATH_MTU) {\r
816                 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {\r
817                         printk(KERN_ERR "path MTU (%u) is invalid\n",\r
818                                 attr->path_mtu);\r
819                         goto out;\r
820                 }\r
821                 context->mtu_msgmax = (u8)((attr->path_mtu << 5) |\r
822                         ilog2(dev->dev->caps.max_msg_sz));\r
823         }\r
824 \r
825         if (qp->rq.wqe_cnt)\r
826                 context->rq_size_stride = (u8)(ilog2(qp->rq.wqe_cnt) << 3);\r
827         context->rq_size_stride |= qp->rq.wqe_shift - 4;\r
828 \r
829         if (qp->sq.wqe_cnt)\r
830                 context->sq_size_stride = (u8)(ilog2(qp->sq.wqe_cnt) << 3);\r
831         context->sq_size_stride |= qp->sq.wqe_shift - 4;\r
832 \r
833         if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT)\r
834                 context->sq_size_stride |= !!qp->sq_no_prefetch << 7;\r
835 \r
836         if (qp->ibqp.p_uctx)\r
837                 context->usr_page = cpu_to_be32(to_mucontext(ibqp->p_uctx)->uar.index);\r
838         else\r
839                 context->usr_page = cpu_to_be32(dev->priv_uar.index);\r
840 \r
841         if (attr_mask & IB_QP_DEST_QPN)\r
842                 context->remote_qpn = cpu_to_be32(attr->dest_qp_num);\r
843 \r
844         if (attr_mask & IB_QP_PORT) {\r
845                 if (cur_state == XIB_QPS_SQD && new_state == XIB_QPS_SQD &&\r
846                     !(attr_mask & IB_QP_AV)) {\r
847                         mlx4_set_sched(&context->pri_path, attr->port_num);\r
848                         optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;\r
849                 }\r
850         }\r
851 \r
852         if (attr_mask & IB_QP_PKEY_INDEX) {\r
853                 context->pri_path.pkey_index = (u8)attr->pkey_index;\r
854                 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;\r
855         }\r
856 \r
857         if (attr_mask & IB_QP_AV) {\r
858                 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,\r
859                                   attr_mask & IB_QP_PORT ? attr->port_num : qp->port))\r
860                         goto out;\r
861 \r
862                 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |\r
863                            MLX4_QP_OPTPAR_SCHED_QUEUE);\r
864         }\r
865 \r
866         if (attr_mask & IB_QP_TIMEOUT) {\r
867                 context->pri_path.ackto = attr->timeout << 3;\r
868                 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;\r
869         }\r
870 \r
871         if (attr_mask & IB_QP_ALT_PATH) {\r
872                 if (attr->alt_port_num == 0 ||\r
873                     attr->alt_port_num > dev->dev->caps.num_ports)\r
874                         goto out;\r
875 \r
876                 if (attr->alt_pkey_index >=\r
877                     dev->dev->caps.pkey_table_len[attr->alt_port_num])\r
878                         goto out;\r
879 \r
880                 if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,\r
881                                   attr->alt_port_num))\r
882                         goto out;\r
883 \r
884                 context->alt_path.pkey_index = (u8)attr->alt_pkey_index;\r
885                 context->alt_path.ackto = attr->alt_timeout << 3;\r
886                 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;\r
887         }\r
888 \r
889         context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pdn);\r
890         context->params1    = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);\r
891 \r
892         if (attr_mask & IB_QP_RNR_RETRY) {\r
893                 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);\r
894                 optpar |= MLX4_QP_OPTPAR_RNR_RETRY;\r
895         }\r
896 \r
897         if (attr_mask & IB_QP_RETRY_CNT) {\r
898                 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);\r
899                 optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;\r
900         }\r
901 \r
902         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {\r
903                 if (attr->max_rd_atomic)\r
904                         context->params1 |=\r
905                                 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);\r
906                 optpar |= MLX4_QP_OPTPAR_SRA_MAX;\r
907         }\r
908 \r
909         if (attr_mask & IB_QP_SQ_PSN)\r
910                 context->next_send_psn = cpu_to_be32(attr->sq_psn);\r
911 \r
912         context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);\r
913 \r
914         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {\r
915                 if (attr->max_dest_rd_atomic)\r
916                         context->params2 |=\r
917                                 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);\r
918                 optpar |= MLX4_QP_OPTPAR_RRA_MAX;\r
919         }\r
920 \r
921         if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {\r
922                 context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);\r
923                 optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;\r
924         }\r
925 \r
926         if (ibqp->srq)\r
927                 context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);\r
928 \r
929         if (attr_mask & IB_QP_MIN_RNR_TIMER) {\r
930                 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);\r
931                 optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;\r
932         }\r
933         if (attr_mask & IB_QP_RQ_PSN)\r
934                 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);\r
935 \r
936         context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);\r
937 \r
938         if (attr_mask & IB_QP_QKEY) {\r
939                 context->qkey = cpu_to_be32(attr->qkey);\r
940                 optpar |= MLX4_QP_OPTPAR_Q_KEY;\r
941         }\r
942 \r
943         if (ibqp->srq)\r
944                 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);\r
945 \r
946         if (!ibqp->srq && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT)\r
947                 context->db_rec_addr = cpu_to_be64(qp->db.dma.da);\r
948 \r
949         if (cur_state == XIB_QPS_INIT &&\r
950             new_state == XIB_QPS_RTR  &&\r
951             (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||\r
952              ibqp->qp_type == IB_QPT_UD)) {\r
953                 context->pri_path.sched_queue = (qp->port - 1) << 6;\r
954                 if (is_qp0(dev, qp))\r
955                         context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;\r
956                 else\r
957                         context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;\r
958         }\r
959 \r
960         if (cur_state == XIB_QPS_RTS && new_state == XIB_QPS_SQD        &&\r
961             attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)\r
962                 sqd_event = 1;\r
963         else\r
964                 sqd_event = 0;\r
965 \r
966         /*\r
967          * Before passing a kernel QP to the HW, make sure that the\r
968          * ownership bits of the send queue are set and the SQ\r
969          * headroom is stamped so that the hardware doesn't start\r
970          * processing stale work requests.\r
971          */\r
972         if (!ibqp->p_uctx && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT) {\r
973                 struct mlx4_wqe_ctrl_seg *ctrl;\r
974                 int i;\r
975 \r
976                 for (i = 0; i < qp->sq.wqe_cnt; ++i) {\r
977                         ctrl = get_send_wqe(qp, i);\r
978                         ctrl->owner_opcode = cpu_to_be32(1 << 31);\r
979 \r
980                         stamp_send_wqe(qp, i);\r
981                 }\r
982         }\r
983 \r
984         err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),\r
985                              to_mlx4_state(new_state), context, optpar,\r
986                              sqd_event, &qp->mqp);\r
987         if (err)\r
988                 goto out;\r
989 \r
990         qp->state = new_state;\r
991 \r
992         if (attr_mask & IB_QP_ACCESS_FLAGS)\r
993                 qp->atomic_rd_en = (u8)attr->qp_access_flags;\r
994         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)\r
995                 qp->resp_depth = attr->max_dest_rd_atomic;\r
996         if (attr_mask & IB_QP_PORT)\r
997                 qp->port = attr->port_num;\r
998         if (attr_mask & IB_QP_ALT_PATH)\r
999                 qp->alt_port = attr->alt_port_num;\r
1000 \r
1001         if (is_sqp(dev, qp))\r
1002                 store_sqp_attrs(to_msqp(qp), attr, attr_mask);\r
1003 \r
1004         /*\r
1005          * If we moved QP0 to RTR, bring the IB link up; if we moved\r
1006          * QP0 to RESET or ERROR, bring the link back down.\r
1007          */\r
1008         if (is_qp0(dev, qp)) {\r
1009                 if (cur_state != XIB_QPS_RTR && new_state == XIB_QPS_RTR)\r
1010                         if (mlx4_INIT_PORT(dev->dev, qp->port))\r
1011                                 printk(KERN_WARNING "INIT_PORT failed for port %d\n",\r
1012                                        qp->port);\r
1013 \r
1014                 if (cur_state != XIB_QPS_RESET && cur_state != XIB_QPS_ERR &&\r
1015                     (new_state == XIB_QPS_RESET || new_state == XIB_QPS_ERR))\r
1016                         mlx4_CLOSE_PORT(dev->dev, qp->port);\r
1017         }\r
1018 \r
1019         /*\r
1020          * If we moved a kernel QP to RESET, clean up all old CQ\r
1021          * entries and reinitialize the QP.\r
1022          */\r
1023         if (new_state == XIB_QPS_RESET && !ibqp->p_uctx) {\r
1024                 mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,\r
1025                                  ibqp->srq ? to_msrq(ibqp->srq): NULL);\r
1026                 if (ibqp->send_cq != ibqp->recv_cq)\r
1027                         mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);\r
1028 \r
1029                 qp->rq.head = 0;\r
1030                 qp->rq.tail = 0;\r
1031                 qp->sq.head = 0;\r
1032                 qp->sq.tail = 0;\r
1033                 if (!ibqp->srq)\r
1034                         *qp->db.db  = 0;\r
1035         }\r
1036 \r
1037 out:\r
1038         kfree(context);\r
1039         return err;\r
1040 }\r
1041 \r
1042 static struct ib_qp_attr mlx4_ib_qp_attr;\r
1043 static int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1];\r
1044 \r
1045 void mlx4_ib_qp_init()\r
1046 {\r
1047         memset( &mlx4_ib_qp_attr, 0, sizeof(mlx4_ib_qp_attr) );\r
1048         mlx4_ib_qp_attr.port_num = 1;\r
1049 \r
1050         memset( &mlx4_ib_qp_attr_mask_table, 0, sizeof(mlx4_ib_qp_attr_mask_table) );\r
1051         mlx4_ib_qp_attr_mask_table[IB_QPT_UD]  = (IB_QP_PKEY_INDEX              |\r
1052                                 IB_QP_PORT                      |\r
1053                                 IB_QP_QKEY);\r
1054         mlx4_ib_qp_attr_mask_table[IB_QPT_UC]  = (IB_QP_PKEY_INDEX              |\r
1055                                 IB_QP_PORT                      |\r
1056                                 IB_QP_ACCESS_FLAGS);\r
1057         mlx4_ib_qp_attr_mask_table[IB_QPT_RC]  = (IB_QP_PKEY_INDEX              |\r
1058                                 IB_QP_PORT                      |\r
1059                                 IB_QP_ACCESS_FLAGS);\r
1060         mlx4_ib_qp_attr_mask_table[IB_QPT_SMI] = (IB_QP_PKEY_INDEX              |\r
1061                                 IB_QP_QKEY);\r
1062         mlx4_ib_qp_attr_mask_table[IB_QPT_GSI] = (IB_QP_PKEY_INDEX              |\r
1063                                 IB_QP_QKEY);\r
1064 }\r
1065 \r
1066 int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,\r
1067                       int attr_mask, struct ib_udata *udata)\r
1068 {\r
1069         struct mlx4_ib_dev *dev = to_mdev(ibqp->device);\r
1070         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1071         enum ib_qp_state cur_state, new_state;\r
1072         int err = -EINVAL;\r
1073 \r
1074         UNUSED_PARAM(udata);\r
1075         \r
1076         if (mlx4_is_barred(dev->dev))\r
1077                 return -EFAULT; \r
1078 \r
1079         mutex_lock(&qp->mutex);\r
1080 \r
1081         cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;\r
1082         new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;\r
1083 \r
1084         if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))\r
1085                 goto out;\r
1086 \r
1087         if ((attr_mask & IB_QP_PORT) &&\r
1088             (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {\r
1089                 goto out;\r
1090         }\r
1091 \r
1092         if (attr_mask & IB_QP_PKEY_INDEX) {\r
1093                 int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;\r
1094                 if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])\r
1095                         goto out;\r
1096         }\r
1097 \r
1098         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&\r
1099             attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {\r
1100                 goto out;\r
1101         }\r
1102 \r
1103         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&\r
1104             attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {\r
1105                 goto out;\r
1106         }\r
1107 \r
1108         if (cur_state == new_state && cur_state == XIB_QPS_RESET) {\r
1109                 err = 0;\r
1110                 goto out;\r
1111         }\r
1112 \r
1113         if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_ERR) {\r
1114                 err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,\r
1115                                           mlx4_ib_qp_attr_mask_table[ibqp->qp_type],\r
1116                                           XIB_QPS_RESET, XIB_QPS_INIT);\r
1117                 if (err)\r
1118                         goto out;\r
1119                 cur_state = XIB_QPS_INIT;\r
1120         }\r
1121 \r
1122         err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);\r
1123 \r
1124 out:\r
1125         mutex_unlock(&qp->mutex);\r
1126         return err;\r
1127 }\r
1128 \r
1129 static enum ib_wr_opcode to_wr_opcode(struct _ib_send_wr *wr)\r
1130 {\r
1131 \r
1132         enum ib_wr_opcode opcode = -1; //= wr->wr_type;\r
1133 \r
1134         switch (wr->wr_type) {\r
1135                 case WR_SEND: \r
1136                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_SEND_WITH_IMM : IB_WR_SEND;\r
1137                         break;\r
1138                 case WR_LSO:\r
1139                         opcode = IB_WR_LSO;\r
1140                         break;\r
1141                 case WR_RDMA_WRITE:     \r
1142                         opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_RDMA_WRITE_WITH_IMM : IB_WR_RDMA_WRITE;\r
1143                         break;\r
1144                 case WR_RDMA_READ:\r
1145                         opcode = IB_WR_RDMA_READ;\r
1146                         break;\r
1147                 case WR_COMPARE_SWAP:\r
1148                         opcode = IB_WR_ATOMIC_CMP_AND_SWP;\r
1149                         break;\r
1150                 case WR_FETCH_ADD:\r
1151                         opcode = IB_WR_ATOMIC_FETCH_AND_ADD;\r
1152                         break;\r
1153         }\r
1154         return opcode;\r
1155 }\r
1156 \r
1157 static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr,\r
1158                             void *wqe)\r
1159 {\r
1160         enum ib_wr_opcode opcode = to_wr_opcode(wr);\r
1161         struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;\r
1162         struct mlx4_wqe_mlx_seg *mlx = wqe;\r
1163         struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof *mlx);\r
1164         struct mlx4_ib_ah *ah = to_mah((struct ib_ah *)wr->dgrm.ud.h_av);\r
1165         __be16 pkey;\r
1166         int send_size;\r
1167         int header_size;\r
1168         int spc;\r
1169         u32 i;\r
1170 \r
1171         send_size = 0;\r
1172         for (i = 0; i < wr->num_ds; ++i)\r
1173                 send_size += wr->ds_array[i].length;\r
1174 \r
1175         ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);\r
1176 \r
1177         sqp->ud_header.lrh.service_level   =\r
1178                 (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);\r
1179         sqp->ud_header.lrh.destination_lid = ah->av.dlid;\r
1180         sqp->ud_header.lrh.source_lid      = cpu_to_be16(ah->av.g_slid & 0x7f);\r
1181         if (mlx4_ib_ah_grh_present(ah)) {\r
1182                 sqp->ud_header.grh.traffic_class =\r
1183                         (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff);\r
1184                 sqp->ud_header.grh.flow_label    =\r
1185                         ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);\r
1186                 sqp->ud_header.grh.hop_limit     = ah->av.hop_limit;\r
1187                 ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >> 24),\r
1188                                   ah->av.gid_index, &sqp->ud_header.grh.source_gid);\r
1189                 memcpy(sqp->ud_header.grh.destination_gid.raw,\r
1190                        ah->av.dgid, 16);\r
1191         }\r
1192 \r
1193         mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);\r
1194         mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |\r
1195                                   (sqp->ud_header.lrh.destination_lid ==\r
1196                                    XIB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |\r
1197                                   (sqp->ud_header.lrh.service_level << 8));\r
1198         mlx->rlid   = sqp->ud_header.lrh.destination_lid;\r
1199 \r
1200         switch (opcode) {\r
1201         case IB_WR_SEND:\r
1202                 sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;\r
1203                 sqp->ud_header.immediate_present = 0;\r
1204                 break;\r
1205         case IB_WR_SEND_WITH_IMM:\r
1206                 sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;\r
1207                 sqp->ud_header.immediate_present = 1;\r
1208                 sqp->ud_header.immediate_data    = wr->immediate_data;\r
1209                 break;\r
1210         default:\r
1211                 return -EINVAL;\r
1212         }\r
1213 \r
1214         sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;\r
1215         if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)\r
1216                 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;\r
1217         sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt & IB_SEND_OPT_SOLICITED));\r
1218         if (!sqp->qp.ibqp.qp_num)\r
1219                 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);\r
1220         else\r
1221                 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->dgrm.ud.pkey_index, &pkey);\r
1222         sqp->ud_header.bth.pkey = pkey;\r
1223         sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;\r
1224         sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));\r
1225         sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ?\r
1226                 cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;\r
1227         sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);\r
1228 \r
1229         header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);\r
1230 \r
1231 #if 0\r
1232         {\r
1233                 printk(KERN_ERR "built UD header of size %d:\n", header_size);\r
1234                 for (i = 0; i < header_size / 4; ++i) {\r
1235                         if (i % 8 == 0)\r
1236                                 printk("  [%02x] ", i * 4);\r
1237                         printk(" %08x",\r
1238                                be32_to_cpu(((__be32 *) sqp->header_buf)[i]));\r
1239                         if ((i + 1) % 8 == 0)\r
1240                                 printk("\n");\r
1241                 }\r
1242                 printk("\n");\r
1243         }\r
1244 #endif\r
1245 \r
1246         /*\r
1247          * Inline data segments may not cross a 64 byte boundary.  If\r
1248          * our UD header is bigger than the space available up to the\r
1249          * next 64 byte boundary in the WQE, use two inline data\r
1250          * segments to hold the UD header.\r
1251          */\r
1252         spc = MLX4_INLINE_ALIGN -\r
1253                 ((u32)(ULONG_PTR)(inl + 1) & (MLX4_INLINE_ALIGN - 1));\r
1254         if (header_size <= spc) {\r
1255                 inl->byte_count = cpu_to_be32(1 << 31 | header_size);\r
1256                 memcpy(inl + 1, sqp->header_buf, header_size);\r
1257                 i = 1;\r
1258         } else {\r
1259                 inl->byte_count = cpu_to_be32(1 << 31 | spc);\r
1260                 memcpy(inl + 1, sqp->header_buf, spc);\r
1261 \r
1262                 inl = (void*)((u8*)(inl + 1) + spc);\r
1263                 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);\r
1264                 /*\r
1265                  * Need a barrier here to make sure all the data is\r
1266                  * visible before the byte_count field is set.\r
1267                  * Otherwise the HCA prefetcher could grab the 64-byte\r
1268                  * chunk with this inline segment and get a valid (!=\r
1269                  * 0xffffffff) byte count but stale data, and end up\r
1270                  * generating a packet with bad headers.\r
1271                  *\r
1272                  * The first inline segment's byte_count field doesn't\r
1273                  * need a barrier, because it comes after a\r
1274                  * control/MLX segment and therefore is at an offset\r
1275                  * of 16 mod 64.\r
1276                  */\r
1277                 wmb();\r
1278                 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));\r
1279                 i = 2;\r
1280         }\r
1281 \r
1282         return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);\r
1283 }\r
1284 \r
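/*\r
 * Return nonzero if posting nreq more WQEs would overflow the work\r
 * queue.  The fast path uses the cached head/tail indices; if the\r
 * queue looks full, the indices are re-read under the CQ lock so that\r
 * a racing completion that has just freed WQEs is observed.\r
 */\r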
1285 static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)\r
1286 {\r
1287         unsigned cur;\r
1288         struct mlx4_ib_cq *cq;\r
1289 \r
1290         cur = wq->head - wq->tail;\r
1291         if (likely((int)cur + nreq < wq->max_post))\r
1292                 return 0;\r
1293 \r
1294         cq = to_mcq(ib_cq);\r
1295         spin_lock(&cq->lock);\r
1296         cur = wq->head - wq->tail;\r
1297         spin_unlock(&cq->lock);\r
1298 \r
1299         return (int)cur + nreq >= wq->max_post;\r
1300 }\r
1301 \r
1302 static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,\r
1303                                           u64 remote_addr, __be32 rkey)\r
1304 {\r
1305         rseg->raddr    = cpu_to_be64(remote_addr);\r
1306         rseg->rkey     = rkey;\r
1307         rseg->reserved = 0;\r
1308 }\r
1309 \r
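/*\r
 * Build the atomic segment.  For compare-and-swap, atomic1 carries the\r
 * compare value and atomic2 the swap value; for fetch-and-add only\r
 * atomic1 (the value to add) is used.\r
 */\r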
1310 static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, ib_send_wr_t *wr)\r
1311 {\r
1312         if (wr->wr_type == WR_COMPARE_SWAP) {\r
1313                 aseg->swap_add = wr->remote_ops.atomic2;\r
1314                 aseg->compare  = wr->remote_ops.atomic1;\r
1315         } else {\r
1316                 aseg->swap_add = wr->remote_ops.atomic1;\r
1317                 aseg->compare  = 0;\r
1318         }\r
1319 \r
1320 }\r
1321 \r
1322 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,\r
1323                              ib_send_wr_t *wr)\r
1324 {\r
1325         memcpy(dseg->av, &to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, sizeof (struct mlx4_av));\r
1326         dseg->dqpn = wr->dgrm.ud.remote_qp;\r
1327         dseg->qkey = wr->dgrm.ud.remote_qkey;\r
1328 }\r
1329 \r
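/*\r
 * Build the extra inline segment appended to MLX (QP0/QP1) sends to\r
 * hold the ICRC: a 4-byte inline chunk whose payload is cleared before\r
 * its byte_count (with the inline bit set) is made valid.\r
 */\r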
1330 static void set_mlx_icrc_seg(void *dseg)\r
1331 {\r
1332         u32 *t = dseg;\r
1333         struct mlx4_wqe_inline_seg *iseg = dseg;\r
1334 \r
1335         t[1] = 0;\r
1336 \r
1337         /*\r
1338          * Need a barrier here before writing the byte_count field to\r
1339          * make sure that all the data is visible before the\r
1340          * byte_count field is set.  Otherwise, if the segment begins\r
1341          * a new cacheline, the HCA prefetcher could grab the 64-byte\r
1342          * chunk and get a valid (!= 0xffffffff) byte count but\r
1343          * stale data, and end up sending the wrong data.\r
1344          */\r
1345         wmb();\r
1346 \r
1347         iseg->byte_count = cpu_to_be32((1 << 31) | 4);\r
1348 }\r
1349 \r
1350 static void set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg)\r
1351 {\r
1352         dseg->lkey       = cpu_to_be32(sg->lkey);\r
1353         dseg->addr       = cpu_to_be64(sg->vaddr);\r
1354 \r
1355         /*\r
1356          * Need a barrier here before writing the byte_count field to\r
1357          * make sure that all the data is visible before the\r
1358          * byte_count field is set.  Otherwise, if the segment begins\r
1359          * a new cacheline, the HCA prefetcher could grab the 64-byte\r
1360          * chunk and get a valid (!= 0xffffffff) byte count but\r
1361          * stale data, and end up sending the wrong data.\r
1362          */\r
1363         wmb();\r
1364 \r
1365         dseg->byte_count = cpu_to_be32(sg->length);\r
1366 }\r
1367 \r
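/*\r
 * __set_data_seg() is the receive-queue variant of set_data_seg():\r
 * unlike the send path, no write barrier is used before byte_count\r
 * here; the receive path relies on the wmb() issued before the\r
 * doorbell record update in mlx4_ib_post_recv().\r
 */\r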
1368 static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg)\r
1369 {\r
1370         dseg->byte_count = cpu_to_be32(sg->length);\r
1371         dseg->lkey       = cpu_to_be32(sg->lkey);\r
1372         dseg->addr       = cpu_to_be64(sg->vaddr);\r
1373 }\r
1374 \r
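/*\r
 * Build the LSO segment for a UD send: copy the caller-supplied packet\r
 * header into the WQE and encode (mss - hlen) and hlen in mss_hdr_size.\r
 * The 16-byte-aligned segment length is returned through *lso_seg_len.\r
 */\r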
1375 static int build_lso_seg(struct mlx4_lso_seg *wqe, ib_send_wr_t *wr,\r
1376                                                  struct mlx4_ib_qp *qp, unsigned *lso_seg_len)\r
1377 {\r
1378         unsigned halign = ALIGN(sizeof *wqe + wr->dgrm.ud.hlen, 16);\r
1379         void * ds;\r
1380         /*\r
1381          * This is a temporary limitation and will be removed in\r
1382          * a forthcoming FW release.\r
1383          */\r
1384         if (unlikely(halign > 64))\r
1385                 return -EINVAL;\r
1386 \r
1387         if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&\r
1388                 wr->num_ds > qp->sq.max_gs - (halign >> 4)))\r
1389                 return -EINVAL;\r
1390         *lso_seg_len = halign;\r
1391         ds = (u8 *) (void *) wqe + halign;\r
1392 \r
1393         // TODO: use memcpy from a physical/virtual address we can get directly from ipoib at the first data segment\r
1394         memcpy(wqe->header, wr->dgrm.ud.header, wr->dgrm.ud.hlen);\r
1395         \r
1396         /* make sure LSO header is written before overwriting stamping */\r
1397         wmb();\r
1398 \r
1399         wqe->mss_hdr_size = cpu_to_be32((wr->dgrm.ud.mss - wr->dgrm.ud.hlen) << 16 |\r
1400                                                                         wr->dgrm.ud.hlen);\r
1401         \r
1402         return 0;\r
1403 }\r
1404 \r
1405 \r
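/*\r
 * Post a chain of send work requests.  For each WR a control segment\r
 * is built, followed by the transport-specific segments (remote\r
 * address/atomic, datagram, LSO or MLX header) and the scatter/gather\r
 * list written in reverse order; the ownership bit is written last,\r
 * and the send doorbell is rung once for the whole chain.\r
 */\r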
1406 int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,\r
1407                       ib_send_wr_t **bad_wr)\r
1408 {\r
1409         enum ib_wr_opcode opcode;\r
1410         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1411         u8 *wqe /*, *wqe_start*/;\r
1412         struct mlx4_wqe_ctrl_seg *ctrl;\r
1413         struct mlx4_wqe_data_seg *dseg;\r
1414         unsigned long flags;\r
1415         int nreq;\r
1416         int err = 0;\r
1417         int ind;\r
1418         int size;\r
1419         unsigned seglen;\r
1420         int i;\r
1421         int j = 0;\r
1422 \r
1423         if (mlx4_is_barred(ibqp->device->dma_device))\r
1424                 return -EFAULT;\r
1425 \r
1426         spin_lock_irqsave(&qp->sq.lock, &flags);\r
1427 \r
1428         ind = qp->sq.head;\r
1429 \r
1430         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
1431                 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {\r
1432                         err = -ENOMEM;\r
1433                         if (bad_wr)\r
1434                                 *bad_wr = wr;\r
1435                         goto out;\r
1436                 }\r
1437 \r
1438                 if (unlikely(wr->num_ds > (u32)qp->sq.max_gs)) {\r
1439                         err = -EINVAL;\r
1440                         if (bad_wr)\r
1441                                 *bad_wr = wr;\r
1442                         goto out;\r
1443                 }\r
1444 \r
1445                 /*wqe_start = */\r
1446                 wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));\r
1447                 ctrl = (void*)wqe;\r
1448                 qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;\r
1449                 opcode = to_wr_opcode(wr);\r
1450 \r
1451                 ctrl->srcrb_flags =\r
1452                         (wr->send_opt & IB_SEND_OPT_SIGNALED ?\r
1453                          cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |\r
1454                         (wr->send_opt & IB_SEND_OPT_SOLICITED ?\r
1455                          cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |\r
1456                         (wr->send_opt & IB_SEND_OPT_TX_IP_CSUM ?\r
1457                          cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM) : 0) |\r
1458                         (wr->send_opt & IB_SEND_OPT_TX_TCP_UDP_CSUM ?\r
1459                          cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |\r
1460                         qp->sq_signal_bits;\r
1461 \r
1462                 if (opcode == IB_WR_SEND_WITH_IMM ||\r
1463                     opcode == IB_WR_RDMA_WRITE_WITH_IMM)\r
1464                         ctrl->imm = wr->immediate_data;\r
1465                 else\r
1466                         ctrl->imm = 0;\r
1467 \r
1468                 wqe += sizeof *ctrl;\r
1469                 size = sizeof *ctrl / 16;\r
1470 \r
1471                 switch (ibqp->qp_type) {\r
1472                 case IB_QPT_RC:\r
1473                 case IB_QPT_UC:\r
1474                         switch (opcode) {\r
1475                         case IB_WR_ATOMIC_CMP_AND_SWP:\r
1476                         case IB_WR_ATOMIC_FETCH_AND_ADD:\r
1477                                 set_raddr_seg((void*)wqe, wr->remote_ops.vaddr,\r
1478                                               wr->remote_ops.rkey);\r
1479                                 wqe  += sizeof (struct mlx4_wqe_raddr_seg);\r
1480 \r
1481                                 set_atomic_seg((void*)wqe, wr);\r
1482                                 wqe  += sizeof (struct mlx4_wqe_atomic_seg);\r
1483 \r
1484                                 size += (sizeof (struct mlx4_wqe_raddr_seg) +\r
1485                                          sizeof (struct mlx4_wqe_atomic_seg)) / 16;\r
1486 \r
1487                                 break;\r
1488 \r
1489                         case IB_WR_RDMA_READ:\r
1490                         case IB_WR_RDMA_WRITE:\r
1491                         case IB_WR_RDMA_WRITE_WITH_IMM:\r
1492                                 set_raddr_seg((void*)wqe, wr->remote_ops.vaddr,\r
1493                                               wr->remote_ops.rkey);\r
1494                                 wqe  += sizeof (struct mlx4_wqe_raddr_seg);\r
1495                                 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;\r
1496                                 break;\r
1497 \r
1498                         default:\r
1499                                 /* No extra segments required for sends */\r
1500                                 break;\r
1501                         }\r
1502                         break;\r
1503 \r
1504                 case IB_QPT_UD:\r
1505                         set_datagram_seg((void*)wqe, wr);\r
1506                         wqe  += sizeof (struct mlx4_wqe_datagram_seg);\r
1507                         size += sizeof (struct mlx4_wqe_datagram_seg) / 16;\r
1508                         if (wr->wr_type == WR_LSO) {\r
1509                                 err = build_lso_seg((struct mlx4_lso_seg *)(void *)wqe, wr, qp, &seglen);\r
1510                                 if (unlikely(err)) {\r
1511                                         *bad_wr = wr;\r
1512                                         goto out;\r
1513                                 }\r
1514 #define I64_CACHE_LINE          64\r
1515 #define OPCODE_INVALID_BIT      6\r
1516                                 // WQE bug workaround for the LSO case:\r
1517                                 // if the LSO segment is large enough to exceed one cache block,\r
1518                                 // or small enough that the S/G elements land in the same cache block,\r
1519                                 // OPCODE_INVALID_BIT must be set so that the HCA rereads this WQE.\r
1520                                 // A more precise condition would be\r
1521                                 //      (unlikely (seglen % I64_CACHE_LINE || seglen % (I64_CACHE_LINE-2) ))\r
1522                                 // but it is not used, to keep the datapath calculation cheap.\r
1523                                 // Note: if the LSO segment consists of 15 DWORDs, the S/G element block\r
1524                                 // nevertheless starts in the next cache block.\r
1525                                 if (unlikely (seglen < I64_CACHE_LINE-4  || seglen > I64_CACHE_LINE ))\r
1526                                         ctrl->owner_opcode |= cpu_to_be32 ( 1 << OPCODE_INVALID_BIT);\r
1527                                 wqe  += seglen;\r
1528                                 size += seglen / 16;\r
1529                                 j=1;\r
1530                         }\r
1531                         break;\r
1532 \r
1533                 case IB_QPT_SMI:\r
1534                 case IB_QPT_GSI:\r
1535                         err = build_mlx_header(to_msqp(qp), wr, ctrl);\r
1536                         if (err < 0) {\r
1537                                 if (bad_wr)\r
1538                                         *bad_wr = wr;\r
1539                                 goto out;\r
1540                         }\r
1541                         wqe  += err;\r
1542                         size += err / 16;\r
1543 \r
1544                         err = 0;\r
1545                         break;\r
1546 \r
1547                 default:\r
1548                         break;\r
1549                 }\r
1550 \r
1551                 /*\r
1552                  * Write data segments in reverse order, so as to\r
1553                  * overwrite cacheline stamp last within each\r
1554                  * cacheline.  This avoids issues with WQE\r
1555                  * prefetching.\r
1556                  */\r
1557 \r
1558                 dseg = (void*)wqe;\r
1559                 dseg += wr->num_ds - 1;\r
1560                 size += wr->num_ds * (sizeof (struct mlx4_wqe_data_seg) / 16);\r
1561 \r
1562                 /* Add one more inline data segment for ICRC for MLX sends */\r
1563                 if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||\r
1564                              qp->ibqp.qp_type == IB_QPT_GSI)) {\r
1565                         set_mlx_icrc_seg(dseg + 1);\r
1566                         size += sizeof (struct mlx4_wqe_data_seg) / 16;\r
1567                 }\r
1568 \r
1569                 for (i = wr->num_ds - 1; i >= 0; --i, --dseg)\r
1570                         set_data_seg(dseg, wr->ds_array + i);\r
1571 \r
1572                 ctrl->fence_size = (u8)((wr->send_opt & IB_SEND_OPT_FENCE ?\r
1573                                     MLX4_WQE_CTRL_FENCE : 0) | size);\r
1574 \r
1575                 /*\r
1576                  * Make sure descriptor is fully written before\r
1577                  * setting ownership bit (because HW can start\r
1578                  * executing as soon as we do).\r
1579                  */\r
1580                 wmb();\r
1581 \r
1582                 if (opcode < 0 || opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {\r
1583                         err = -EINVAL;\r
1584                         goto out;\r
1585                 }\r
1586 \r
1587                 ctrl->owner_opcode = mlx4_ib_opcode[opcode] |\r
1588                         (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);\r
1589 \r
1590                 /*\r
1591                  * We can improve latency by not stamping the last\r
1592                  * send queue WQE until after ringing the doorbell, so\r
1593                  * only stamp here if there are still more WQEs to post.\r
1594                  */\r
1595                 if (wr->p_next)\r
1596                         stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &\r
1597                                        (qp->sq.wqe_cnt - 1));\r
1598 \r
1599                 ++ind;\r
1600         }\r
1601 \r
1602 //printk("ctrl->srcrb_flags & MLX4_WQE_CTRL_TCP_UDP_CSUM =%d \n", ctrl->srcrb_flags & cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM ));\r
1603 \r
1604 out:\r
1605 //WQE printout\r
1606 #if 0   \r
1607         if (j) {\r
1608                 u32 *ds = (u32 *) wqe_start;\r
1609                 printk("WQE DUMP:\n");\r
1610                 for (j = 0; j < ctrl->fence_size*4; ++j) {\r
1611                         printk("%d %08x\n", j,be32_to_cpu(*ds));\r
1612                         ++ds;\r
1613                 }\r
1614         }\r
1615 #endif  \r
1616         if (likely(nreq)) {\r
1617                 qp->sq.head += nreq;\r
1618 \r
1619                 /*\r
1620                  * Make sure that descriptors are written before\r
1621                  * doorbell record.\r
1622                  */\r
1623                 wmb();\r
1624 \r
1625                 writel(qp->doorbell_qpn,\r
1626                        (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);\r
1627 \r
1628 #if 0\r
1629                 if (qp->mqp.qpn == 0x41)\r
1630                         cl_dbg_out( "[MLX4_BUS] mlx4_ib_post_send : qtype %d, qpn %#x, nreq %d, sq.head %#x, wqe_ix %d, db %p \n", \r
1631                                 ibqp->qp_type, qp->mqp.qpn, nreq, qp->sq.head, ind, \r
1632                                 (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL );\r
1633 #endif          \r
1634                 /*\r
1635                  * Make sure doorbells don't leak out of SQ spinlock\r
1636                  * and reach the HCA out of order.\r
1637                  */\r
1638                 mmiowb();\r
1639 \r
1640                 stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &\r
1641                                (qp->sq.wqe_cnt - 1));\r
1642         }\r
1643 \r
1644         spin_unlock_irqrestore(&qp->sq.lock, flags);\r
1645 \r
1646         return err;\r
1647 }\r
1648 \r
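/*\r
 * Post a chain of receive work requests: copy each WR's scatter list\r
 * into its receive WQE, terminate short lists with an MLX4_INVALID_LKEY\r
 * sentinel entry, and update the doorbell record once at the end.\r
 */\r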
1649 int mlx4_ib_post_recv(struct ib_qp *ibqp, ib_recv_wr_t *wr,\r
1650                       ib_recv_wr_t **bad_wr)\r
1651 {\r
1652         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1653         struct mlx4_wqe_data_seg *scat;\r
1654         unsigned long flags;\r
1655         int err = 0;\r
1656         int nreq;\r
1657         int ind;\r
1658         int i;\r
1659 \r
1660         if (mlx4_is_barred(ibqp->device->dma_device))\r
1661                 return -EFAULT;\r
1662 \r
1663         spin_lock_irqsave(&qp->rq.lock, &flags);\r
1664 \r
1665         ind = qp->rq.head & (qp->rq.wqe_cnt - 1);\r
1666 \r
1667         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
1668                 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {\r
1669                         err = -ENOMEM;\r
1670                         if (bad_wr)\r
1671                                 *bad_wr = wr;\r
1672                         goto out;\r
1673                 }\r
1674 \r
1675                 if (unlikely(wr->num_ds > (u32)qp->rq.max_gs)) {\r
1676                         err = -EINVAL;\r
1677                         if (bad_wr)\r
1678                                 *bad_wr = wr;\r
1679                         goto out;\r
1680                 }\r
1681 \r
1682                 scat = get_recv_wqe(qp, ind);\r
1683 \r
1684                 for (i = 0; i < (int)wr->num_ds; ++i)\r
1685                         __set_data_seg(scat + i, wr->ds_array + i);\r
1686 \r
1687                 if (i < qp->rq.max_gs) {\r
1688                         scat[i].byte_count = 0;\r
1689                         scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);\r
1690                         scat[i].addr       = 0;\r
1691                 }\r
1692 \r
1693                 qp->rq.wrid[ind] = wr->wr_id;\r
1694 \r
1695                 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);\r
1696         }\r
1697 \r
1698 out:\r
1699         if (likely(nreq)) {\r
1700                 qp->rq.head += nreq;\r
1701 \r
1702                 /*\r
1703                  * Make sure that descriptors are written before\r
1704                  * doorbell record.\r
1705                  */\r
1706                 wmb();\r
1707 \r
1708                 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);\r
1709 \r
1710 #if 0\r
1711                 if (qp->mqp.qpn == 0x41)\r
1712                         cl_dbg_out( "[MLX4_BUS] mlx4_ib_post_recv : qtype %d, qpn %#x, nreq %d, rq.head %#x, wqe_ix %d, db_obj %p, db %p \n", \r
1713                                 ibqp->qp_type, qp->mqp.qpn, nreq, qp->rq.head, ind, &qp->db, qp->db.db );\r
1714 #endif          \r
1715         }\r
1716 \r
1717         spin_unlock_irqrestore(&qp->rq.lock, flags);\r
1718 \r
1719         return err;\r
1720 }\r
1721 \r
1722 static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)\r
1723 {\r
1724         switch (mlx4_state) {\r
1725         case MLX4_QP_STATE_RST:      return XIB_QPS_RESET;\r
1726         case MLX4_QP_STATE_INIT:     return XIB_QPS_INIT;\r
1727         case MLX4_QP_STATE_RTR:      return XIB_QPS_RTR;\r
1728         case MLX4_QP_STATE_RTS:      return XIB_QPS_RTS;\r
1729         case MLX4_QP_STATE_SQ_DRAINING:\r
1730         case MLX4_QP_STATE_SQD:      return XIB_QPS_SQD;\r
1731         case MLX4_QP_STATE_SQER:     return XIB_QPS_SQE;\r
1732         case MLX4_QP_STATE_ERR:      return XIB_QPS_ERR;\r
1733         default:                     return -1;\r
1734         }\r
1735 }\r
1736 \r
1737 static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)\r
1738 {\r
1739         switch (mlx4_mig_state) {\r
1740         case MLX4_QP_PM_ARMED:          return IB_MIG_ARMED;\r
1741         case MLX4_QP_PM_REARM:          return IB_MIG_REARM;\r
1742         case MLX4_QP_PM_MIGRATED:       return IB_MIG_MIGRATED;\r
1743         default: return -1;\r
1744         }\r
1745 }\r
1746 \r
1747 static int to_ib_qp_access_flags(int mlx4_flags)\r
1748 {\r
1749         int ib_flags = 0;\r
1750 \r
1751         if (mlx4_flags & MLX4_QP_BIT_RRE)\r
1752                 ib_flags |= IB_ACCESS_REMOTE_READ;\r
1753         if (mlx4_flags & MLX4_QP_BIT_RWE)\r
1754                 ib_flags |= IB_ACCESS_REMOTE_WRITE;\r
1755         if (mlx4_flags & MLX4_QP_BIT_RAE)\r
1756                 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;\r
1757 \r
1758         return ib_flags;\r
1759 }\r
1760 \r
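/*\r
 * Translate a firmware QP path into ib_ah_attr: port number, DLID, SL,\r
 * source path bits, static rate and, when the GRH bit is set, the GRH\r
 * fields.\r
 */\r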
1761 static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,\r
1762                                 struct mlx4_qp_path *path)\r
1763 {\r
1764         memset(ib_ah_attr, 0, sizeof *ib_ah_attr);\r
1765         ib_ah_attr->port_num      = path->sched_queue & 0x40 ? 2 : 1;\r
1766 \r
1767         if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)\r
1768                 return;\r
1769 \r
1770         ib_ah_attr->dlid          = be16_to_cpu(path->rlid);\r
1771         ib_ah_attr->sl            = (path->sched_queue >> 2) & 0xf;\r
1772         ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;\r
1773         ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;\r
1774         ib_ah_attr->ah_flags      = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;\r
1775         if (ib_ah_attr->ah_flags) {\r
1776                 ib_ah_attr->grh.sgid_index = path->mgid_index;\r
1777                 ib_ah_attr->grh.hop_limit  = path->hop_limit;\r
1778                 ib_ah_attr->grh.traffic_class =\r
1779                         (u8)((be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff);\r
1780                 ib_ah_attr->grh.flow_label =\r
1781                         be32_to_cpu(path->tclass_flowlabel) & 0xfffff;\r
1782                 memcpy(ib_ah_attr->grh.dgid.raw,\r
1783                         path->rgid, sizeof ib_ah_attr->grh.dgid.raw);\r
1784         }\r
1785 }\r
1786 \r
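/*\r
 * Query QP attributes.  A QP in the RESET state is reported without a\r
 * firmware command; otherwise the QP context is read via mlx4_qp_query()\r
 * and translated into ib_qp_attr/ib_qp_init_attr.  qp->mutex is held\r
 * across the query to serialize it with other operations on this QP.\r
 */\r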
1787 int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,\r
1788                      struct ib_qp_init_attr *qp_init_attr)\r
1789 {\r
1790         struct mlx4_ib_dev *dev = to_mdev(ibqp->device);\r
1791         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1792         struct mlx4_qp_context context;\r
1793         int mlx4_state;\r
1794         int err = 0;\r
1795 \r
1796         UNUSED_PARAM(qp_attr_mask);\r
1797 \r
1798         if (mlx4_is_barred(dev->dev))\r
1799                 return -EFAULT;\r
1800 \r
1801         mutex_lock(&qp->mutex);\r
1802         \r
1803         if (qp->state == XIB_QPS_RESET) {\r
1804                 qp_attr->qp_state = XIB_QPS_RESET;\r
1805                 goto done;\r
1806         }\r
1807 \r
1808         err = mlx4_qp_query(dev->dev, &qp->mqp, &context);\r
1809         if (err) {\r
1810                 err = -EINVAL;\r
1811                 goto out;\r
1812         }\r
1813 \r
1814         mlx4_state = be32_to_cpu(context.flags) >> 28;\r
1815 \r
1816         qp_attr->qp_state            = to_ib_qp_state(mlx4_state);\r
1817         qp_attr->path_mtu            = context.mtu_msgmax >> 5;\r
1818         qp_attr->path_mig_state      =\r
1819                 to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);\r
1820         qp_attr->qkey                = be32_to_cpu(context.qkey);\r
1821         qp_attr->rq_psn              = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;\r
1822         qp_attr->sq_psn              = be32_to_cpu(context.next_send_psn) & 0xffffff;\r
1823         qp_attr->dest_qp_num         = be32_to_cpu(context.remote_qpn) & 0xffffff;\r
1824         qp_attr->qp_access_flags     =\r
1825                 to_ib_qp_access_flags(be32_to_cpu(context.params2));\r
1826 \r
1827         if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {\r
1828                 to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);\r
1829                 to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);\r
1830                 qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;\r
1831                 qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;\r
1832         }\r
1833 \r
1834         qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;\r
1835         if (qp_attr->qp_state == XIB_QPS_INIT)\r
1836                 qp_attr->port_num = qp->port;\r
1837         else\r
1838                 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;\r
1839 \r
1840         /* qp_attr->en_sqd_async_notify is only applicable in modify qp */\r
1841         qp_attr->sq_draining = (u8)(mlx4_state == MLX4_QP_STATE_SQ_DRAINING);\r
1842 \r
1843         qp_attr->max_rd_atomic = (u8)(1 << ((be32_to_cpu(context.params1) >> 21) & 0x7));\r
1844 \r
1845         qp_attr->max_dest_rd_atomic =\r
1846                 (u8)(1 << ((be32_to_cpu(context.params2) >> 21) & 0x7));\r
1847         qp_attr->min_rnr_timer      =\r
1848                 (u8)((be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f);\r
1849         qp_attr->timeout            = context.pri_path.ackto >> 3;\r
1850         qp_attr->retry_cnt          = (u8)((be32_to_cpu(context.params1) >> 16) & 0x7);\r
1851         qp_attr->rnr_retry          = (u8)((be32_to_cpu(context.params1) >> 13) & 0x7);\r
1852         qp_attr->alt_timeout        = context.alt_path.ackto >> 3;\r
1853 \r
1854 done:\r
1855         qp_attr->cur_qp_state        = qp_attr->qp_state;\r
1856         qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;\r
1857         qp_attr->cap.max_recv_sge    = qp->rq.max_gs;\r
1858 \r
1859         if (!ibqp->p_uctx) {\r
1860                 qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;\r
1861                 qp_attr->cap.max_send_sge = qp->sq.max_gs;\r
1862         } else {\r
1863                 qp_attr->cap.max_send_wr  = 0;\r
1864                 qp_attr->cap.max_send_sge = 0;\r
1865         }\r
1866 \r
1867         /*\r
1868          * We don't support inline sends for kernel QPs (yet), and we\r
1869          * don't know what userspace's value should be.\r
1870          */\r
1871         qp_attr->cap.max_inline_data = 0;\r
1872 \r
1873         qp_init_attr->cap            = qp_attr->cap;\r
1874 \r
1875 out:\r
1876         mutex_unlock(&qp->mutex);\r
1877         return err;\r
1878 }\r
1879 \r