[mlx4] Fix error path on create qp.
hw/mlx4/kernel/bus/ib/qp.c
/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "mlx4_ib.h"
#include "ib_cache.h"
#include "ib_pack.h"
#include "qp.h"
#include "user.h"

enum {
        MLX4_IB_ACK_REQ_FREQ    = 8,
};

enum {
        MLX4_IB_DEFAULT_SCHED_QUEUE     = 0x83,
        MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
};

enum {
        /*
         * Largest possible UD header: send with GRH and immediate data.
         */
        MLX4_IB_UD_HEADER_SIZE          = 72
};

struct mlx4_ib_sqp {
        struct mlx4_ib_qp       qp;
        int                     pkey_index;
        u32                     qkey;
        u32                     send_psn;
        struct ib_ud_header     ud_header;
        u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];
};

enum {
        MLX4_IB_MIN_SQ_STRIDE = 6
};

static const __be32 mlx4_ib_opcode[] = {
        __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),         /* [IB_WR_RDMA_WRITE]           */
        __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),     /* [IB_WR_RDMA_WRITE_WITH_IMM]  */
        __constant_cpu_to_be32(MLX4_OPCODE_SEND),               /* [IB_WR_SEND]                 */
        __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),           /* [IB_WR_SEND_WITH_IMM]        */
        __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),          /* [IB_WR_RDMA_READ]            */
        __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),          /* [IB_WR_ATOMIC_CMP_AND_SWP]   */
        __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),          /* [IB_WR_ATOMIC_FETCH_AND_ADD] */
        __constant_cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6))      /* [IB_WR_LSO]                  */
};

static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
        return container_of(mqp, struct mlx4_ib_sqp, qp);
}

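/*
 * The special QPs (QP0 and QP1 for each port) occupy the four consecutive
 * QPNs starting at sqp_start: QP0 for ports 1 and 2, then QP1 for ports
 * 1 and 2 (see the SMI/GSI case in mlx4_ib_create_qp() below).
 */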
static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
        return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
                qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
}

static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
        return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
                qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
}

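/*
 * Return a pointer to the WQE at byte offset 'offset' within the QP
 * buffer, whether the buffer is one contiguous chunk or a page list.
 */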
static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
{
        if (qp->buf.nbufs == 1)
                return qp->buf.u.direct.buf + offset;
        else
                return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
                        (offset & (PAGE_SIZE - 1));
}

static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
{
        return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}

static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
{
        return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}

/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with 0xffffffff, except for
 * the very first chunk of the WQE.
 */
static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
{
        u32 *wqe = get_send_wqe(qp, n);
        int i;

        for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
                wqe[i] = 0xffffffff;
}

static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
        ib_event_rec_t event;
        struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;

        if (type == MLX4_EVENT_TYPE_PATH_MIG)
                to_mibqp(qp)->port = to_mibqp(qp)->alt_port;

        switch (type) {
        case MLX4_EVENT_TYPE_PATH_MIG:
                event.type = IB_EVENT_PATH_MIG;
                break;
        case MLX4_EVENT_TYPE_COMM_EST:
                event.type = IB_EVENT_COMM_EST;
                break;
        case MLX4_EVENT_TYPE_SQ_DRAINED:
                event.type = IB_EVENT_SQ_DRAINED;
                break;
        case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
                event.type = IB_EVENT_QP_LAST_WQE_REACHED;
                break;
        case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
                event.type = IB_EVENT_QP_FATAL;
                break;
        case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
                event.type = IB_EVENT_PATH_MIG_ERR;
                break;
        case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
                event.type = IB_EVENT_QP_REQ_ERR;
                break;
        case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
                event.type = IB_EVENT_QP_ACCESS_ERR;
                break;
        default:
                printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
                       "on QP %06x\n", type, qp->qpn);
                return;
        }

        event.context = ibqp->qp_context;
        ibqp->event_handler(&event);
}

static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
{
        /*
         * UD WQEs must have a datagram segment.
         * RC and UC WQEs might have a remote address segment.
         * MLX WQEs need two extra inline data segments (for the UD
         * header and space for the ICRC).
         */
        switch (type) {
        case IB_QPT_UD:
                return sizeof (struct mlx4_wqe_ctrl_seg) +
                        sizeof (struct mlx4_wqe_datagram_seg) +
                        ((flags & MLX4_IB_QP_LSO) ? 64 : 0);
        case IB_QPT_UC:
                return sizeof (struct mlx4_wqe_ctrl_seg) +
                        sizeof (struct mlx4_wqe_raddr_seg);
        case IB_QPT_RC:
                return sizeof (struct mlx4_wqe_ctrl_seg) +
                        sizeof (struct mlx4_wqe_atomic_seg) +
                        sizeof (struct mlx4_wqe_raddr_seg);
        case IB_QPT_SMI:
        case IB_QPT_GSI:
                return sizeof (struct mlx4_wqe_ctrl_seg) +
                        ALIGN(MLX4_IB_UD_HEADER_SIZE +
                              DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
                                           MLX4_INLINE_ALIGN) *
                              sizeof (struct mlx4_wqe_inline_seg),
                              sizeof (struct mlx4_wqe_data_seg)) +
                        ALIGN(4 +
                              sizeof (struct mlx4_wqe_inline_seg),
                              sizeof (struct mlx4_wqe_data_seg));
        default:
                return sizeof (struct mlx4_wqe_ctrl_seg);
        }
}

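/*
 * Size the receive queue: round the requested WR and SGE counts up to
 * powers of two and derive the WQE stride, or leave the RQ empty when
 * the QP is attached to an SRQ.
 */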
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
                       int is_user, int has_srq, struct mlx4_ib_qp *qp)
{
        /* Sanity check RQ size before proceeding */
        if ((int)cap->max_recv_wr  > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
            (int)cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
                return -EINVAL;

        if (has_srq) {
                /* QPs attached to an SRQ should have no RQ */
                if (cap->max_recv_wr)
                        return -EINVAL;

                qp->rq.wqe_cnt = qp->rq.max_gs = 0;
        } else {
                /* HW requires >= 1 RQ entry with >= 1 gather entry */
                if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
                        return -EINVAL;

                qp->rq.wqe_cnt   = roundup_pow_of_two(max(1U, cap->max_recv_wr));
                qp->rq.max_gs    = roundup_pow_of_two(max(1U, cap->max_recv_sge));
                qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
        }

        /* leave userspace return values as they were, so as not to break ABI */
        if (is_user) {
                cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
                cap->max_recv_sge = qp->rq.max_gs;
        } else {
                cap->max_recv_wr  = qp->rq.max_post =
                        min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
                cap->max_recv_sge = min(qp->rq.max_gs,
                                        min(dev->dev->caps.max_sq_sg,
                                            dev->dev->caps.max_rq_sg));
        }
        /* We don't support inline sends for kernel QPs (yet) */

        return 0;
}

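/*
 * Size the send queue for a kernel QP: the WQE stride must cover the
 * larger of the S/G list and the inline data plus the per-transport
 * overhead, and extra spare WQEs are added as prefetch headroom.
 */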
static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
                              enum ib_qp_type type, struct mlx4_ib_qp *qp)
{
        /* Sanity check SQ size before proceeding */
        if ((int)cap->max_send_wr  > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
            (int)cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
            (int)cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
            (int)sizeof(struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
                return -EINVAL;

        /*
         * For MLX transport we need 2 extra S/G entries:
         * one for the header and one for the checksum at the end
         */
        if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
            (int)cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
                return -EINVAL;

        qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
                                                        sizeof (struct mlx4_wqe_data_seg),
                                                        cap->max_inline_data +
                                                        sizeof (struct mlx4_wqe_inline_seg)) +
                                                    send_wqe_overhead(type, qp->flags)));
        qp->sq.wqe_shift = max(MLX4_IB_SQ_MIN_WQE_SHIFT, qp->sq.wqe_shift);
        qp->sq.max_gs    = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type, qp->flags)) /
                sizeof (struct mlx4_wqe_data_seg);

        /*
         * We need to leave 2 KB + 1 WQE of headroom in the SQ to
         * allow HW to prefetch.
         */
        qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift);
        qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);

        qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
                (qp->sq.wqe_cnt << qp->sq.wqe_shift);
        if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
                qp->rq.offset = 0;
                qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
        } else {
                qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
                qp->sq.offset = 0;
        }

        cap->max_send_wr = qp->sq.max_post =
                min(qp->sq.wqe_cnt - qp->sq_spare_wqes,
                    dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE);
        cap->max_send_sge = min(qp->sq.max_gs,
                                min(dev->dev->caps.max_sq_sg,
                                    dev->dev->caps.max_rq_sg));
        /* We don't support inline sends for kernel QPs (yet) */
        cap->max_inline_data = 0;

        return 0;
}

static int set_user_sq_size(struct mlx4_ib_dev *dev,
                            struct mlx4_ib_qp *qp,
                            struct mlx4_ib_create_qp *ucmd)
{
        /* Sanity check SQ size before proceeding */
        if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes       ||
            ucmd->log_sq_stride >
                ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
            ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
                return -EINVAL;

        qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;
        qp->sq.wqe_shift = ucmd->log_sq_stride;

        qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
                (qp->sq.wqe_cnt << qp->sq.wqe_shift);

        return 0;
}

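/*
 * Common QP creation: userspace QPs use a caller-supplied buffer and a
 * user-mapped doorbell record, kernel QPs allocate their own buffer,
 * doorbell record and wrid arrays; in both cases the MTTs are written
 * and a QPN is reserved unless the caller passed one in.
 */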
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                            struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, u32 sqpn, struct mlx4_ib_qp *qp)
{
        int err;
        BOOLEAN range_allocated = FALSE;

        mutex_init(&qp->mutex);
        spin_lock_init(&qp->sq.lock);
        spin_lock_init(&qp->rq.lock);

        qp->state        = XIB_QPS_RESET;
        qp->atomic_rd_en = 0;
        qp->resp_depth   = 0;

        qp->rq.head = 0;
        qp->rq.tail = 0;
        qp->sq.head = 0;
        qp->sq.tail = 0;

        err = set_rq_size(dev, &init_attr->cap, !!pd->p_uctx, !!init_attr->srq, qp);
        if (err)
                goto err;

        if (pd->p_uctx) {
                struct mlx4_ib_create_qp ucmd;

                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                        err = -EFAULT;
                        goto err;
                }

                qp->sq_no_prefetch = ucmd.sq_no_prefetch;

                err = set_user_sq_size(dev, qp, &ucmd);
                if (err)
                        goto err;

                qp->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,
                                       qp->buf_size, 0, FALSE);
                if (IS_ERR(qp->umem)) {
                        err = PTR_ERR(qp->umem);
                        goto err;
                }

                err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
                                    ilog2(qp->umem->page_size), &qp->mtt);
                if (err)
                        goto err_buf;

                err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
                if (err)
                        goto err_mtt;

                if (!init_attr->srq) {
                        err = mlx4_ib_db_map_user(to_mucontext(pd->p_uctx),
                                                  ucmd.db_addr, &qp->db);
                        if (err)
                                goto err_mtt;
                }
        } else {
                qp->sq_no_prefetch = 0;

                if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
                        qp->flags |= MLX4_IB_QP_LSO;

                err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
                if (err)
                        goto err;

                if (!init_attr->srq) {
                        err = mlx4_ib_db_alloc(dev, &qp->db, 0);
                        if (err)
                                goto err;

                        *qp->db.db = 0;
                }

                if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
                        err = -ENOMEM;
                        goto err_db;
                }

                err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
                                    &qp->mtt);
                if (err)
                        goto err_buf;

                err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
                if (err)
                        goto err_mtt;

                if (qp->sq.wqe_cnt) {
                        qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
                        if (!qp->sq.wrid) {
                                err = -ENOMEM;
                                goto err_wrid;
                        }
                }

                if (qp->rq.wqe_cnt) {
                        qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
                        if (!qp->rq.wrid) {
                                err = -ENOMEM;
                                goto err_wrid;
                        }
                }
        }

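        /*
         * No QPN was passed in (regular QP), so reserve one from the
         * general range and remember that we did: the error path must
         * release the range only in that case, never for special QPs
         * that come with a fixed QPN.
         */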
        if (!sqpn) {
                err = mlx4_qp_reserve_range(dev->dev, 1, 1, &sqpn);
                if (err)
                        goto err_wrid;
                range_allocated = TRUE;
        }

        err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
        if (err)
                goto err_range;

        /*
         * Hardware wants QPN written in big-endian order (after
         * shifting) for send doorbell.  Precompute this value to save
         * a little bit when posting sends.
         */
        qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);

        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
        else
                qp->sq_signal_bits = 0;

        qp->mqp.event = mlx4_ib_qp_event;

        return 0;

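/* Error unwind: undo the steps above in reverse order of acquisition. */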
err_range:
        if (range_allocated)
                mlx4_qp_release_range(dev->dev, sqpn, 1);

err_wrid:
        if (pd->p_uctx) {
                if (!init_attr->srq)
                        mlx4_ib_db_unmap_user(to_mucontext(pd->p_uctx),
                                              &qp->db);
        } else {
                if (qp->sq.wrid)
                        kfree(qp->sq.wrid);
                if (qp->rq.wrid)
                        kfree(qp->rq.wrid);
        }

err_mtt:
        mlx4_mtt_cleanup(dev->dev, &qp->mtt);

err_buf:
        if (pd->p_uctx)
                ib_umem_release(qp->umem);
        else
                mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);

err_db:
        if (!pd->p_uctx && !init_attr->srq)
                mlx4_ib_db_free(dev, &qp->db);

err:
        return err;
}

static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
{
        switch (state) {
        case XIB_QPS_RESET:     return MLX4_QP_STATE_RST;
        case XIB_QPS_INIT:      return MLX4_QP_STATE_INIT;
        case XIB_QPS_RTR:       return MLX4_QP_STATE_RTR;
        case XIB_QPS_RTS:       return MLX4_QP_STATE_RTS;
        case XIB_QPS_SQD:       return MLX4_QP_STATE_SQD;
        case XIB_QPS_SQE:       return MLX4_QP_STATE_SQER;
        case XIB_QPS_ERR:       return MLX4_QP_STATE_ERR;
        default:                return -1;
        }
}

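/*
 * Lock both CQs for QP teardown; when they differ, always take the CQ
 * with the lower CQN first to keep the lock order consistent.
 */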
static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
{
        if (send_cq == recv_cq)
                spin_lock_irq(&send_cq->lock);
        else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
                spin_lock_irq(&send_cq->lock);
                spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
        } else {
                spin_lock_irq(&recv_cq->lock);
                spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
        }
}

static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
{
        if (send_cq == recv_cq)
                spin_unlock_irq(&send_cq->lock);
        else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
                spin_unlock(&recv_cq->lock);
                spin_unlock_irq(&send_cq->lock);
        } else {
                spin_unlock(&send_cq->lock);
                spin_unlock_irq(&recv_cq->lock);
        }
}

static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
                              int is_user)
{
        struct mlx4_ib_cq *send_cq, *recv_cq;

        if (qp->state != XIB_QPS_RESET)
                if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
                                   MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
                        printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
                               qp->mqp.qpn);

        send_cq = to_mcq(qp->ibqp.send_cq);
        recv_cq = to_mcq(qp->ibqp.recv_cq);

        mlx4_ib_lock_cqs(send_cq, recv_cq);

        if (!is_user) {
                __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
                                   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                if (send_cq != recv_cq)
                        __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
        }

        mlx4_qp_remove(dev->dev, &qp->mqp);

        mlx4_ib_unlock_cqs(send_cq, recv_cq);

        mlx4_qp_free(dev->dev, &qp->mqp);

        if (!is_sqp(dev, qp))
                mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);

        mlx4_mtt_cleanup(dev->dev, &qp->mtt);

        if (is_user) {
                if (!qp->ibqp.srq)
                        mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.p_uctx),
                                              &qp->db);
                ib_umem_release(qp->umem);
        } else {
                kfree(qp->sq.wrid);
                kfree(qp->rq.wrid);
                mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
                if (!qp->ibqp.srq)
                        mlx4_ib_db_free(dev, &qp->db);
        }
}

struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                                struct ib_qp_init_attr *init_attr,
                                struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_sqp *sqp;
        struct mlx4_ib_qp *qp;
        int err;

        /* TODO: consider removing: we only support LSO, and only for kernel UD QPs. */
        /*if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO)
                return ERR_PTR(-EINVAL);
        if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO &&
                (pd->uobject || init_attr->qp_type != IB_QPT_UD))
                return ERR_PTR(-EINVAL);*/

        if (mlx4_is_barred(pd->device->dma_device))
                return ERR_PTR(-EFAULT);

        switch (init_attr->qp_type) {
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_UD:
        {
                qp = kzalloc(sizeof *qp, GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);

                err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
                if (err) {
                        kfree(qp);
                        return ERR_PTR(err);
                }

                qp->ibqp.qp_num = qp->mqp.qpn;

                break;
        }
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
                /* Userspace is not allowed to create special QPs: */
                if (pd->p_uctx)
                        return ERR_PTR(-EINVAL);

                sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
                if (!sqp)
                        return ERR_PTR(-ENOMEM);

                qp = &sqp->qp;

                err = create_qp_common(dev, pd, init_attr, udata,
                                       dev->dev->caps.sqp_start +
                                       (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
                                       init_attr->port_num - 1,
                                       qp);
                if (err) {
                        kfree(sqp);
                        return ERR_PTR(err);
                }

                qp->port        = init_attr->port_num;
                qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;

                break;
        }
        default:
                /* Don't support raw QPs */
                return ERR_PTR(-EINVAL);
        }

        return &qp->ibqp;
}

int mlx4_ib_destroy_qp(struct ib_qp *qp)
{
        struct mlx4_ib_dev *dev = to_mdev(qp->device);
        struct mlx4_ib_qp *mqp = to_mqp(qp);

        if (!mlx4_is_barred(dev->dev) && is_qp0(dev, mqp))
                mlx4_CLOSE_PORT(dev->dev, mqp->port);

        destroy_qp_common(dev, mqp, !!qp->pd->p_uctx);

        if (is_sqp(dev, mqp))
                kfree(to_msqp(mqp));
        else
                kfree(mqp);

        return 0;
}

static int to_mlx4_st(enum ib_qp_type type)
{
        switch (type) {
        case IB_QPT_RC:         return MLX4_QP_ST_RC;
        case IB_QPT_UC:         return MLX4_QP_ST_UC;
        case IB_QPT_UD:         return MLX4_QP_ST_UD;
        case IB_QPT_SMI:
        case IB_QPT_GSI:        return MLX4_QP_ST_MLX;
        default:                return -1;
        }
}

static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
                                   int attr_mask)
{
        u8 dest_rd_atomic;
        u32 access_flags;
        u32 hw_access_flags = 0;

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                dest_rd_atomic = attr->max_dest_rd_atomic;
        else
                dest_rd_atomic = qp->resp_depth;

        if (attr_mask & IB_QP_ACCESS_FLAGS)
                access_flags = attr->qp_access_flags;
        else
                access_flags = qp->atomic_rd_en;

        if (!dest_rd_atomic)
                access_flags &= IB_ACCESS_REMOTE_WRITE;

        if (access_flags & IB_ACCESS_REMOTE_READ)
                hw_access_flags |= MLX4_QP_BIT_RRE;
        if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
                hw_access_flags |= MLX4_QP_BIT_RAE;
        if (access_flags & IB_ACCESS_REMOTE_WRITE)
                hw_access_flags |= MLX4_QP_BIT_RWE;

        return cpu_to_be32(hw_access_flags);
}

static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
                            int attr_mask)
{
        if (attr_mask & IB_QP_PKEY_INDEX)
                sqp->pkey_index = attr->pkey_index;
        if (attr_mask & IB_QP_QKEY)
                sqp->qkey = attr->qkey;
        if (attr_mask & IB_QP_SQ_PSN)
                sqp->send_psn = attr->sq_psn;
}

static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
{
        path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}

static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
                         struct mlx4_qp_path *path, u8 port)
{
        path->grh_mylmc     = ah->src_path_bits & 0x7f;
        path->rlid          = cpu_to_be16(ah->dlid);
        if (ah->static_rate) {
                path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
                while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
                       !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
                        --path->static_rate;
        } else
                path->static_rate = 0;
        path->counter_index = 0xff;

        if (ah->ah_flags & IB_AH_GRH) {
                if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
                        printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
                               ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
                        return -1;
                }

                path->grh_mylmc |= 1 << 7;
                path->mgid_index = ah->grh.sgid_index;
                path->hop_limit  = ah->grh.hop_limit;
                path->tclass_flowlabel =
                        cpu_to_be32((ah->grh.traffic_class << 20) |
                                    (ah->grh.flow_label));
                memcpy(path->rgid, ah->grh.dgid.raw, 16);
        }

        path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
                ((port - 1) << 6) | ((ah->sl & 0xf) << 2);

        return 0;
}

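/*
 * Build an mlx4_qp_context from the given attributes and hand it to the
 * firmware to move the QP from cur_state to new_state.
 */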
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                               const struct ib_qp_attr *attr, int attr_mask,
                               enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
        struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx4_ib_qp *qp = to_mqp(ibqp);
        struct mlx4_qp_context *context;
        enum mlx4_qp_optpar optpar = 0;
        int sqd_event;
        int err = -EINVAL;

        context = kzalloc(sizeof *context, GFP_KERNEL);
        if (!context)
                return -ENOMEM;

        context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
                                     (to_mlx4_st(ibqp->qp_type) << 16));
        context->flags |= cpu_to_be32(1 << 8); /* DE? */

        if (!(attr_mask & IB_QP_PATH_MIG_STATE))
                context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
        else {
                optpar |= MLX4_QP_OPTPAR_PM_STATE;
                switch (attr->path_mig_state) {
                case IB_MIG_MIGRATED:
                        context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
                        break;
                case IB_MIG_REARM:
                        context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
                        break;
                case IB_MIG_ARMED:
                        context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
                        break;
                }
        }

        if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
                context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
        else if (ibqp->qp_type == IB_QPT_UD) {
                if (qp->flags & MLX4_IB_QP_LSO)
                        context->mtu_msgmax = (u8)((IB_MTU_4096 << 5) |
                                        ilog2(dev->dev->caps.max_gso_sz));
                else
                        context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
        } else if (attr_mask & IB_QP_PATH_MTU) {
                if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
                        printk(KERN_ERR "path MTU (%u) is invalid\n",
                               attr->path_mtu);
                        goto out;
                }
                context->mtu_msgmax = (u8)((attr->path_mtu << 5) |
                        ilog2(dev->dev->caps.max_msg_sz));
        }

        if (qp->rq.wqe_cnt)
                context->rq_size_stride = (u8)(ilog2(qp->rq.wqe_cnt) << 3);
        context->rq_size_stride |= qp->rq.wqe_shift - 4;

        if (qp->sq.wqe_cnt)
                context->sq_size_stride = (u8)(ilog2(qp->sq.wqe_cnt) << 3);
        context->sq_size_stride |= qp->sq.wqe_shift - 4;

        if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT)
                context->sq_size_stride |= !!qp->sq_no_prefetch << 7;

        if (qp->ibqp.p_uctx)
                context->usr_page = cpu_to_be32(to_mucontext(ibqp->p_uctx)->uar.index);
        else
                context->usr_page = cpu_to_be32(dev->priv_uar.index);

        if (attr_mask & IB_QP_DEST_QPN)
                context->remote_qpn = cpu_to_be32(attr->dest_qp_num);

        if (attr_mask & IB_QP_PORT) {
                if (cur_state == XIB_QPS_SQD && new_state == XIB_QPS_SQD &&
                    !(attr_mask & IB_QP_AV)) {
                        mlx4_set_sched(&context->pri_path, attr->port_num);
                        optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
                }
        }

        if (attr_mask & IB_QP_PKEY_INDEX) {
                context->pri_path.pkey_index = (u8)attr->pkey_index;
                optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
        }

        if (attr_mask & IB_QP_AV) {
                if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
                                  attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
                        goto out;

                optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
                           MLX4_QP_OPTPAR_SCHED_QUEUE);
        }

        if (attr_mask & IB_QP_TIMEOUT) {
                context->pri_path.ackto = attr->timeout << 3;
                optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
        }

        if (attr_mask & IB_QP_ALT_PATH) {
                if (attr->alt_port_num == 0 ||
                    attr->alt_port_num > dev->dev->caps.num_ports)
                        goto out;

                if (attr->alt_pkey_index >=
                    dev->dev->caps.pkey_table_len[attr->alt_port_num])
                        goto out;

                if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
                                  attr->alt_port_num))
                        goto out;

                context->alt_path.pkey_index = (u8)attr->alt_pkey_index;
                context->alt_path.ackto = attr->alt_timeout << 3;
                optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
        }

        context->pd      = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
        context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);

        if (attr_mask & IB_QP_RNR_RETRY) {
                context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
                optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
        }

        if (attr_mask & IB_QP_RETRY_CNT) {
                context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
                optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
        }

        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
                if (attr->max_rd_atomic)
                        context->params1 |=
                                cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
                optpar |= MLX4_QP_OPTPAR_SRA_MAX;
        }

        if (attr_mask & IB_QP_SQ_PSN)
                context->next_send_psn = cpu_to_be32(attr->sq_psn);

        context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
                if (attr->max_dest_rd_atomic)
                        context->params2 |=
                                cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
                optpar |= MLX4_QP_OPTPAR_RRA_MAX;
        }

        if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
                context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
                optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
        }

        if (ibqp->srq)
                context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);

        if (attr_mask & IB_QP_MIN_RNR_TIMER) {
                context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
                optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
        }
        if (attr_mask & IB_QP_RQ_PSN)
                context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);

        context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);

        if (attr_mask & IB_QP_QKEY) {
                context->qkey = cpu_to_be32(attr->qkey);
                optpar |= MLX4_QP_OPTPAR_Q_KEY;
        }

        if (ibqp->srq)
                context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);

        if (!ibqp->srq && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT)
                context->db_rec_addr = cpu_to_be64(qp->db.dma.da);

        if (cur_state == XIB_QPS_INIT &&
            new_state == XIB_QPS_RTR  &&
            (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
             ibqp->qp_type == IB_QPT_UD)) {
                context->pri_path.sched_queue = (qp->port - 1) << 6;
                if (is_qp0(dev, qp))
                        context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
                else
                        context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
        }

        if (cur_state == XIB_QPS_RTS && new_state == XIB_QPS_SQD &&
            attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
                sqd_event = 1;
        else
                sqd_event = 0;

        /*
         * Before passing a kernel QP to the HW, make sure that the
         * ownership bits of the send queue are set and the SQ
         * headroom is stamped so that the hardware doesn't start
         * processing stale work requests.
         */
        if (!ibqp->p_uctx && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT) {
                struct mlx4_wqe_ctrl_seg *ctrl;
                int i;

                for (i = 0; i < qp->sq.wqe_cnt; ++i) {
                        ctrl = get_send_wqe(qp, i);
                        ctrl->owner_opcode = cpu_to_be32(1 << 31);

                        stamp_send_wqe(qp, i);
                }
        }

        err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
                             to_mlx4_state(new_state), context, optpar,
                             sqd_event, &qp->mqp);
        if (err)
                goto out;

        qp->state = new_state;

        if (attr_mask & IB_QP_ACCESS_FLAGS)
                qp->atomic_rd_en = (u8)attr->qp_access_flags;
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                qp->resp_depth = attr->max_dest_rd_atomic;
        if (attr_mask & IB_QP_PORT)
                qp->port = attr->port_num;
        if (attr_mask & IB_QP_ALT_PATH)
                qp->alt_port = attr->alt_port_num;

        if (is_sqp(dev, qp))
                store_sqp_attrs(to_msqp(qp), attr, attr_mask);

        /*
         * If we moved QP0 to RTR, bring the IB link up; if we moved
         * QP0 to RESET or ERROR, bring the link back down.
         */
        if (is_qp0(dev, qp)) {
                if (cur_state != XIB_QPS_RTR && new_state == XIB_QPS_RTR)
                        if (mlx4_INIT_PORT(dev->dev, qp->port))
                                printk(KERN_WARNING "INIT_PORT failed for port %d\n",
                                       qp->port);

                if (cur_state != XIB_QPS_RESET && cur_state != XIB_QPS_ERR &&
                    (new_state == XIB_QPS_RESET || new_state == XIB_QPS_ERR))
                        mlx4_CLOSE_PORT(dev->dev, qp->port);
        }

        /*
         * If we moved a kernel QP to RESET, clean up all old CQ
         * entries and reinitialize the QP.
         */
        if (new_state == XIB_QPS_RESET && !ibqp->p_uctx) {
                mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
                                 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
                if (ibqp->send_cq != ibqp->recv_cq)
                        mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);

                qp->rq.head = 0;
                qp->rq.tail = 0;
                qp->sq.head = 0;
                qp->sq.tail = 0;
                if (!ibqp->srq)
                        *qp->db.db = 0;
        }

out:
        kfree(context);
        return err;
}

static struct ib_qp_attr mlx4_ib_qp_attr;
static int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1];

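/*
 * Pre-build the dummy attributes and per-type attribute masks used to
 * take a QP through RESET->INIT when a direct RESET->ERR transition is
 * requested in mlx4_ib_modify_qp() below.
 */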
void mlx4_ib_qp_init()
{
        memset( &mlx4_ib_qp_attr, 0, sizeof(mlx4_ib_qp_attr) );
        mlx4_ib_qp_attr.port_num = 1;

        memset( &mlx4_ib_qp_attr_mask_table, 0, sizeof(mlx4_ib_qp_attr_mask_table) );
        mlx4_ib_qp_attr_mask_table[IB_QPT_UD]  = (IB_QP_PKEY_INDEX |
                                                  IB_QP_PORT       |
                                                  IB_QP_QKEY);
        mlx4_ib_qp_attr_mask_table[IB_QPT_UC]  = (IB_QP_PKEY_INDEX |
                                                  IB_QP_PORT       |
                                                  IB_QP_ACCESS_FLAGS);
        mlx4_ib_qp_attr_mask_table[IB_QPT_RC]  = (IB_QP_PKEY_INDEX |
                                                  IB_QP_PORT       |
                                                  IB_QP_ACCESS_FLAGS);
        mlx4_ib_qp_attr_mask_table[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
                                                  IB_QP_QKEY);
        mlx4_ib_qp_attr_mask_table[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
                                                  IB_QP_QKEY);
}

int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                      int attr_mask, struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx4_ib_qp *qp = to_mqp(ibqp);
        enum ib_qp_state cur_state, new_state;
        int err = -EINVAL;

        UNUSED_PARAM(udata);

        if (mlx4_is_barred(dev->dev))
                return -EFAULT;

        mutex_lock(&qp->mutex);

        cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
        new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

        if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
                goto out;

        if ((attr_mask & IB_QP_PORT) &&
            (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
                goto out;
        }

        if (attr_mask & IB_QP_PKEY_INDEX) {
                int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
                if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])
                        goto out;
        }

        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
            attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
                goto out;
        }

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
            attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
                goto out;
        }

        if (cur_state == new_state && cur_state == XIB_QPS_RESET) {
                err = 0;
                goto out;
        }

        if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_ERR) {
                err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
                                          mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
                                          XIB_QPS_RESET, XIB_QPS_INIT);
                if (err)
                        goto out;
                cur_state = XIB_QPS_INIT;
        }

        err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);

out:
        mutex_unlock(&qp->mutex);
        return err;
}

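/* Map the work request type and send options onto an ib_wr_opcode. */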
static enum ib_wr_opcode to_wr_opcode(struct _ib_send_wr *wr)
{
        enum ib_wr_opcode opcode = -1; //= wr->wr_type;

        switch (wr->wr_type) {
                case WR_SEND:
                        opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_SEND_WITH_IMM : IB_WR_SEND;
                        break;
                case WR_LSO:
                        opcode = IB_WR_LSO;
                        break;
                case WR_RDMA_WRITE:
                        opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_RDMA_WRITE_WITH_IMM : IB_WR_RDMA_WRITE;
                        break;
                case WR_RDMA_READ:
                        opcode = IB_WR_RDMA_READ;
                        break;
                case WR_COMPARE_SWAP:
                        opcode = IB_WR_ATOMIC_CMP_AND_SWP;
                        break;
                case WR_FETCH_ADD:
                        opcode = IB_WR_ATOMIC_FETCH_AND_ADD;
                        break;
        }
        return opcode;
}

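/*
 * Build the LRH/GRH/BTH/DETH header for a send on an MLX-transport
 * (special) QP in software and copy it into the WQE as one or two
 * inline segments, returning the number of WQE bytes consumed.
 */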
static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr,
                            void *wqe)
{
        enum ib_wr_opcode opcode = to_wr_opcode(wr);
        struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
        struct mlx4_wqe_mlx_seg *mlx = wqe;
        struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof *mlx);
        struct mlx4_ib_ah *ah = to_mah((struct ib_ah *)wr->dgrm.ud.h_av);
        __be16 pkey;
        int send_size;
        int header_size;
        int spc;
        u32 i;

        send_size = 0;
        for (i = 0; i < wr->num_ds; ++i)
                send_size += wr->ds_array[i].length;

        ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);

        sqp->ud_header.lrh.service_level   =
                (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);
        sqp->ud_header.lrh.destination_lid = ah->av.dlid;
        sqp->ud_header.lrh.source_lid      = cpu_to_be16(ah->av.g_slid & 0x7f);
        if (mlx4_ib_ah_grh_present(ah)) {
                sqp->ud_header.grh.traffic_class =
                        (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff);
                sqp->ud_header.grh.flow_label    =
                        ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
                sqp->ud_header.grh.hop_limit     = ah->av.hop_limit;
                ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >> 24),
                                  ah->av.gid_index, &sqp->ud_header.grh.source_gid);
                memcpy(sqp->ud_header.grh.destination_gid.raw,
                       ah->av.dgid, 16);
        }

        mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
        mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
                                  (sqp->ud_header.lrh.destination_lid ==
                                   XIB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
                                  (sqp->ud_header.lrh.service_level << 8));
        mlx->rlid   = sqp->ud_header.lrh.destination_lid;

        switch (opcode) {
        case IB_WR_SEND:
                sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;
                sqp->ud_header.immediate_present = 0;
                break;
        case IB_WR_SEND_WITH_IMM:
                sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                sqp->ud_header.immediate_present = 1;
                sqp->ud_header.immediate_data    = wr->immediate_data;
                break;
        default:
                return -EINVAL;
        }

        sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
        if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
                sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
        sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt & IB_SEND_OPT_SOLICITED));
        if (!sqp->qp.ibqp.qp_num)
                ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
        else
                ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->dgrm.ud.pkey_index, &pkey);
        sqp->ud_header.bth.pkey = pkey;
        sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;
        sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
        sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ?
                cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;
        sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);

        header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);

#if 0
        {
                printk(KERN_ERR "built UD header of size %d:\n", header_size);
                for (i = 0; i < header_size / 4; ++i) {
                        if (i % 8 == 0)
                                printk("  [%02x] ", i * 4);
                        printk(" %08x",
                               be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
                        if ((i + 1) % 8 == 0)
                                printk("\n");
                }
                printk("\n");
        }
#endif

        /*
         * Inline data segments may not cross a 64 byte boundary.  If
         * our UD header is bigger than the space available up to the
         * next 64 byte boundary in the WQE, use two inline data
         * segments to hold the UD header.
         */
        spc = MLX4_INLINE_ALIGN -
                ((u32)(ULONG_PTR)(inl + 1) & (MLX4_INLINE_ALIGN - 1));
        if (header_size <= spc) {
                inl->byte_count = cpu_to_be32(1 << 31 | header_size);
                memcpy(inl + 1, sqp->header_buf, header_size);
                i = 1;
        } else {
                inl->byte_count = cpu_to_be32(1 << 31 | spc);
                memcpy(inl + 1, sqp->header_buf, spc);

                inl = (void*)((u8*)(inl + 1) + spc);
                memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
                /*
                 * Need a barrier here to make sure all the data is
                 * visible before the byte_count field is set.
                 * Otherwise the HCA prefetcher could grab the 64-byte
                 * chunk with this inline segment and get a valid (!=
                 * 0xffffffff) byte count but stale data, and end up
                 * generating a packet with bad headers.
                 *
                 * The first inline segment's byte_count field doesn't
                 * need a barrier, because it comes after a
                 * control/MLX segment and therefore is at an offset
                 * of 16 mod 64.
                 */
                wmb();
                inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
                i = 2;
        }

        return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
}

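/*
 * Check whether posting nreq more WRs would overflow the work queue.
 * The first check is lockless and may see a stale tail; on apparent
 * overflow, re-read head/tail under the CQ lock, which serializes
 * against completion processing that advances the tail.
 */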
1282 static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)\r
1283 {\r
1284         unsigned cur;\r
1285         struct mlx4_ib_cq *cq;\r
1286 \r
1287         cur = wq->head - wq->tail;\r
1288         if (likely((int)cur + nreq < wq->max_post))\r
1289                 return 0;\r
1290 \r
1291         cq = to_mcq(ib_cq);\r
1292         spin_lock(&cq->lock);\r
1293         cur = wq->head - wq->tail;\r
1294         spin_unlock(&cq->lock);\r
1295 \r
1296         return (int)cur + nreq >= wq->max_post;\r
1297 }\r
1298 \r
1299 static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,\r
1300                                           u64 remote_addr, __be32 rkey)\r
1301 {\r
1302         rseg->raddr    = cpu_to_be64(remote_addr);\r
1303         rseg->rkey     = rkey;\r
1304         rseg->reserved = 0;\r
1305 }\r
1306 \r
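/*\r
 * set_atomic_seg() - fill the WQE atomic segment from the work request:\r
 * for compare & swap, atomic1 is the compare value and atomic2 the swap\r
 * value; for fetch & add, atomic1 is the value to add.\r
 */\r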
1307 static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, ib_send_wr_t *wr)\r
1308 {\r
1309         if (wr->wr_type == WR_COMPARE_SWAP) {\r
1310                 aseg->swap_add = wr->remote_ops.atomic2;\r
1311                 aseg->compare  = wr->remote_ops.atomic1;\r
1312         } else {\r
1313                 aseg->swap_add = wr->remote_ops.atomic1;\r
1314                 aseg->compare  = 0;\r
1315         }\r
1316 \r
1317 }\r
1318 \r
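/*\r
 * set_datagram_seg() - copy the mlx4 address vector from the work request's\r
 * AH, plus the remote QPN and Q_Key, into the UD datagram segment.\r
 */\r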
1319 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,\r
1320                              ib_send_wr_t *wr)\r
1321 {\r
1322         memcpy(dseg->av, &to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, sizeof (struct mlx4_av));\r
1323         dseg->dqpn = wr->dgrm.ud.remote_qp;\r
1324         dseg->qkey = wr->dgrm.ud.remote_qkey;\r
1325 }\r
1326 \r
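/*\r
 * set_mlx_icrc_seg() - build the extra inline data segment appended to\r
 * MLX (QP0/QP1) sends to hold the ICRC: zero the four placeholder bytes,\r
 * then, after the barrier below, mark the segment as 4 bytes of inline data.\r
 */\r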
1327 static void set_mlx_icrc_seg(void *dseg)\r
1328 {\r
1329         u32 *t = dseg;\r
1330         struct mlx4_wqe_inline_seg *iseg = dseg;\r
1331 \r
1332         t[1] = 0;\r
1333 \r
1334         /*\r
1335          * Need a barrier here before writing the byte_count field to\r
1336          * make sure that all the data is visible before the\r
1337          * byte_count field is set.  Otherwise, if the segment begins\r
1338          * a new cacheline, the HCA prefetcher could grab the 64-byte\r
1339          * chunk and get a valid (!= 0xffffffff) byte count but\r
1340          * stale data, and end up sending the wrong data.\r
1341          */\r
1342         wmb();\r
1343 \r
1344         iseg->byte_count = cpu_to_be32((1 << 31) | 4);\r
1345 }\r
1346 \r
1347 static void set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg)\r
1348 {\r
1349         dseg->lkey       = cpu_to_be32(sg->lkey);\r
1350         dseg->addr       = cpu_to_be64(sg->vaddr);\r
1351 \r
1352         /*\r
1353          * Need a barrier here before writing the byte_count field to\r
1354          * make sure that all the data is visible before the\r
1355          * byte_count field is set.  Otherwise, if the segment begins\r
1356          * a new cacheline, the HCA prefetcher could grab the 64-byte\r
1357          * chunk and get a valid (!= 0xffffffff) byte count but\r
1358          * stale data, and end up sending the wrong data.\r
1359          */\r
1360         wmb();\r
1361 \r
1362         dseg->byte_count = cpu_to_be32(sg->length);\r
1363 }\r
1364 \r
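/*\r
 * __set_data_seg() - plain (no-barrier) variant used for receive-queue\r
 * scatter entries; ordering against the HCA is provided by the wmb()\r
 * before the doorbell record update in mlx4_ib_post_recv().\r
 */\r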
1365 static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg)\r
1366 {\r
1367         dseg->byte_count = cpu_to_be32(sg->length);\r
1368         dseg->lkey       = cpu_to_be32(sg->lkey);\r
1369         dseg->addr       = cpu_to_be64(sg->vaddr);\r
1370 }\r
1371 \r
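/*\r
 * build_lso_seg() - build the LSO segment for a UD send: validate that the\r
 * segment (header included) fits the current 64-byte firmware limit and,\r
 * for QPs created without the LSO flag, that enough S/G slots remain; copy\r
 * the packet headers into the WQE and write the MSS/header-size word.  On\r
 * success, *lso_seg_len receives the 16-byte-aligned segment length.\r
 */\r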
1372 static int build_lso_seg(struct mlx4_lso_seg *wqe, ib_send_wr_t *wr,\r
1373                                                  struct mlx4_ib_qp *qp, unsigned *lso_seg_len)\r
1374 {\r
1375         unsigned halign = ALIGN(sizeof *wqe + wr->dgrm.ud.hlen, 16);\r
1376         void * ds;\r
1377         /*\r
1378          * This is a temporary limitation and will be removed in\r
1379          * a forthcoming FW release:\r
1380          */\r
1381         if (unlikely(halign > 64))\r
1382                 return -EINVAL;\r
1383 \r
1384         if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&\r
1385                 wr->num_ds > qp->sq.max_gs - (halign >> 4)))\r
1386                 return -EINVAL;\r
1387         *lso_seg_len = halign;\r
1388         ds = (u8 *) (void *) wqe + halign;\r
1389 \r
1390         /* TODO: copy the header directly from the physical/virtual address that IPoIB supplies with the first data segment. */\r
1391         memcpy(wqe->header, wr->dgrm.ud.header, wr->dgrm.ud.hlen);\r
1392         \r
1393         /* make sure LSO header is written before overwriting stamping */\r
1394         wmb();\r
1395 \r
1396         wqe->mss_hdr_size = cpu_to_be32((wr->dgrm.ud.mss - wr->dgrm.ud.hlen) << 16 |\r
1397                                                                         wr->dgrm.ud.hlen);\r
1398         \r
1399         return 0;\r
1400 }\r
1401 \r
1402 \r
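/*\r
 * mlx4_ib_post_send() - post a chain of send work requests to the QP's\r
 * send queue.  For each WR this checks for SQ overflow and S/G limits,\r
 * builds the control segment, any transport-specific segments (RDMA/atomic,\r
 * UD datagram, LSO, or MLX header for QP0/QP1) and the data segments\r
 * (written in reverse so the cacheline stamp is overwritten last), then\r
 * hands ownership to the HCA and rings the send doorbell once at the end.\r
 *\r
 * Minimal caller sketch (hypothetical values; buf_addr, len, lkey and ibqp\r
 * are placeholders, IBAL-style WR fields as used throughout this file):\r
 *\r
 *      ib_local_ds_t ds;\r
 *      ib_send_wr_t wr, *bad_wr;\r
 *      ds.vaddr = buf_addr;  ds.length = len;  ds.lkey = lkey;\r
 *      memset(&wr, 0, sizeof(wr));\r
 *      wr.wr_type  = WR_SEND;\r
 *      wr.send_opt = IB_SEND_OPT_SIGNALED;\r
 *      wr.num_ds   = 1;\r
 *      wr.ds_array = &ds;\r
 *      err = mlx4_ib_post_send(ibqp, &wr, &bad_wr);\r
 */\r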
1403 int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,\r
1404                       ib_send_wr_t **bad_wr)\r
1405 {\r
1406         enum ib_wr_opcode opcode;\r
1407         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1408         u8 *wqe /*, *wqe_start*/;\r
1409         struct mlx4_wqe_ctrl_seg *ctrl;\r
1410         struct mlx4_wqe_data_seg *dseg;\r
1411         unsigned long flags;\r
1412         int nreq;\r
1413         int err = 0;\r
1414         int ind;\r
1415         int size;\r
1416         unsigned seglen;\r
1417         int i;\r
1418         int j = 0;\r
1419 \r
1420         if (mlx4_is_barred(ibqp->device->dma_device))\r
1421                 return -EFAULT;\r
1422 \r
1423         spin_lock_irqsave(&qp->sq.lock, &flags);\r
1424 \r
1425         ind = qp->sq.head;\r
1426 \r
1427         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
1428                 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {\r
1429                         err = -ENOMEM;\r
1430                         if (bad_wr)\r
1431                                 *bad_wr = wr;\r
1432                         goto out;\r
1433                 }\r
1434 \r
1435                 if (unlikely(wr->num_ds > (u32)qp->sq.max_gs)) {\r
1436                         err = -EINVAL;\r
1437                         if (bad_wr)\r
1438                                 *bad_wr = wr;\r
1439                         goto out;\r
1440                 }\r
1441 \r
1442                 /*wqe_start = */\r
1443                 wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));\r
1444                 ctrl = (void*)wqe;\r
1445                 qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;\r
1446                 opcode = to_wr_opcode(wr);\r
1447 \r
1448                 ctrl->srcrb_flags =\r
1449                         (wr->send_opt & IB_SEND_OPT_SIGNALED ?\r
1450                          cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |\r
1451                         (wr->send_opt & IB_SEND_OPT_SOLICITED ?\r
1452                          cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |\r
1453                         (wr->send_opt & IB_SEND_OPT_TX_IP_CSUM ?\r
1454                          cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM) : 0) |\r
1455                         (wr->send_opt & IB_SEND_OPT_TX_TCP_UDP_CSUM ?\r
1456                          cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |\r
1457                         qp->sq_signal_bits;\r
1458 \r
1459                 if (opcode == IB_WR_SEND_WITH_IMM ||\r
1460                     opcode == IB_WR_RDMA_WRITE_WITH_IMM)\r
1461                         ctrl->imm = wr->immediate_data;\r
1462                 else\r
1463                         ctrl->imm = 0;\r
1464 \r
1465                 wqe += sizeof *ctrl;\r
1466                 size = sizeof *ctrl / 16;\r
1467 \r
1468                 switch (ibqp->qp_type) {\r
1469                 case IB_QPT_RC:\r
1470                 case IB_QPT_UC:\r
1471                         switch (opcode) {\r
1472                         case IB_WR_ATOMIC_CMP_AND_SWP:\r
1473                         case IB_WR_ATOMIC_FETCH_AND_ADD:\r
1474                                 set_raddr_seg((void*)wqe, wr->remote_ops.vaddr,\r
1475                                               wr->remote_ops.rkey);\r
1476                                 wqe  += sizeof (struct mlx4_wqe_raddr_seg);\r
1477 \r
1478                                 set_atomic_seg((void*)wqe, wr);\r
1479                                 wqe  += sizeof (struct mlx4_wqe_atomic_seg);\r
1480 \r
1481                                 size += (sizeof (struct mlx4_wqe_raddr_seg) +\r
1482                                          sizeof (struct mlx4_wqe_atomic_seg)) / 16;\r
1483 \r
1484                                 break;\r
1485 \r
1486                         case IB_WR_RDMA_READ:\r
1487                         case IB_WR_RDMA_WRITE:\r
1488                         case IB_WR_RDMA_WRITE_WITH_IMM:\r
1489                                 set_raddr_seg((void*)wqe, wr->remote_ops.vaddr,\r
1490                                               wr->remote_ops.rkey);\r
1491                                 wqe  += sizeof (struct mlx4_wqe_raddr_seg);\r
1492                                 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;\r
1493                                 break;\r
1494 \r
1495                         default:\r
1496                                 /* No extra segments required for sends */\r
1497                                 break;\r
1498                         }\r
1499                         break;\r
1500 \r
1501                 case IB_QPT_UD:\r
1502                         set_datagram_seg((void*)wqe, wr);\r
1503                         wqe  += sizeof (struct mlx4_wqe_datagram_seg);\r
1504                         size += sizeof (struct mlx4_wqe_datagram_seg) / 16;\r
1505                         if (wr->wr_type == WR_LSO) {\r
1506                                 err = build_lso_seg((struct mlx4_lso_seg *)(void *)wqe, wr, qp, &seglen);\r
1507                                 if (unlikely(err)) {\r
1508                                         if (bad_wr)\r
                                                      *bad_wr = wr;\r
1509                                         goto out;\r
1510                                 }\r
1511 #define I64_CACHE_LINE          64\r
1512 #define OPCODE_INVALID_BIT      6\r
1513                                 // WQE bug workaround for the LSO case:\r
1514                                 // if the LSO segment is large enough to exceed one cache block,\r
1515                                 // or small enough that the S/G elements land in the same cache block,\r
1516                                 // OPCODE_INVALID_BIT must be set so that the HCA rereads this WQE.\r
1517                                 // A more precise condition would be\r
1518                                 //      (unlikely (seglen % I64_CACHE_LINE || seglen % (I64_CACHE_LINE-2) ))\r
1519                                 // but it is not used here, to keep the datapath calculation cheap.\r
1520                                 // If the LSO segment consists of 15 DWORDS, the S/G element block nevertheless starts in\r
1521                                 // the next cache block.\r
1522                                 if (unlikely (seglen < I64_CACHE_LINE-4  || seglen > I64_CACHE_LINE ))\r
1523                                         ctrl->owner_opcode |= cpu_to_be32 ( 1 << OPCODE_INVALID_BIT);\r
1524                                 wqe  += seglen;\r
1525                                 size += seglen / 16;\r
1526                                 j=1;\r
1527                         }\r
1528                         break;\r
1529 \r
1530                 case IB_QPT_SMI:\r
1531                 case IB_QPT_GSI:\r
1532                         err = build_mlx_header(to_msqp(qp), wr, ctrl);\r
1533                         if (err < 0) {\r
1534                                 if (bad_wr)\r
1535                                         *bad_wr = wr;\r
1536                                 goto out;\r
1537                         }\r
1538                         wqe  += err;\r
1539                         size += err / 16;\r
1540 \r
1541                         err = 0;\r
1542                         break;\r
1543 \r
1544                 default:\r
1545                         break;\r
1546                 }\r
1547 \r
1548                 /*\r
1549                  * Write data segments in reverse order, so as to\r
1550                  * overwrite cacheline stamp last within each\r
1551                  * cacheline.  This avoids issues with WQE\r
1552                  * prefetching.\r
1553                  */\r
1554 \r
1555                 dseg = (void*)wqe;\r
1556                 dseg += wr->num_ds - 1;\r
1557                 size += wr->num_ds * (sizeof (struct mlx4_wqe_data_seg) / 16);\r
1558 \r
1559                 /* Add one more inline data segment for ICRC for MLX sends */\r
1560                 if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||\r
1561                              qp->ibqp.qp_type == IB_QPT_GSI)) {\r
1562                         set_mlx_icrc_seg(dseg + 1);\r
1563                         size += sizeof (struct mlx4_wqe_data_seg) / 16;\r
1564                 }\r
1565 \r
1566                 for (i = wr->num_ds - 1; i >= 0; --i, --dseg)\r
1567                         set_data_seg(dseg, wr->ds_array + i);\r
1568 \r
1569                 ctrl->fence_size = (u8)((wr->send_opt & IB_SEND_OPT_FENCE ?\r
1570                                     MLX4_WQE_CTRL_FENCE : 0) | size);\r
1571 \r
1572                 /*\r
1573                  * Make sure descriptor is fully written before\r
1574                  * setting ownership bit (because HW can start\r
1575                  * executing as soon as we do).\r
1576                  */\r
1577                 wmb();\r
1578 \r
1579                 if (opcode < 0 || opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {\r
1580                         err = -EINVAL;\r
                             if (bad_wr)\r
                                     *bad_wr = wr;\r
1581                         goto out;\r
1582                 }\r
1583 \r
1584                 ctrl->owner_opcode = mlx4_ib_opcode[opcode] |\r
1585                         (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);\r
1586 \r
1587                 /*\r
1588                  * We can improve latency by not stamping the last\r
1589                  * send queue WQE until after ringing the doorbell, so\r
1590                  * only stamp here if there are still more WQEs to post.\r
1591                  */\r
1592                 if (wr->p_next)\r
1593                         stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &\r
1594                                        (qp->sq.wqe_cnt - 1));\r
1595 \r
1596                 ++ind;\r
1597         }\r
1598 \r
1599 //printk("ctrl->srcrb_flags & MLX4_WQE_CTRL_TCP_UDP_CSUM =%d \n", ctrl->srcrb_flags & cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM ));\r
1600 \r
1601 out:\r
1602 //WQE printout\r
1603 #if 0   \r
1604         if (j) {\r
1605                 u32 *ds = (u32 *) wqe_start;\r
1606                 printk("WQE DUMP:\n");\r
1607                 for (j = 0; j < ctrl->fence_size*4; ++j) {\r
1608                         printk("%d %08x\n", j,be32_to_cpu(*ds));\r
1609                         ++ds;\r
1610                 }\r
1611         }\r
1612 #endif  \r
1613         if (likely(nreq)) {\r
1614                 qp->sq.head += nreq;\r
1615 \r
1616                 /*\r
1617                  * Make sure that descriptors are written before\r
1618                  * doorbell record.\r
1619                  */\r
1620                 wmb();\r
1621 \r
1622                 writel(qp->doorbell_qpn,\r
1623                        (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);\r
1624 \r
1625 #if 0\r
1626                 if (qp->mqp.qpn == 0x41)\r
1627                         cl_dbg_out( "[MLX4_BUS] mlx4_ib_post_send : qtype %d, qpn %#x, nreq %d, sq.head %#x, wqe_ix %d, db %p \n", \r
1628                                 ibqp->qp_type, qp->mqp.qpn, nreq, qp->sq.head, ind, \r
1629                                 (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL );\r
1630 #endif          \r
1631                 /*\r
1632                  * Make sure doorbells don't leak out of SQ spinlock\r
1633                  * and reach the HCA out of order.\r
1634                  */\r
1635                 mmiowb();\r
1636 \r
1637                 stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &\r
1638                                (qp->sq.wqe_cnt - 1));\r
1639         }\r
1640 \r
1641         spin_unlock_irqrestore(&qp->sq.lock, flags);\r
1642 \r
1643         return err;\r
1644 }\r
1645 \r
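/*\r
 * mlx4_ib_post_recv() - post a chain of receive work requests.  Each WR's\r
 * scatter list is written into the next receive WQE; a list shorter than\r
 * max_gs is terminated with an invalid-lkey sentinel entry.  The RQ\r
 * doorbell record is updated once after all WRs have been written.\r
 */\r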
1646 int mlx4_ib_post_recv(struct ib_qp *ibqp, ib_recv_wr_t *wr,\r
1647                       ib_recv_wr_t **bad_wr)\r
1648 {\r
1649         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1650         struct mlx4_wqe_data_seg *scat;\r
1651         unsigned long flags;\r
1652         int err = 0;\r
1653         int nreq;\r
1654         int ind;\r
1655         int i;\r
1656 \r
1657         if (mlx4_is_barred(ibqp->device->dma_device))\r
1658                 return -EFAULT;\r
1659 \r
1660         spin_lock_irqsave(&qp->rq.lock, &flags);\r
1661 \r
1662         ind = qp->rq.head & (qp->rq.wqe_cnt - 1);\r
1663 \r
1664         for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
1665                 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {\r
1666                         err = -ENOMEM;\r
1667                         if (bad_wr)\r
1668                                 *bad_wr = wr;\r
1669                         goto out;\r
1670                 }\r
1671 \r
1672                 if (unlikely(wr->num_ds > (u32)qp->rq.max_gs)) {\r
1673                         err = -EINVAL;\r
1674                         if (bad_wr)\r
1675                                 *bad_wr = wr;\r
1676                         goto out;\r
1677                 }\r
1678 \r
1679                 scat = get_recv_wqe(qp, ind);\r
1680 \r
1681                 for (i = 0; i < (int)wr->num_ds; ++i)\r
1682                         __set_data_seg(scat + i, wr->ds_array + i);\r
1683 \r
1684                 if (i < qp->rq.max_gs) {\r
1685                         scat[i].byte_count = 0;\r
1686                         scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);\r
1687                         scat[i].addr       = 0;\r
1688                 }\r
1689 \r
1690                 qp->rq.wrid[ind] = wr->wr_id;\r
1691 \r
1692                 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);\r
1693         }\r
1694 \r
1695 out:\r
1696         if (likely(nreq)) {\r
1697                 qp->rq.head += nreq;\r
1698 \r
1699                 /*\r
1700                  * Make sure that descriptors are written before\r
1701                  * doorbell record.\r
1702                  */\r
1703                 wmb();\r
1704 \r
1705                 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);\r
1706 \r
1707 #if 0\r
1708                 if (qp->mqp.qpn == 0x41)\r
1709                         cl_dbg_out( "[MLX4_BUS] mlx4_ib_post_recv : qtype %d, qpn %#x, nreq %d, rq.head %#x, wqe_ix %d, db_obj %p, db %p \n", \r
1710                                 ibqp->qp_type, qp->mqp.qpn, nreq, qp->rq.head, ind, &qp->db, qp->db.db );\r
1711 #endif          \r
1712         }\r
1713 \r
1714         spin_unlock_irqrestore(&qp->rq.lock, flags);\r
1715 \r
1716         return err;\r
1717 }\r
1718 \r
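/*\r
 * Helpers translating the firmware QP state, path-migration state and\r
 * access-flag encodings into their IB verbs equivalents; used by\r
 * mlx4_ib_query_qp() below.\r
 */\r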
1719 static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)\r
1720 {\r
1721         switch (mlx4_state) {\r
1722         case MLX4_QP_STATE_RST:      return XIB_QPS_RESET;\r
1723         case MLX4_QP_STATE_INIT:     return XIB_QPS_INIT;\r
1724         case MLX4_QP_STATE_RTR:      return XIB_QPS_RTR;\r
1725         case MLX4_QP_STATE_RTS:      return XIB_QPS_RTS;\r
1726         case MLX4_QP_STATE_SQ_DRAINING:\r
1727         case MLX4_QP_STATE_SQD:      return XIB_QPS_SQD;\r
1728         case MLX4_QP_STATE_SQER:     return XIB_QPS_SQE;\r
1729         case MLX4_QP_STATE_ERR:      return XIB_QPS_ERR;\r
1730         default:                     return -1;\r
1731         }\r
1732 }\r
1733 \r
1734 static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)\r
1735 {\r
1736         switch (mlx4_mig_state) {\r
1737         case MLX4_QP_PM_ARMED:          return IB_MIG_ARMED;\r
1738         case MLX4_QP_PM_REARM:          return IB_MIG_REARM;\r
1739         case MLX4_QP_PM_MIGRATED:       return IB_MIG_MIGRATED;\r
1740         default: return -1;\r
1741         }\r
1742 }\r
1743 \r
1744 static int to_ib_qp_access_flags(int mlx4_flags)\r
1745 {\r
1746         int ib_flags = 0;\r
1747 \r
1748         if (mlx4_flags & MLX4_QP_BIT_RRE)\r
1749                 ib_flags |= IB_ACCESS_REMOTE_READ;\r
1750         if (mlx4_flags & MLX4_QP_BIT_RWE)\r
1751                 ib_flags |= IB_ACCESS_REMOTE_WRITE;\r
1752         if (mlx4_flags & MLX4_QP_BIT_RAE)\r
1753                 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;\r
1754 \r
1755         return ib_flags;\r
1756 }\r
1757 \r
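/*\r
 * to_ib_ah_attr() - decode a firmware mlx4_qp_path into an ib_ah_attr:\r
 * the port and SL are unpacked from sched_queue, the source path bits and\r
 * GRH flag from grh_mylmc, and the GRH fields are filled in when present.\r
 */\r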
1758 static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,\r
1759                                 struct mlx4_qp_path *path)\r
1760 {\r
1761         memset(ib_ah_attr, 0, sizeof *ib_ah_attr);\r
1762         ib_ah_attr->port_num      = path->sched_queue & 0x40 ? 2 : 1;\r
1763 \r
1764         if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)\r
1765                 return;\r
1766 \r
1767         ib_ah_attr->dlid          = be16_to_cpu(path->rlid);\r
1768         ib_ah_attr->sl            = (path->sched_queue >> 2) & 0xf;\r
1769         ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;\r
1770         ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;\r
1771         ib_ah_attr->ah_flags      = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;\r
1772         if (ib_ah_attr->ah_flags) {\r
1773                 ib_ah_attr->grh.sgid_index = path->mgid_index;\r
1774                 ib_ah_attr->grh.hop_limit  = path->hop_limit;\r
1775                 ib_ah_attr->grh.traffic_class =\r
1776                         (u8)((be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff);\r
1777                 ib_ah_attr->grh.flow_label =\r
1778                         be32_to_cpu(path->tclass_flowlabel) & 0xfffff;\r
1779                 memcpy(ib_ah_attr->grh.dgid.raw,\r
1780                         path->rgid, sizeof ib_ah_attr->grh.dgid.raw);\r
1781         }\r
1782 }\r
1783 \r
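/*\r
 * mlx4_ib_query_qp() - report the current QP attributes.  A QP in RESET\r
 * needs no firmware query; otherwise the QP context is fetched from\r
 * firmware and decoded into ib_qp_attr/ib_qp_init_attr.  Send queue\r
 * capabilities are reported only for kernel QPs.\r
 */\r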
1784 int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,\r
1785                      struct ib_qp_init_attr *qp_init_attr)\r
1786 {\r
1787         struct mlx4_ib_dev *dev = to_mdev(ibqp->device);\r
1788         struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
1789         struct mlx4_qp_context context;\r
1790         int mlx4_state;\r
1791         int err;\r
1792 \r
1793         UNUSED_PARAM(qp_attr_mask);\r
1794 \r
1795         if (mlx4_is_barred(dev->dev))\r
1796                 return -EFAULT;\r
1797         \r
1798         if (qp->state == XIB_QPS_RESET) {\r
1799                 qp_attr->qp_state = XIB_QPS_RESET;\r
1800                 goto done;\r
1801         }\r
1802 \r
1803         err = mlx4_qp_query(dev->dev, &qp->mqp, &context);\r
1804         if (err)\r
1805                 return -EINVAL;\r
1806 \r
1807         mlx4_state = be32_to_cpu(context.flags) >> 28;\r
1808 \r
1809         qp_attr->qp_state            = to_ib_qp_state(mlx4_state);\r
1810         qp_attr->path_mtu            = context.mtu_msgmax >> 5;\r
1811         qp_attr->path_mig_state      =\r
1812                 to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);\r
1813         qp_attr->qkey                = be32_to_cpu(context.qkey);\r
1814         qp_attr->rq_psn              = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;\r
1815         qp_attr->sq_psn              = be32_to_cpu(context.next_send_psn) & 0xffffff;\r
1816         qp_attr->dest_qp_num         = be32_to_cpu(context.remote_qpn) & 0xffffff;\r
1817         qp_attr->qp_access_flags     =\r
1818                 to_ib_qp_access_flags(be32_to_cpu(context.params2));\r
1819 \r
1820         if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {\r
1821                 to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);\r
1822                 to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);\r
1823                 qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;\r
1824                 qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;\r
1825         }\r
1826 \r
1827         qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;\r
1828         if (qp_attr->qp_state == XIB_QPS_INIT)\r
1829                 qp_attr->port_num = qp->port;\r
1830         else\r
1831                 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;\r
1832 \r
1833         /* qp_attr->en_sqd_async_notify is only applicable in modify qp */\r
1834         qp_attr->sq_draining = (u8)(mlx4_state == MLX4_QP_STATE_SQ_DRAINING);\r
1835 \r
1836         qp_attr->max_rd_atomic = (u8)(1 << ((be32_to_cpu(context.params1) >> 21) & 0x7));\r
1837 \r
1838         qp_attr->max_dest_rd_atomic =\r
1839                 (u8)(1 << ((be32_to_cpu(context.params2) >> 21) & 0x7));\r
1840         qp_attr->min_rnr_timer      =\r
1841                 (u8)((be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f);\r
1842         qp_attr->timeout            = context.pri_path.ackto >> 3;\r
1843         qp_attr->retry_cnt          = (u8)((be32_to_cpu(context.params1) >> 16) & 0x7);\r
1844         qp_attr->rnr_retry          = (u8)((be32_to_cpu(context.params1) >> 13) & 0x7);\r
1845         qp_attr->alt_timeout        = context.alt_path.ackto >> 3;\r
1846 \r
1847 done:\r
1848         qp_attr->cur_qp_state        = qp_attr->qp_state;\r
1849         qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;\r
1850         qp_attr->cap.max_recv_sge    = qp->rq.max_gs;\r
1851 \r
1852         if (!ibqp->p_uctx) {\r
1853                 qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;\r
1854                 qp_attr->cap.max_send_sge = qp->sq.max_gs;\r
1855         } else {\r
1856                 qp_attr->cap.max_send_wr  = 0;\r
1857                 qp_attr->cap.max_send_sge = 0;\r
1858         }\r
1859 \r
1860         /*\r
1861          * We don't support inline sends for kernel QPs (yet), and we\r
1862          * don't know what userspace's value should be.\r
1863          */\r
1864         qp_attr->cap.max_inline_data = 0;\r
1865 \r
1866         qp_init_attr->cap            = qp_attr->cap;\r
1867 \r
1868         return 0;\r
1869 }\r
1870 \r