/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <mlx4_debug.h>
#include "mlx4_ib.h"
#include "cq.h"
#include "qp.h"
#include "user.h"

#if defined(EVENT_TRACING)
#ifdef offsetof
#undef offsetof
#endif
#include "cq.tmh"
#endif

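/*
 * Completion callback invoked by the low-level mlx4 driver: forward the
 * completion notification to the consumer's handler on the verbs CQ.
 */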
static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
	ibcq->comp_handler(ibcq->cq_context);
}

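/*
 * Asynchronous event callback: only CQ error events are expected here.
 * They are translated into an IB_EVENT_CQ_ERR record and passed to the
 * consumer's event handler.
 */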
static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
{
	ib_event_rec_t event;
	struct ib_cq *ibcq;

	if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
		printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
		       "on CQ %06x\n", type, cq->cqn);
		return;
	}

	ibcq = &to_mibcq(cq)->ibcq;
	event.type = IB_EVENT_CQ_ERR;
	event.context = ibcq->cq_context;
	event.vendor_specific = type;
	ibcq->event_handler(&event);
}

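/*
 * Return a pointer to CQE number n.  The CQ buffer is either one
 * physically contiguous allocation or a list of PAGE_SIZE chunks, so the
 * byte offset is resolved accordingly.
 */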
static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
	int offset = n * sizeof (struct mlx4_cqe);

	if (buf->buf.nbufs == 1)
		return buf->buf.u.direct.buf + offset;
	else
		return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
			(offset & (PAGE_SIZE - 1));
}

static void *get_cqe(struct mlx4_ib_cq *cq, int n)
{
	return get_cqe_from_buf(&cq->buf, n);
}

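/*
 * Return CQE number n if it is owned by software, or NULL otherwise.
 * The ownership bit toggles every time the (power-of-two sized) CQ wraps,
 * so a CQE belongs to software when its owner bit matches the wrap
 * "generation" bit, (cqe + 1), of the index n.
 */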
static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
{
	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);

	return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
}

static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	struct mlx4_ib_cq *mcq = to_mcq(cq);
	struct mlx4_ib_dev *dev = to_mdev(cq->device);
	struct mlx4_cq_context *context;
	int err;

	if (mlx4_is_barred(dev->dev))
		return -EFAULT;

	context = kzalloc(sizeof *context, GFP_KERNEL);
	if (!context)
		return -ENOMEM;

	context->cq_period = cpu_to_be16(cq_period);
	context->cq_max_count = cpu_to_be16(cq_count);
	err = mlx4_cq_modify(dev->dev, &mcq->mcq, context, 1);

	kfree(context);
	return err;
}

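/*
 * Create a CQ.  For a user-mode CQ the CQE buffer, doorbell record and
 * arm_sn variable are supplied by the provider library through udata and
 * are mapped into the kernel; for a kernel CQ the buffer and doorbell
 * record are allocated here.  In both cases the MTT is written and the
 * hardware CQ is allocated through mlx4_cq_alloc().
 */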
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
				struct ib_ucontext *context,
				struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_cq *cq;
	struct mlx4_uar *uar;
	int buf_size;
	int err;

	UNUSED_PARAM(vector);

	if (mlx4_is_barred(ibdev->dma_device))
		return ERR_PTR(-EFAULT);

	if (entries < 1 || entries > dev->dev->caps.max_cqes)
		return ERR_PTR(-EINVAL);

	cq = kzalloc(sizeof *cq, GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	entries      = roundup_pow_of_two(entries + 1);
	cq->ibcq.cqe = entries - 1;
	buf_size     = entries * sizeof (struct mlx4_cqe);
	spin_lock_init(&cq->lock);

	if (context && udata) {
		struct mlx4_ib_create_cq ucmd;

		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
			err = -EFAULT;
			goto err_cq;
		}

		cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
				       IB_ACCESS_LOCAL_WRITE, FALSE);
		if (IS_ERR(cq->umem)) {
			err = PTR_ERR(cq->umem);
			goto err_cq;
		}

		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),
				    ilog2(cq->umem->page_size), &cq->buf.mtt);
		if (err)
			goto err_buf;

		err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
		if (err)
			goto err_mtt;

		err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
					  &cq->db);
		if (err)
			goto err_mtt;

		// Map the user's arm_sn variable into the kernel.  There is no
		// way to pass the completion event back to the provider
		// library, so the user's arm_sn is incremented in the kernel.
		err = ib_umem_map(ucmd.arm_sn_addr, sizeof(int),
				  IB_ACCESS_LOCAL_WRITE, &cq->mcq.mdl, &cq->mcq.p_u_arm_sn);
		if (err)
			goto err_dbmap;

		uar = &to_mucontext(context)->uar;
	} else {
		err = mlx4_ib_db_alloc(dev, &cq->db, 1);
		if (err)
			goto err_cq;

		cq->mcq.set_ci_db  = cq->db.db;
		cq->mcq.arm_db     = cq->db.db + 1;
		*cq->mcq.set_ci_db = 0;
		*cq->mcq.arm_db    = 0;

		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {
			err = -ENOMEM;
			goto err_db;
		}

		err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,
				    &cq->buf.mtt);
		if (err)
			goto err_buf;

		err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);
		if (err)
			goto err_mtt;

		cq->mcq.p_u_arm_sn = NULL;
		uar = &dev->priv_uar;
	}

	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
			    cq->db.dma.da, &cq->mcq, 0, 0);
	if (err)
		goto err_dbmap;

	cq->mcq.comp  = mlx4_ib_cq_comp;
	cq->mcq.event = mlx4_ib_cq_event;

	if (context) {
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
			err = -EFAULT;
			goto err_cq_free;
		}
	}

	return &cq->ibcq;

err_cq_free:
	// the hardware CQ has already been allocated at this point
	mlx4_cq_free(dev->dev, &cq->mcq);

err_dbmap:
	ib_umem_unmap(cq->mcq.mdl, cq->mcq.p_u_arm_sn);
	if (context)
		mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);

err_mtt:
	mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);

err_buf:
	if (context)
		ib_umem_release(cq->umem);
	else
		mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),
			      &cq->buf.buf);

err_db:
	if (!context)
		mlx4_ib_db_free(dev, &cq->db);

err_cq:
	kfree(cq);

	return ERR_PTR(err);
}

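/*
 * Destroy a CQ: free the hardware CQ and MTT, then release either the
 * user-mode mappings or the kernel-mode buffer and doorbell record.
 */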
int mlx4_ib_destroy_cq(struct ib_cq *cq)
{
	struct mlx4_ib_dev *dev = to_mdev(cq->device);
	struct mlx4_ib_cq *mcq = to_mcq(cq);

	mlx4_cq_free(dev->dev, &mcq->mcq);
	mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);

	if (cq->p_uctx) {
		ib_umem_unmap(mcq->mcq.mdl, mcq->mcq.p_u_arm_sn);
		mlx4_ib_db_unmap_user(to_mucontext(cq->p_uctx), &mcq->db);
		ib_umem_release(mcq->umem);
	} else {
		mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),
			      &mcq->buf.buf);
		mlx4_ib_db_free(dev, &mcq->db);
	}

	kfree(mcq);

	return 0;
}

static void dump_cqe(void *cqe)
{
	__be32 *buf = cqe;

	MLX4_PRINT(TRACE_LEVEL_ERROR, MLX4_DBG_DRV,
		(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
			   be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
			   be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
			   be32_to_cpu(buf[6]), be32_to_cpu(buf[7])));
}

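/*
 * Translate the syndrome of an error CQE into an IBAL work-completion
 * status and record the vendor syndrome.
 */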
static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
				     ib_wc_t *wc)
{
	if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
		printk(KERN_DEBUG "local QP operation err "
		       "(QPN %06x, WQE index %x, vendor syndrome %02x, "
		       "opcode = %02x)\n",
		       be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
		       cqe->vendor_err_syndrome,
		       cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
		dump_cqe(cqe);
	}

	switch (cqe->syndrome) {
	case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WCS_LOCAL_LEN_ERR;
		break;
	case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WCS_LOCAL_OP_ERR;
		break;
	case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WCS_LOCAL_PROTECTION_ERR;
		break;
	case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
		wc->status = IB_WCS_WR_FLUSHED_ERR;
		break;
	case MLX4_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WCS_MEM_WINDOW_BIND_ERR;
		break;
	case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WCS_BAD_RESP_ERR;
		break;
	case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WCS_LOCAL_ACCESS_ERR;
		break;
	case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WCS_REM_INVALID_REQ_ERR;
		break;
	case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WCS_REM_ACCESS_ERR;
		break;
	case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WCS_REM_OP_ERR;
		break;
	case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WCS_TIMEOUT_RETRY_ERR;
		break;
	case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		wc->status = IB_WCS_RNR_RETRY_ERR;
		break;
	case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WCS_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WCS_GENERAL_ERR;
		break;
	}

	wc->vendor_specific = cqe->vendor_err_syndrome;
}

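/*
 * Examine the IPoIB checksum-offload status reported in the CQE and build
 * the corresponding receive-option flags (shifted into
 * IB_RECV_OPT_CSUM_MASK) for the work completion.
 */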
static uint32_t mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum)
{
	#define CSUM_VALID_NUM 0xffff
	uint32_t res = 0;

	// Verify that the IPOK bit is set and the packet is a plain IPv4
	// packet (no IP options, not IPv6).
	if ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4	|
				  MLX4_CQE_IPOIB_STATUS_IPV4OPT	|
				  MLX4_CQE_IPOIB_STATUS_IPV6	|
				  MLX4_CQE_IPOIB_STATUS_IPOK))	==
		    cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4	|
				MLX4_CQE_IPOIB_STATUS_IPOK))
	{
		// The IP checksum calculated by the HCA matched the checksum
		// in the received packet's IP header.
		res |= MLX4_NdisPacketIpChecksumSucceeded;
		if (checksum == CSUM_VALID_NUM) {
			// The TCP or UDP checksum calculated by the HCA matched
			// the checksum in the received packet's transport header.
			res |= (MLX4_NdisPacketUdpChecksumSucceeded |
				MLX4_NdisPacketTcpChecksumSucceeded);
			ASSERT(status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_TCP | MLX4_CQE_IPOIB_STATUS_UDP));
		}
	}
	return ((res << 8) & IB_RECV_OPT_CSUM_MASK);
}

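/*
 * Consume a single CQE: resolve the QP it belongs to, retrieve the wr_id
 * from the send, receive or SRQ ring, and fill in the work completion.
 * Returns 0 on success, -EAGAIN if the CQ is empty, or -EINVAL if the CQE
 * references an unknown QP.
 */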
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
			    struct mlx4_ib_qp **cur_qp,
			    ib_wc_t *wc)
{
	struct mlx4_cqe *cqe;
	struct mlx4_qp *mqp;
	struct mlx4_ib_wq *wq;
	struct mlx4_ib_srq *srq;
	int is_send;
	int is_error;
	u16 wqe_ctr;

	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	++cq->mcq.cons_index;

	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
		MLX4_CQE_OPCODE_ERROR;

	if (!*cur_qp || (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (u32)(*cur_qp)->mqp.qpn) {
		/*
		 * We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
#if 1
		// The current radix_tree_insert implementation can cause
		// radix_tree_lookup to miss an existing QP, so look the QP up
		// under the QP table spinlock instead.
		mqp = mlx4_qp_lookup_locked(to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->my_qpn));
#else
		mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->my_qpn));
#endif

		if (unlikely(!mqp)) {
			printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
				cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);
			return -EINVAL;
		}

		*cur_qp = to_mibqp(mqp);
	}

	if (is_send) {
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe->wqe_index);
		wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else if ((*cur_qp)->ibqp.srq) {
		srq = to_msrq((*cur_qp)->ibqp.srq);
		wqe_ctr = be16_to_cpu(cqe->wqe_index);
		wc->wr_id = srq->wrid[wqe_ctr];
		mlx4_ib_free_srq_wqe(srq, wqe_ctr);
	} else {
		wq        = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

	if (is_send) {
		wc->recv.ud.recv_opt = 0;
		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_OPCODE_RDMA_WRITE_IMM:
			wc->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE;
			/* fall through */
		case MLX4_OPCODE_RDMA_WRITE:
			wc->wc_type = IB_WC_RDMA_WRITE;
			break;
		case MLX4_OPCODE_SEND_IMM:
			wc->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE;
			/* fall through */
		case MLX4_OPCODE_SEND:
			wc->wc_type = IB_WC_SEND;
			break;
		case MLX4_OPCODE_RDMA_READ:
			wc->wc_type = IB_WC_RDMA_READ;
			wc->length  = be32_to_cpu(cqe->byte_cnt);
			break;
		case MLX4_OPCODE_ATOMIC_CS:
			wc->wc_type = IB_WC_COMPARE_SWAP;
			wc->length  = 8;
			break;
		case MLX4_OPCODE_ATOMIC_FA:
			wc->wc_type = IB_WC_FETCH_ADD;
			wc->length  = 8;
			break;
		case MLX4_OPCODE_BIND_MW:
			wc->wc_type = IB_WC_MW_BIND;
			break;
		case MLX4_OPCODE_LSO:
			wc->wc_type = IB_WC_LSO;
			break;
		default:
			wc->wc_type = IB_WC_SEND;
			break;
		}
	} else {
		wc->length = be32_to_cpu(cqe->byte_cnt);

		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
			wc->wc_type = IB_WC_RECV_RDMA_WRITE;
			wc->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE;
			wc->recv.ud.immediate_data = cqe->immed_rss_invalid;
			break;
		case MLX4_RECV_OPCODE_SEND:
			wc->wc_type = IB_WC_RECV;
			wc->recv.ud.recv_opt = 0;
			break;
		case MLX4_RECV_OPCODE_SEND_IMM:
			wc->wc_type = IB_WC_RECV;
			wc->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE;
			wc->recv.ud.immediate_data = cqe->immed_rss_invalid;
			break;
		default:
			wc->recv.ud.recv_opt = 0;
			wc->wc_type = IB_WC_RECV;
			break;
		}

		wc->recv.ud.remote_lid = cqe->rlid;
		wc->recv.ud.remote_sl  = cqe->sl >> 4;
		wc->recv.ud.remote_qp  = cqe->g_mlpath_rqpn & 0xffffff00;
		wc->recv.ud.path_bits  = (u8)(cqe->g_mlpath_rqpn & 0x7f);
		wc->recv.ud.recv_opt  |= cqe->g_mlpath_rqpn & 0x080 ? IB_RECV_OPT_GRH_VALID : 0;
		wc->recv.ud.pkey_index = (u16)(be32_to_cpu(cqe->immed_rss_invalid) & 0x7f);
		wc->recv.ud.recv_opt  |= mlx4_ib_ipoib_csum_ok(cqe->ipoib_status, cqe->checksum);
	}
	if (!is_send && cqe->rlid == 0) {
		MLX4_PRINT(TRACE_LEVEL_INFORMATION, MLX4_DBG_CQ, ("found rlid == 0\n"));
		wc->recv.ud.recv_opt |= IB_RECV_OPT_FORWARD;
	}

	if (unlikely(is_error))
		mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
	else
		wc->status = IB_WCS_SUCCESS;

	return 0;
}

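/*
 * Poll the CQ: work completions are taken from the caller-supplied free
 * list, filled in one CQE at a time and chained onto the done list.  On
 * return *pp_free_wclist points to the unused remainder of the free list,
 * and the function returns the number of completions polled or a negative
 * error code on a hard failure.
 */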
int mlx4_ib_poll_cq(
	IN		struct ib_cq *ibcq,
	IN OUT		ib_wc_t** const		pp_free_wclist,
	OUT		ib_wc_t** const		pp_done_wclist)
{
	struct mlx4_ib_cq *cq = to_mcq(ibcq);
	struct mlx4_ib_qp *cur_qp = NULL;
	unsigned long flags;
	int err = 0;
	int npolled = 0;
	ib_wc_t *wc_p, **next_pp;

	spin_lock_irqsave(&cq->lock, &flags);

	// loop through the CQ
	next_pp = pp_done_wclist;
	wc_p = *pp_free_wclist;
	while (wc_p) {
		// poll one CQE
		err = mlx4_ib_poll_one(cq, &cur_qp, wc_p);
		if (err)
			break;

		// prepare for the next iteration
		*next_pp = wc_p;
		next_pp = &wc_p->p_next;
		wc_p = wc_p->p_next;
		++npolled;
	}

	// prepare the results
	*pp_free_wclist = wc_p;		/* Set the head of the free list. */
	*next_pp = NULL;		/* Clear the tail of the done list. */

	// update the consumer index
	if (npolled)
		mlx4_cq_set_ci(&cq->mcq);

	spin_unlock_irqrestore(&cq->lock, flags);
	return (err == 0 || err == -EAGAIN) ? npolled : err;
}

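/*
 * Request a completion notification: ring the CQ doorbell for either
 * solicited-only or any completion, unless the device is barred.
 */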
int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	if (!mlx4_is_barred(ibcq->device->dma_device))
		mlx4_cq_arm(&to_mcq(ibcq)->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
		    to_mdev(ibcq->device)->uar_map,
		    MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));

	return 0;
}

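/*
 * Remove all CQEs belonging to the given QP from the CQ, returning any
 * associated SRQ WQEs.  The caller must hold cq->lock; mlx4_ib_cq_clean()
 * below is the locking wrapper.
 */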
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
{
	u32 prod_index;
	int nfreed = 0;
	struct mlx4_cqe *cqe, *dest;
	u8 owner_bit;

	/*
	 * First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/*
	 * Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {
			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
				mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
			memcpy(dest, cqe, sizeof *cqe);
			dest->owner_sr_opcode = owner_bit |
				(dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/*
		 * Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx4_cq_set_ci(&cq->mcq);
	}
}

void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
{
	spin_lock_irq(&cq->lock);
	__mlx4_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}