events: allow callbacks per CQ, QP, or SRQ
author shefty <shefty@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Wed, 23 Jul 2008 20:09:45 +0000 (20:09 +0000)
committer shefty <shefty@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Wed, 23 Jul 2008 20:09:45 +0000 (20:09 +0000)
The underlying hardware drivers both support setting callbacks on a
per-widget (CQ, QP, or SRQ) basis, but the verbs interface only allows one
global event handler and one completion handler for the HCA.  Modify the
verbs interface to allow setting event callbacks directly on the HCA
objects.  This allows the HCA to support multiple consumers of its
interface, and users can optimize their event handling based on the
callback.  For example, the HCA drivers can invoke callbacks directly into
the ULPs without the calls first being filtered by IBAL.  This will improve
completion event reporting.
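
As a rough sketch of what this looks like at a call site (the argument
order follows the al_verbs.h hunk below; names other than ci_ca_comp_cb,
ci_ca_async_event_cb, and the ib_ci.h types are illustrative):

    /* Per-object callbacks, declared in al_verbs.h below.  ib_event_rec_t
     * and the verbs function table come from inc/iba/ib_ci.h. */
    void ci_ca_comp_cb( void *cq_context );
    void ci_ca_async_event_cb( ib_event_rec_t *p_event_record );

    /* open_ca() no longer takes the completion callback; instead each
     * create call binds its own handlers, e.g. for a CQ: */
    status = p_ci_ca->verbs.create_cq( h_ci_ca, h_cq,
        ci_ca_async_event_cb, ci_ca_comp_cb, &p_cq_create->size,
        &h_cq->h_ci_cq, p_umv_buf );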

To minimize changes to the existing stack, the current event handler is
used for all events, regardless of type.  Optimizations are left for a
separate patch.
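
With per-object callbacks in place, a ULP could later dispatch on the event
record itself; the fragment below is only an assumed illustration of that
follow-on optimization (handle_cq_error is hypothetical; the type, context,
and vendor_specific fields are the ones filled in by the mlx4 cq.c hunk
below):

    /* Illustrative only -- this patch still routes every event through the
     * single existing handler. */
    static void ulp_async_event_cb( ib_event_rec_t *p_event_record )
    {
        switch( p_event_record->type ) {
        case IB_EVENT_CQ_ERR:
            /* context is the CQ context given at creation time */
            handle_cq_error( p_event_record->context,
                p_event_record->vendor_specific );
            break;
        default:
            /* anything else still goes to the generic HCA event path */
            ci_ca_async_event_cb( p_event_record );
            break;
        }
    }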

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@1435 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

23 files changed:
core/al/al_verbs.h
core/al/kernel/al_ci_ca.c
hw/mlx4/kernel/bus/ib/cq.c
hw/mlx4/kernel/bus/ib/qp.c
hw/mlx4/kernel/bus/ib/srq.c
hw/mlx4/kernel/bus/inc/ib_verbs.h
hw/mlx4/kernel/hca/ca.c
hw/mlx4/kernel/hca/cq.c
hw/mlx4/kernel/hca/data.c
hw/mlx4/kernel/hca/data.h
hw/mlx4/kernel/hca/qp.c
hw/mlx4/kernel/hca/srq.c
hw/mlx4/kernel/hca/verbs.c
hw/mlx4/kernel/hca/verbs.h
hw/mthca/kernel/hca_data.c
hw/mthca/kernel/hca_data.h
hw/mthca/kernel/hca_verbs.c
hw/mthca/kernel/ib_verbs.h
hw/mthca/kernel/mt_verbs.c
hw/mthca/kernel/mthca_cq.c
hw/mthca/kernel/mthca_qp.c
hw/mthca/kernel/mthca_srq.c
inc/iba/ib_ci.h

diff --git a/core/al/al_verbs.h b/core/al/al_verbs.h
index ef4908c..2b9ec41 100644
@@ -73,6 +73,9 @@
        h_ca->obj.p_ci_ca->verbs.modify_ca( h_ca->obj.p_ci_ca->h_ci_ca,\\r
                port_num, ca_mod, p_port_attr_mod )\r
 \r
+void ci_ca_comp_cb(void *cq_context);\r
+void ci_ca_async_event_cb(ib_event_rec_t* p_event_record);\r
+\r
 static inline ib_api_status_t\r
 verbs_create_cq(\r
        IN              const   ib_ca_handle_t                          h_ca,\r
@@ -82,7 +85,8 @@ verbs_create_cq(
 {\r
        return h_ca->obj.p_ci_ca->verbs.create_cq(\r
                (p_umv_buf) ? h_ca->h_um_ca : h_ca->obj.p_ci_ca->h_ci_ca,\r
-               h_cq, &p_cq_create->size, &h_cq->h_ci_cq, p_umv_buf );\r
+               h_cq, ci_ca_async_event_cb, ci_ca_comp_cb, &p_cq_create->size,\r
+               &h_cq->h_ci_cq, p_umv_buf );\r
 }\r
 \r
 #define verbs_check_cq(h_cq)   ((h_cq)->h_ci_cq)\r
@@ -242,7 +246,7 @@ verbs_create_srq(
        ib_api_status_t         status;\r
 \r
        status = h_srq->obj.p_ci_ca->verbs.create_srq(\r
-               h_pd->h_ci_pd, h_srq, p_srq_attr,\r
+               h_pd->h_ci_pd, h_srq, ci_ca_async_event_cb, p_srq_attr,\r
                &h_srq->h_ci_srq, p_umv_buf );\r
 \r
        h_srq->h_recv_srq = h_srq->h_ci_srq;\r
@@ -287,7 +291,7 @@ verbs_get_spl_qp(
        ib_api_status_t         status;\r
 \r
        status = h_qp->obj.p_ci_ca->verbs.create_spl_qp(\r
-               h_pd->h_ci_pd, port_num, h_qp, p_qp_create,\r
+               h_pd->h_ci_pd, port_num, h_qp, ci_ca_async_event_cb, p_qp_create,\r
                p_qp_attr, &h_qp->h_ci_qp );\r
 \r
        h_qp->h_recv_qp = h_qp->h_ci_qp;\r
@@ -310,7 +314,7 @@ verbs_create_qp(
        ib_api_status_t         status;\r
 \r
        status = h_qp->obj.p_ci_ca->verbs.create_qp(\r
-               h_pd->h_ci_pd, h_qp, p_qp_create, p_qp_attr,\r
+               h_pd->h_ci_pd, h_qp, ci_ca_async_event_cb, p_qp_create, p_qp_attr,\r
                &h_qp->h_ci_qp, p_umv_buf );\r
 \r
        h_qp->h_recv_qp = h_qp->h_ci_qp;\r
diff --git a/core/al/kernel/al_ci_ca.c b/core/al/kernel/al_ci_ca.c
index b87ff9f..221d484 100644
@@ -77,7 +77,7 @@ ci_ca_async_proc_cb(
 \r
 void\r
 ci_ca_async_event_cb(\r
-       IN              const   ib_event_rec_t* const           p_event_record );\r
+       IN                              ib_event_rec_t*                         p_event_record );\r
 \r
 \r
 \r
@@ -155,7 +155,7 @@ create_ci_ca(
        p_ci_ca->dereg_async_item.pfn_callback = ci_ca_async_proc_cb;\r
 \r
        /* Open the CI CA. */\r
-       status = p_ci_ca->verbs.open_ca( p_ci_ca->verbs.guid, ci_ca_comp_cb,\r
+       status = p_ci_ca->verbs.open_ca( p_ci_ca->verbs.guid,\r
                ci_ca_async_event_cb, p_ci_ca, &p_ci_ca->h_ci_ca );\r
        if( status != IB_SUCCESS )\r
        {\r
@@ -336,7 +336,7 @@ ci_ca_comp_cb(
  */\r
 void\r
 ci_ca_async_event_cb(\r
-       IN              const   ib_event_rec_t* const           p_event_record )\r
+       IN              ib_event_rec_t*         p_event_record )\r
 {\r
        ib_async_event_rec_t    event_rec;\r
 \r
diff --git a/hw/mlx4/kernel/bus/ib/cq.c b/hw/mlx4/kernel/bus/ib/cq.c
index b13d596..939702f 100644
-/*
- * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <mlx4_debug.h>
-#include "mlx4_ib.h"
-#include "cq.h"
-#include "qp.h"
-#include "user.h"
-
-static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
-{
-       struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
-       ibcq->comp_handler(ibcq, ibcq->cq_context);
-}
-
-static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
-{
-       struct ib_event event;
-       struct ib_cq *ibcq;
-
-       if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
-               printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
-                      "on CQ %06x\n", type, cq->cqn);
-               return;
-       }
-
-       ibcq = &to_mibcq(cq)->ibcq;
-       if (ibcq->event_handler) {
-               event.device     = ibcq->device;
-               event.event      = IB_EVENT_CQ_ERR;
-               event.element.cq = ibcq;
-               ibcq->event_handler(&event, ibcq->cq_context);
-       }
-}
-
-static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
-{
-       int offset = n * sizeof (struct mlx4_cqe);
-
-       if (buf->buf.nbufs == 1)
-               return buf->buf.u.direct.buf + offset;
-       else
-               return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-                       (offset & (PAGE_SIZE - 1));
-}
-
-static void *get_cqe(struct mlx4_ib_cq *cq, int n)
-{
-       return get_cqe_from_buf(&cq->buf, n);
-}
-
-static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
-{
-       struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
-
-       return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
-               !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
-}
-
-static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
-{
-       return get_sw_cqe(cq, cq->mcq.cons_index);
-}
-
-int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
-{
-       struct mlx4_ib_cq *mcq = to_mcq(cq);
-       struct mlx4_ib_dev *dev = to_mdev(cq->device);
-       struct mlx4_cq_context *context;
-       int err;
-
-       context = kzalloc(sizeof *context, GFP_KERNEL);
-       if (!context)
-               return -ENOMEM;
-
-       context->cq_period = cpu_to_be16(cq_period);
-       context->cq_max_count = cpu_to_be16(cq_count);
-       err = mlx4_cq_modify(dev->dev, &mcq->mcq, context, 1);
-
-       kfree(context);
-       return err;
-}
-
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
-                               struct ib_ucontext *context,
-                               struct ib_udata *udata)
-{
-       struct mlx4_ib_dev *dev = to_mdev(ibdev);
-       struct mlx4_ib_cq *cq;
-       struct mlx4_uar *uar;
-       int buf_size;
-       int err;
-
-       UNUSED_PARAM(vector);
-
-       if (entries < 1 || entries > dev->dev->caps.max_cqes)
-               return ERR_PTR(-EINVAL);
-
-       cq = kzalloc(sizeof *cq, GFP_KERNEL);
-       if (!cq)
-               return ERR_PTR(-ENOMEM);
-
-       entries      = roundup_pow_of_two(entries + 1);
-       cq->ibcq.cqe = entries - 1;
-       buf_size     = entries * sizeof (struct mlx4_cqe);
-       spin_lock_init(&cq->lock);
-
-       if (context) {
-               struct mlx4_ib_create_cq ucmd;
-
-               if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
-                       err = -EFAULT;
-                       goto err_cq;
-               }
-
-               cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
-                                      IB_ACCESS_LOCAL_WRITE, FALSE);
-               if (IS_ERR(cq->umem)) {
-                       err = PTR_ERR(cq->umem);
-                       goto err_cq;
-               }
-
-               err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),
-                                   ilog2(cq->umem->page_size), &cq->buf.mtt);
-               if (err)
-                       goto err_buf;
-
-               err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
-               if (err)
-                       goto err_mtt;
-
-               err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
-                                         &cq->db);
-               if (err)
-                       goto err_mtt;
-
-               // add mapping to user's arm_sn variable
-               // we have no way pass the completion event to provider library
-               // so we'll increment user's arm_sn in kernel
-               err = ib_umem_map( ucmd.arm_sn_addr, sizeof(int), 
-                       IB_ACCESS_LOCAL_WRITE, &cq->mcq.mdl, &cq->mcq.p_u_arm_sn );
-               if (err)
-                       goto err_dbmap;
-
-               uar = &to_mucontext(context)->uar;
-       } else {
-               err = mlx4_ib_db_alloc(dev, &cq->db, 1);
-               if (err)
-                       goto err_cq;
-
-               cq->mcq.set_ci_db  = cq->db.db;
-               cq->mcq.arm_db     = cq->db.db + 1;
-               *cq->mcq.set_ci_db = 0;
-               *cq->mcq.arm_db    = 0;
-
-               if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {
-                       err = -ENOMEM;
-                       goto err_db;
-               }
-
-               err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,
-                                   &cq->buf.mtt);
-               if (err)
-                       goto err_buf;
-
-               err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);
-               if (err)
-                       goto err_mtt;
-
-               cq->mcq.p_u_arm_sn = NULL;
-               uar = &dev->priv_uar;
-       }
-
-       err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
-               cq->db.dma.da, &cq->mcq, 0, 0);
-       if (err)
-               goto err_dbmap;
-
-       cq->mcq.comp  = mlx4_ib_cq_comp;
-       cq->mcq.event = mlx4_ib_cq_event;
-
-       if (context) 
-               if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
-                       err = -EFAULT;
-                       goto err_dbmap;
-               }
-
-       return &cq->ibcq;
-
-err_dbmap:
-       ib_umem_unmap( cq->mcq.mdl, cq->mcq.p_u_arm_sn );
-       if (context)
-               mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
-
-err_mtt:
-       mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
-
-err_buf:
-       if (context)
-               ib_umem_release(cq->umem);
-       else
-               mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),
-                             &cq->buf.buf);
-
-err_db:
-       if (!context)
-               mlx4_ib_db_free(dev, &cq->db);
-
-err_cq:
-       kfree(cq);
-
-       return ERR_PTR(err);
-}
-
-int mlx4_ib_destroy_cq(struct ib_cq *cq)
-{
-       struct mlx4_ib_dev *dev = to_mdev(cq->device);
-       struct mlx4_ib_cq *mcq = to_mcq(cq);
-
-       mlx4_cq_free(dev->dev, &mcq->mcq);
-       mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);
-
-       if (cq->p_uctx) {
-               ib_umem_unmap( mcq->mcq.mdl, mcq->mcq.p_u_arm_sn );
-               mlx4_ib_db_unmap_user(to_mucontext(cq->p_uctx), &mcq->db);
-               ib_umem_release(mcq->umem);
-       } else {
-               mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),
-                             &mcq->buf.buf);
-               mlx4_ib_db_free(dev, &mcq->db);
-       }
-
-       kfree(mcq);
-
-       return 0;
-}
-
-static void dump_cqe(void *cqe)
-{
-       __be32 *buf = cqe;
-
-       printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
-              be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
-              be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
-              be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
-}
-
-static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
-                                    ib_wc_t *wc)
-{
-       if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
-               printk(KERN_DEBUG "local QP operation err "
-                      "(QPN %06x, WQE index %x, vendor syndrome %02x, "
-                      "opcode = %02x)\n",
-                      be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
-                      cqe->vendor_err_syndrome,
-                      cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
-               dump_cqe(cqe);
-       }
-
-       switch (cqe->syndrome) {
-       case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
-               wc->status = IB_WCS_LOCAL_LEN_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
-               wc->status = IB_WCS_LOCAL_OP_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
-               wc->status = IB_WCS_LOCAL_PROTECTION_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
-               wc->status = IB_WCS_WR_FLUSHED_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_MW_BIND_ERR:
-               wc->status = IB_WCS_MEM_WINDOW_BIND_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
-               wc->status = IB_WCS_BAD_RESP_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
-               wc->status = IB_WCS_LOCAL_ACCESS_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
-               wc->status = IB_WCS_REM_INVALID_REQ_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
-               wc->status = IB_WCS_REM_ACCESS_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
-               wc->status = IB_WCS_REM_OP_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
-               wc->status = IB_WCS_TIMEOUT_RETRY_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
-               wc->status = IB_WCS_RNR_RETRY_ERR;
-               break;
-       case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
-               wc->status = IB_WCS_REM_ABORT_ERR;
-               break;
-       default:
-               wc->status = IB_WC_GENERAL_ERR;
-               break;
-       }
-
-       wc->vendor_specific = cqe->vendor_err_syndrome;
-}
-
-static uint8_t mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum) {
-       
-       #define CSUM_VALID_NUM 0xffff
-       uint8_t res = 0;
-
-       // Verify that IP_OK bit is set and the packet is pure IPv4 packet
-       if ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4            |
-                                                       MLX4_CQE_IPOIB_STATUS_IPV4              |
-                                                       MLX4_CQE_IPOIB_STATUS_IPV4OPT   |
-                                                       MLX4_CQE_IPOIB_STATUS_IPV6              |
-                                                       MLX4_CQE_IPOIB_STATUS_IPOK))    ==
-                               cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4          |
-                                                       MLX4_CQE_IPOIB_STATUS_IPOK))
-       {
-               // IP checksum calculated by MLX4 matched the checksum in the receive packet's 
-               res |= MLX4_NdisPacketIpChecksumSucceeded;
-               if (checksum == CSUM_VALID_NUM) {
-                               // TCP or UDP checksum calculated by MLX4 matched the checksum in the receive packet's 
-                               res |= (MLX4_NdisPacketUdpChecksumSucceeded |
-                                               MLX4_NdisPacketTcpChecksumSucceeded );
-                               ASSERT( status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_TCP | MLX4_CQE_IPOIB_STATUS_UDP));
-               }
-       }
-       return res;
-}
-
-static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
-                           struct mlx4_ib_qp **cur_qp,
-                           ib_wc_t *wc)
-{
-       struct mlx4_cqe *cqe;
-       struct mlx4_qp *mqp;
-       struct mlx4_ib_wq *wq;
-       struct mlx4_ib_srq *srq;
-       int is_send;
-       int is_error;
-       u16 wqe_ctr;
-
-       cqe = next_cqe_sw(cq);
-       if (!cqe)
-               return -EAGAIN;
-
-       ++cq->mcq.cons_index;
-
-       /*
-        * Make sure we read CQ entry contents after we've checked the
-        * ownership bit.
-        */
-       rmb();
-
-       is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
-       is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
-               MLX4_CQE_OPCODE_ERROR;
-
-       if (!*cur_qp || (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (u32)(*cur_qp)->mqp.qpn) {
-               /*
-                * We do not have to take the QP table lock here,
-                * because CQs will be locked while QPs are removed
-                * from the table.
-                */
-#if 1
-               // radix_tree_insert in current implementation seems like
-               // can cause radix_tree_lookup to miss an existing QP
-               // so we call qp_lookup under the spinlock
-               mqp = mlx4_qp_lookup_locked( to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->my_qpn));
-#else
-               mqp = __mlx4_qp_lookup( to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->my_qpn));
-#endif
-
-               if (unlikely(!mqp)) {
-                       printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
-                               cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);
-                       return -EINVAL;
-               }
-
-               *cur_qp = to_mibqp(mqp);
-       }
-
-       if (is_send) {
-               wq = &(*cur_qp)->sq;
-               wqe_ctr = be16_to_cpu(cqe->wqe_index);
-               wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
-               wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
-               ++wq->tail;
-       } else if ((*cur_qp)->ibqp.srq) {
-               srq = to_msrq((*cur_qp)->ibqp.srq);
-               wqe_ctr = be16_to_cpu(cqe->wqe_index);
-               wc->wr_id = srq->wrid[wqe_ctr];
-               mlx4_ib_free_srq_wqe(srq, wqe_ctr);
-       } else {
-               wq        = &(*cur_qp)->rq;
-               wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
-               ++wq->tail;
-       }
-
-       if (is_send) {
-               wc->recv.ud.recv_opt = 0;
-               switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
-               case MLX4_OPCODE_RDMA_WRITE_IMM:
-                       wc->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE;
-               case MLX4_OPCODE_RDMA_WRITE:
-                       wc->wc_type    = IB_WC_RDMA_WRITE;
-                       break;
-               case MLX4_OPCODE_SEND_IMM:
-                       wc->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE;
-               case MLX4_OPCODE_SEND:
-                       wc->wc_type    = IB_WC_SEND;
-                       break;
-               case MLX4_OPCODE_RDMA_READ:
-                       wc->wc_type    = IB_WC_RDMA_READ;
-                       wc->length  = be32_to_cpu(cqe->byte_cnt);
-                       break;
-               case MLX4_OPCODE_ATOMIC_CS:
-                       wc->wc_type    = IB_WC_COMPARE_SWAP;
-                       wc->length  = 8;
-                       break;
-               case MLX4_OPCODE_ATOMIC_FA:
-                       wc->wc_type    = IB_WC_FETCH_ADD;
-                       wc->length  = 8;
-                       break;
-               case MLX4_OPCODE_BIND_MW:
-                       wc->wc_type    = IB_WC_MW_BIND;
-                       break;
-               default:
-                       wc->wc_type       = IB_WC_SEND;
-                       break;
-               }
-       } else {
-               wc->length = be32_to_cpu(cqe->byte_cnt);
-
-               switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
-               case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
-                       wc->wc_type   = IB_WC_RECV_RDMA_WRITE;
-                       wc->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE;
-                       wc->recv.ud.immediate_data = cqe->immed_rss_invalid;
-                       break;
-               case MLX4_RECV_OPCODE_SEND:
-                       wc->wc_type   = IB_WC_RECV;
-                       wc->recv.ud.recv_opt = 0;
-                       break;
-               case MLX4_RECV_OPCODE_SEND_IMM:
-                       wc->wc_type   = IB_WC_RECV;
-                       wc->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE;
-                       wc->recv.ud.immediate_data = cqe->immed_rss_invalid;
-                       break;
-               default:
-                       wc->recv.ud.recv_opt = 0;
-                       wc->wc_type = IB_WC_RECV;
-                       break;
-               }
-
-               wc->recv.ud.remote_lid  = cqe->rlid;
-               wc->recv.ud.remote_sl           = cqe->sl >> 4;
-               wc->recv.ud.remote_qp   = cqe->g_mlpath_rqpn & 0xffffff00;
-               wc->recv.ud.path_bits           = (u8)(cqe->g_mlpath_rqpn & 0x7f);
-               wc->recv.ud.recv_opt            |= cqe->g_mlpath_rqpn & 0x080 ? IB_RECV_OPT_GRH_VALID : 0;
-               wc->recv.ud.pkey_index  = (u16)(be32_to_cpu(cqe->immed_rss_invalid)  & 0x7f);
-               wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum);
-       }
-       if (!is_send && cqe->rlid == 0){
-               MLX4_PRINT(TRACE_LEVEL_INFORMATION,MLX4_DBG_CQ,("found rlid == 0 \n "));
-               wc->recv.ud.recv_opt         |= IB_RECV_OPT_FORWARD;
-       }
-
-       if (unlikely(is_error))
-               mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
-       else
-               wc->status = IB_WCS_SUCCESS;
-
-       return 0;
-}
-
-int mlx4_ib_poll_cq(
-       IN              struct ib_cq *ibcq, 
-       IN      OUT                     ib_wc_t** const                         pp_free_wclist,
-               OUT                     ib_wc_t** const                         pp_done_wclist )
-{
-       struct mlx4_ib_cq *cq = to_mcq(ibcq);
-       struct mlx4_ib_qp *cur_qp = NULL;
-       unsigned long flags;
-       int err = 0;
-       int npolled = 0;
-       ib_wc_t         *wc_p, **next_pp;
-
-       spin_lock_irqsave(&cq->lock, &flags);
-
-       // loop through CQ
-       next_pp = pp_done_wclist;
-       wc_p = *pp_free_wclist;
-       while( wc_p ) {
-               // poll one CQE
-               err = mlx4_ib_poll_one(cq, &cur_qp, wc_p);
-               if (err)
-                       break;
-
-               // prepare for the next loop
-               *next_pp = wc_p;
-               next_pp = &wc_p->p_next;
-               wc_p = wc_p->p_next;
-               ++npolled;
-       }
-
-       // prepare the results
-       *pp_free_wclist = wc_p;         /* Set the head of the free list. */
-       *next_pp = NULL;                                                /* Clear the tail of the done list. */
-
-       // update consumer index
-       if (npolled)
-               mlx4_cq_set_ci(&cq->mcq);
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-       return (err == 0 || err == -EAGAIN)? npolled : err;
-}
-
-int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
-{
-       mlx4_cq_arm(&to_mcq(ibcq)->mcq,
-                   (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
-                   MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
-                   to_mdev(ibcq->device)->uar_map,
-                   MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));
-
-       return 0;
-}
-
-void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
-{
-       u32 prod_index;
-       int nfreed = 0;
-       struct mlx4_cqe *cqe, *dest;
-       u8 owner_bit;
-
-       /*
-        * First we need to find the current producer index, so we
-        * know where to start cleaning from.  It doesn't matter if HW
-        * adds new entries after this loop -- the QP we're worried
-        * about is already in RESET, so the new entries won't come
-        * from our QP and therefore don't need to be checked.
-        */
-       for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
-               if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
-                       break;
-
-       /*
-        * Now sweep backwards through the CQ, removing CQ entries
-        * that match our QP by copying older entries on top of them.
-        */
-       while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
-               cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
-               if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {
-                       if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
-                               mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
-                       ++nfreed;
-               } else if (nfreed) {
-                       dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
-                       owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
-                       memcpy(dest, cqe, sizeof *cqe);
-                       dest->owner_sr_opcode = owner_bit |
-                               (dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
-               }
-       }
-
-       if (nfreed) {
-               cq->mcq.cons_index += nfreed;
-               /*
-                * Make sure update of buffer contents is done before
-                * updating consumer index.
-                */
-               wmb();
-               mlx4_cq_set_ci(&cq->mcq);
-       }
-}
-
-void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
-{
-       spin_lock_irq(&cq->lock);
-       __mlx4_ib_cq_clean(cq, qpn, srq);
-       spin_unlock_irq(&cq->lock);
-}
+/*\r
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.\r
+ *\r
+ * This software is available to you under a choice of one of two\r
+ * licenses.  You may choose to be licensed under the terms of the GNU\r
+ * General Public License (GPL) Version 2, available from the file\r
+ * COPYING in the main directory of this source tree, or the\r
+ * OpenIB.org BSD license below:\r
+ *\r
+ *     Redistribution and use in source and binary forms, with or\r
+ *     without modification, are permitted provided that the following\r
+ *     conditions are met:\r
+ *\r
+ *      - Redistributions of source code must retain the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer.\r
+ *\r
+ *      - Redistributions in binary form must reproduce the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer in the documentation and/or other materials\r
+ *        provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ */\r
+#include <mlx4_debug.h>\r
+#include "mlx4_ib.h"\r
+#include "cq.h"\r
+#include "qp.h"\r
+#include "user.h"\r
+\r
+static void mlx4_ib_cq_comp(struct mlx4_cq *cq)\r
+{\r
+       struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;\r
+       ibcq->comp_handler(ibcq->cq_context);\r
+}\r
+\r
+static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)\r
+{\r
+       ib_event_rec_t event;\r
+       struct ib_cq *ibcq;\r
+\r
+       if (type != MLX4_EVENT_TYPE_CQ_ERROR) {\r
+               printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "\r
+                      "on CQ %06x\n", type, cq->cqn);\r
+               return;\r
+       }\r
+\r
+       ibcq = &to_mibcq(cq)->ibcq;\r
+       event.type = IB_EVENT_CQ_ERR;\r
+       event.context = ibcq->cq_context;\r
+       event.vendor_specific = type;\r
+       ibcq->event_handler(&event);\r
+}\r
+\r
+static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)\r
+{\r
+       int offset = n * sizeof (struct mlx4_cqe);\r
+\r
+       if (buf->buf.nbufs == 1)\r
+               return buf->buf.u.direct.buf + offset;\r
+       else\r
+               return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +\r
+                       (offset & (PAGE_SIZE - 1));\r
+}\r
+\r
+static void *get_cqe(struct mlx4_ib_cq *cq, int n)\r
+{\r
+       return get_cqe_from_buf(&cq->buf, n);\r
+}\r
+\r
+static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)\r
+{\r
+       struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);\r
+\r
+       return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^\r
+               !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;\r
+}\r
+\r
+static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)\r
+{\r
+       return get_sw_cqe(cq, cq->mcq.cons_index);\r
+}\r
+\r
+int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)\r
+{\r
+       struct mlx4_ib_cq *mcq = to_mcq(cq);\r
+       struct mlx4_ib_dev *dev = to_mdev(cq->device);\r
+       struct mlx4_cq_context *context;\r
+       int err;\r
+\r
+       context = kzalloc(sizeof *context, GFP_KERNEL);\r
+       if (!context)\r
+               return -ENOMEM;\r
+\r
+       context->cq_period = cpu_to_be16(cq_period);\r
+       context->cq_max_count = cpu_to_be16(cq_count);\r
+       err = mlx4_cq_modify(dev->dev, &mcq->mcq, context, 1);\r
+\r
+       kfree(context);\r
+       return err;\r
+}\r
+\r
+struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,\r
+                               struct ib_ucontext *context,\r
+                               struct ib_udata *udata)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(ibdev);\r
+       struct mlx4_ib_cq *cq;\r
+       struct mlx4_uar *uar;\r
+       int buf_size;\r
+       int err;\r
+\r
+       UNUSED_PARAM(vector);\r
+\r
+       if (entries < 1 || entries > dev->dev->caps.max_cqes)\r
+               return ERR_PTR(-EINVAL);\r
+\r
+       cq = kzalloc(sizeof *cq, GFP_KERNEL);\r
+       if (!cq)\r
+               return ERR_PTR(-ENOMEM);\r
+\r
+       entries      = roundup_pow_of_two(entries + 1);\r
+       cq->ibcq.cqe = entries - 1;\r
+       buf_size     = entries * sizeof (struct mlx4_cqe);\r
+       spin_lock_init(&cq->lock);\r
+\r
+       if (context) {\r
+               struct mlx4_ib_create_cq ucmd;\r
+\r
+               if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {\r
+                       err = -EFAULT;\r
+                       goto err_cq;\r
+               }\r
+\r
+               cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,\r
+                                      IB_ACCESS_LOCAL_WRITE, FALSE);\r
+               if (IS_ERR(cq->umem)) {\r
+                       err = PTR_ERR(cq->umem);\r
+                       goto err_cq;\r
+               }\r
+\r
+               err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),\r
+                                   ilog2(cq->umem->page_size), &cq->buf.mtt);\r
+               if (err)\r
+                       goto err_buf;\r
+\r
+               err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);\r
+               if (err)\r
+                       goto err_mtt;\r
+\r
+               err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,\r
+                                         &cq->db);\r
+               if (err)\r
+                       goto err_mtt;\r
+\r
+               // add mapping to user's arm_sn variable\r
+               // we have no way pass the completion event to provider library\r
+               // so we'll increment user's arm_sn in kernel\r
+               err = ib_umem_map( ucmd.arm_sn_addr, sizeof(int), \r
+                       IB_ACCESS_LOCAL_WRITE, &cq->mcq.mdl, &cq->mcq.p_u_arm_sn );\r
+               if (err)\r
+                       goto err_dbmap;\r
+\r
+               uar = &to_mucontext(context)->uar;\r
+       } else {\r
+               err = mlx4_ib_db_alloc(dev, &cq->db, 1);\r
+               if (err)\r
+                       goto err_cq;\r
+\r
+               cq->mcq.set_ci_db  = cq->db.db;\r
+               cq->mcq.arm_db     = cq->db.db + 1;\r
+               *cq->mcq.set_ci_db = 0;\r
+               *cq->mcq.arm_db    = 0;\r
+\r
+               if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {\r
+                       err = -ENOMEM;\r
+                       goto err_db;\r
+               }\r
+\r
+               err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,\r
+                                   &cq->buf.mtt);\r
+               if (err)\r
+                       goto err_buf;\r
+\r
+               err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);\r
+               if (err)\r
+                       goto err_mtt;\r
+\r
+               cq->mcq.p_u_arm_sn = NULL;\r
+               uar = &dev->priv_uar;\r
+       }\r
+\r
+       err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,\r
+               cq->db.dma.da, &cq->mcq, 0, 0);\r
+       if (err)\r
+               goto err_dbmap;\r
+\r
+       cq->mcq.comp  = mlx4_ib_cq_comp;\r
+       cq->mcq.event = mlx4_ib_cq_event;\r
+\r
+       if (context) \r
+               if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {\r
+                       err = -EFAULT;\r
+                       goto err_dbmap;\r
+               }\r
+\r
+       return &cq->ibcq;\r
+\r
+err_dbmap:\r
+       ib_umem_unmap( cq->mcq.mdl, cq->mcq.p_u_arm_sn );\r
+       if (context)\r
+               mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);\r
+\r
+err_mtt:\r
+       mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);\r
+\r
+err_buf:\r
+       if (context)\r
+               ib_umem_release(cq->umem);\r
+       else\r
+               mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),\r
+                             &cq->buf.buf);\r
+\r
+err_db:\r
+       if (!context)\r
+               mlx4_ib_db_free(dev, &cq->db);\r
+\r
+err_cq:\r
+       kfree(cq);\r
+\r
+       return ERR_PTR(err);\r
+}\r
+\r
+int mlx4_ib_destroy_cq(struct ib_cq *cq)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(cq->device);\r
+       struct mlx4_ib_cq *mcq = to_mcq(cq);\r
+\r
+       mlx4_cq_free(dev->dev, &mcq->mcq);\r
+       mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);\r
+\r
+       if (cq->p_uctx) {\r
+               ib_umem_unmap( mcq->mcq.mdl, mcq->mcq.p_u_arm_sn );\r
+               mlx4_ib_db_unmap_user(to_mucontext(cq->p_uctx), &mcq->db);\r
+               ib_umem_release(mcq->umem);\r
+       } else {\r
+               mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),\r
+                             &mcq->buf.buf);\r
+               mlx4_ib_db_free(dev, &mcq->db);\r
+       }\r
+\r
+       kfree(mcq);\r
+\r
+       return 0;\r
+}\r
+\r
+static void dump_cqe(void *cqe)\r
+{\r
+       __be32 *buf = cqe;\r
+\r
+       printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",\r
+              be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),\r
+              be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),\r
+              be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));\r
+}\r
+\r
+static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,\r
+                                    ib_wc_t *wc)\r
+{\r
+       if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {\r
+               printk(KERN_DEBUG "local QP operation err "\r
+                      "(QPN %06x, WQE index %x, vendor syndrome %02x, "\r
+                      "opcode = %02x)\n",\r
+                      be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),\r
+                      cqe->vendor_err_syndrome,\r
+                      cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);\r
+               dump_cqe(cqe);\r
+       }\r
+\r
+       switch (cqe->syndrome) {\r
+       case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:\r
+               wc->status = IB_WCS_LOCAL_LEN_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:\r
+               wc->status = IB_WCS_LOCAL_OP_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:\r
+               wc->status = IB_WCS_LOCAL_PROTECTION_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:\r
+               wc->status = IB_WCS_WR_FLUSHED_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_MW_BIND_ERR:\r
+               wc->status = IB_WCS_MEM_WINDOW_BIND_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_BAD_RESP_ERR:\r
+               wc->status = IB_WCS_BAD_RESP_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:\r
+               wc->status = IB_WCS_LOCAL_ACCESS_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:\r
+               wc->status = IB_WCS_REM_INVALID_REQ_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:\r
+               wc->status = IB_WCS_REM_ACCESS_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:\r
+               wc->status = IB_WCS_REM_OP_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:\r
+               wc->status = IB_WCS_TIMEOUT_RETRY_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:\r
+               wc->status = IB_WCS_RNR_RETRY_ERR;\r
+               break;\r
+       case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:\r
+               wc->status = IB_WCS_REM_ABORT_ERR;\r
+               break;\r
+       default:\r
+               wc->status = IB_WC_GENERAL_ERR;\r
+               break;\r
+       }\r
+\r
+       wc->vendor_specific = cqe->vendor_err_syndrome;\r
+}\r
+\r
+static uint8_t mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum) {\r
+       \r
+       #define CSUM_VALID_NUM 0xffff\r
+       uint8_t res = 0;\r
+\r
+       // Verify that IP_OK bit is set and the packet is pure IPv4 packet\r
+       if ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4            |\r
+                                                       MLX4_CQE_IPOIB_STATUS_IPV4              |\r
+                                                       MLX4_CQE_IPOIB_STATUS_IPV4OPT   |\r
+                                                       MLX4_CQE_IPOIB_STATUS_IPV6              |\r
+                                                       MLX4_CQE_IPOIB_STATUS_IPOK))    ==\r
+                               cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4          |\r
+                                                       MLX4_CQE_IPOIB_STATUS_IPOK))\r
+       {\r
+               // IP checksum calculated by MLX4 matched the checksum in the receive packet's \r
+               res |= MLX4_NdisPacketIpChecksumSucceeded;\r
+               if (checksum == CSUM_VALID_NUM) {\r
+                               // TCP or UDP checksum calculated by MLX4 matched the checksum in the receive packet's \r
+                               res |= (MLX4_NdisPacketUdpChecksumSucceeded |\r
+                                               MLX4_NdisPacketTcpChecksumSucceeded );\r
+                               ASSERT( status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_TCP | MLX4_CQE_IPOIB_STATUS_UDP));\r
+               }\r
+       }\r
+       return res;\r
+}\r
+\r
+static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,\r
+                           struct mlx4_ib_qp **cur_qp,\r
+                           ib_wc_t *wc)\r
+{\r
+       struct mlx4_cqe *cqe;\r
+       struct mlx4_qp *mqp;\r
+       struct mlx4_ib_wq *wq;\r
+       struct mlx4_ib_srq *srq;\r
+       int is_send;\r
+       int is_error;\r
+       u16 wqe_ctr;\r
+\r
+       cqe = next_cqe_sw(cq);\r
+       if (!cqe)\r
+               return -EAGAIN;\r
+\r
+       ++cq->mcq.cons_index;\r
+\r
+       /*\r
+        * Make sure we read CQ entry contents after we've checked the\r
+        * ownership bit.\r
+        */\r
+       rmb();\r
+\r
+       is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;\r
+       is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==\r
+               MLX4_CQE_OPCODE_ERROR;\r
+\r
+       if (!*cur_qp || (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (u32)(*cur_qp)->mqp.qpn) {\r
+               /*\r
+                * We do not have to take the QP table lock here,\r
+                * because CQs will be locked while QPs are removed\r
+                * from the table.\r
+                */\r
+#if 1\r
+               // radix_tree_insert in current implementation seems like\r
+               // can cause radix_tree_lookup to miss an existing QP\r
+               // so we call qp_lookup under the spinlock\r
+               mqp = mlx4_qp_lookup_locked( to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->my_qpn));\r
+#else\r
+               mqp = __mlx4_qp_lookup( to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->my_qpn));\r
+#endif\r
+\r
+               if (unlikely(!mqp)) {\r
+                       printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",\r
+                               cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);\r
+                       return -EINVAL;\r
+               }\r
+\r
+               *cur_qp = to_mibqp(mqp);\r
+       }\r
+\r
+       if (is_send) {\r
+               wq = &(*cur_qp)->sq;\r
+               wqe_ctr = be16_to_cpu(cqe->wqe_index);\r
+               wq->tail += (u16) (wqe_ctr - (u16) wq->tail);\r
+               wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];\r
+               ++wq->tail;\r
+       } else if ((*cur_qp)->ibqp.srq) {\r
+               srq = to_msrq((*cur_qp)->ibqp.srq);\r
+               wqe_ctr = be16_to_cpu(cqe->wqe_index);\r
+               wc->wr_id = srq->wrid[wqe_ctr];\r
+               mlx4_ib_free_srq_wqe(srq, wqe_ctr);\r
+       } else {\r
+               wq        = &(*cur_qp)->rq;\r
+               wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];\r
+               ++wq->tail;\r
+       }\r
+\r
+       if (is_send) {\r
+               wc->recv.ud.recv_opt = 0;\r
+               switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {\r
+               case MLX4_OPCODE_RDMA_WRITE_IMM:\r
+                       wc->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE;\r
+               case MLX4_OPCODE_RDMA_WRITE:\r
+                       wc->wc_type    = IB_WC_RDMA_WRITE;\r
+                       break;\r
+               case MLX4_OPCODE_SEND_IMM:\r
+                       wc->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE;\r
+               case MLX4_OPCODE_SEND:\r
+                       wc->wc_type    = IB_WC_SEND;\r
+                       break;\r
+               case MLX4_OPCODE_RDMA_READ:\r
+                       wc->wc_type    = IB_WC_RDMA_READ;\r
+                       wc->length  = be32_to_cpu(cqe->byte_cnt);\r
+                       break;\r
+               case MLX4_OPCODE_ATOMIC_CS:\r
+                       wc->wc_type    = IB_WC_COMPARE_SWAP;\r
+                       wc->length  = 8;\r
+                       break;\r
+               case MLX4_OPCODE_ATOMIC_FA:\r
+                       wc->wc_type    = IB_WC_FETCH_ADD;\r
+                       wc->length  = 8;\r
+                       break;\r
+               case MLX4_OPCODE_BIND_MW:\r
+                       wc->wc_type    = IB_WC_MW_BIND;\r
+                       break;\r
+               default:\r
+                       wc->wc_type       = IB_WC_SEND;\r
+                       break;\r
+               }\r
+       } else {\r
+               wc->length = be32_to_cpu(cqe->byte_cnt);\r
+\r
+               switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {\r
+               case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:\r
+                       wc->wc_type   = IB_WC_RECV_RDMA_WRITE;\r
+                       wc->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE;\r
+                       wc->recv.ud.immediate_data = cqe->immed_rss_invalid;\r
+                       break;\r
+               case MLX4_RECV_OPCODE_SEND:\r
+                       wc->wc_type   = IB_WC_RECV;\r
+                       wc->recv.ud.recv_opt = 0;\r
+                       break;\r
+               case MLX4_RECV_OPCODE_SEND_IMM:\r
+                       wc->wc_type   = IB_WC_RECV;\r
+                       wc->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE;\r
+                       wc->recv.ud.immediate_data = cqe->immed_rss_invalid;\r
+                       break;\r
+               default:\r
+                       wc->recv.ud.recv_opt = 0;\r
+                       wc->wc_type = IB_WC_RECV;\r
+                       break;\r
+               }\r
+\r
+               wc->recv.ud.remote_lid  = cqe->rlid;\r
+               wc->recv.ud.remote_sl           = cqe->sl >> 4;\r
+               wc->recv.ud.remote_qp   = cqe->g_mlpath_rqpn & 0xffffff00;\r
+               wc->recv.ud.path_bits           = (u8)(cqe->g_mlpath_rqpn & 0x7f);\r
+               wc->recv.ud.recv_opt            |= cqe->g_mlpath_rqpn & 0x080 ? IB_RECV_OPT_GRH_VALID : 0;\r
+               wc->recv.ud.pkey_index  = (u16)(be32_to_cpu(cqe->immed_rss_invalid)  & 0x7f);\r
+               wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum);\r
+       }\r
+       if (!is_send && cqe->rlid == 0){\r
+               MLX4_PRINT(TRACE_LEVEL_INFORMATION,MLX4_DBG_CQ,("found rlid == 0 \n "));\r
+               wc->recv.ud.recv_opt         |= IB_RECV_OPT_FORWARD;\r
+       }\r
+\r
+       if (unlikely(is_error))\r
+               mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);\r
+       else\r
+               wc->status = IB_WCS_SUCCESS;\r
+\r
+       return 0;\r
+}\r
+\r
+int mlx4_ib_poll_cq(\r
+       IN              struct ib_cq *ibcq, \r
+       IN      OUT                     ib_wc_t** const                         pp_free_wclist,\r
+               OUT                     ib_wc_t** const                         pp_done_wclist )\r
+{\r
+       struct mlx4_ib_cq *cq = to_mcq(ibcq);\r
+       struct mlx4_ib_qp *cur_qp = NULL;\r
+       unsigned long flags;\r
+       int err = 0;\r
+       int npolled = 0;\r
+       ib_wc_t         *wc_p, **next_pp;\r
+\r
+       spin_lock_irqsave(&cq->lock, &flags);\r
+\r
+       // loop through CQ\r
+       next_pp = pp_done_wclist;\r
+       wc_p = *pp_free_wclist;\r
+       while( wc_p ) {\r
+               // poll one CQE\r
+               err = mlx4_ib_poll_one(cq, &cur_qp, wc_p);\r
+               if (err)\r
+                       break;\r
+\r
+               // prepare for the next loop\r
+               *next_pp = wc_p;\r
+               next_pp = &wc_p->p_next;\r
+               wc_p = wc_p->p_next;\r
+               ++npolled;\r
+       }\r
+\r
+       // prepare the results\r
+       *pp_free_wclist = wc_p;         /* Set the head of the free list. */\r
+       *next_pp = NULL;                                                /* Clear the tail of the done list. */\r
+\r
+       // update consumer index\r
+       if (npolled)\r
+               mlx4_cq_set_ci(&cq->mcq);\r
+\r
+       spin_unlock_irqrestore(&cq->lock, flags);\r
+       return (err == 0 || err == -EAGAIN)? npolled : err;\r
+}\r
+\r
+int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)\r
+{\r
+       mlx4_cq_arm(&to_mcq(ibcq)->mcq,\r
+                   (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?\r
+                   MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,\r
+                   to_mdev(ibcq->device)->uar_map,\r
+                   MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));\r
+\r
+       return 0;\r
+}\r
+\r
+void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)\r
+{\r
+       u32 prod_index;\r
+       int nfreed = 0;\r
+       struct mlx4_cqe *cqe, *dest;\r
+       u8 owner_bit;\r
+\r
+       /*\r
+        * First we need to find the current producer index, so we\r
+        * know where to start cleaning from.  It doesn't matter if HW\r
+        * adds new entries after this loop -- the QP we're worried\r
+        * about is already in RESET, so the new entries won't come\r
+        * from our QP and therefore don't need to be checked.\r
+        */\r
+       for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)\r
+               if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)\r
+                       break;\r
+\r
+       /*\r
+        * Now sweep backwards through the CQ, removing CQ entries\r
+        * that match our QP by copying older entries on top of them.\r
+        */\r
+       while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {\r
+               cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);\r
+               if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {\r
+                       if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))\r
+                               mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));\r
+                       ++nfreed;\r
+               } else if (nfreed) {\r
+                       dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);\r
+                       owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;\r
+                       memcpy(dest, cqe, sizeof *cqe);\r
+                       dest->owner_sr_opcode = owner_bit |\r
+                               (dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);\r
+               }\r
+       }\r
+\r
+       if (nfreed) {\r
+               cq->mcq.cons_index += nfreed;\r
+               /*\r
+                * Make sure update of buffer contents is done before\r
+                * updating consumer index.\r
+                */\r
+               wmb();\r
+               mlx4_cq_set_ci(&cq->mcq);\r
+       }\r
+}\r
+\r
+void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)\r
+{\r
+       spin_lock_irq(&cq->lock);\r
+       __mlx4_ib_cq_clean(cq, qpn, srq);\r
+       spin_unlock_irq(&cq->lock);\r
+}\r
index 06f2985..18e1af1 100644 (file)
-/*
- * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "mlx4_ib.h"
-#include "ib_cache.h"
-#include "ib_pack.h"
-#include "qp.h"
-#include "user.h"
-
-enum {
-       MLX4_IB_ACK_REQ_FREQ    = 8,
-};
-
-enum {
-       MLX4_IB_DEFAULT_SCHED_QUEUE     = 0x83,
-       MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
-};
-
-enum {
-       /*
-        * Largest possible UD header: send with GRH and immediate data.
-        */
-       MLX4_IB_UD_HEADER_SIZE          = 72
-};
-
-struct mlx4_ib_sqp {
-       struct mlx4_ib_qp       qp;
-       int                     pkey_index;
-       u32                     qkey;
-       u32                     send_psn;
-       struct ib_ud_header     ud_header;
-       u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];
-};
-
-enum {
-       MLX4_IB_MIN_SQ_STRIDE = 6
-};
-
-static const __be32 mlx4_ib_opcode[] = {
-       __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),         /*      [IB_WR_RDMA_WRITE]                      */
-       __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),     /*      [IB_WR_RDMA_WRITE_WITH_IMM] */
-       __constant_cpu_to_be32(MLX4_OPCODE_SEND),                       /*      [IB_WR_SEND]                            */
-       __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),           /*      [IB_WR_SEND_WITH_IMM]           */
-       __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),          /*      [IB_WR_RDMA_READ]                       */
-       __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),          /*      [IB_WR_ATOMIC_CMP_AND_SWP]      */
-       __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),          /*      [IB_WR_ATOMIC_FETCH_AND_ADD]*/
-};
-
-static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
-{
-       return container_of(mqp, struct mlx4_ib_sqp, qp);
-}
-
-static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
-{
-       return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
-               qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
-}
-
-static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
-{
-       return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
-               qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
-}
-
-static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
-{
-       if (qp->buf.nbufs == 1)
-               return qp->buf.u.direct.buf + offset;
-       else
-               return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-                       (offset & (PAGE_SIZE - 1));
-}
-
-static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
-{
-       return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
-}
-
-static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
-{
-       return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
-}
-
-/*
- * Stamp a SQ WQE so that it is invalid if prefetched by marking the
- * first four bytes of every 64 byte chunk with 0xffffffff, except for
- * the very first chunk of the WQE.
- */
-static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
-{
-       u32 *wqe = get_send_wqe(qp, n);
-       int i;
-
-       for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
-               wqe[i] = 0xffffffff;
-}
-
-static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
-{
-       struct ib_event event;
-       struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
-
-       if (type == MLX4_EVENT_TYPE_PATH_MIG)
-               to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
-
-       if (ibqp->event_handler) {
-               event.device     = ibqp->device;
-               event.element.qp = ibqp;
-               switch (type) {
-               case MLX4_EVENT_TYPE_PATH_MIG:
-                       event.event = IB_EVENT_PATH_MIG;
-                       break;
-               case MLX4_EVENT_TYPE_COMM_EST:
-                       event.event = IB_EVENT_COMM_EST;
-                       break;
-               case MLX4_EVENT_TYPE_SQ_DRAINED:
-                       event.event = IB_EVENT_SQ_DRAINED;
-                       break;
-               case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
-                       event.event = IB_EVENT_QP_LAST_WQE_REACHED;
-                       break;
-               case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
-                       event.event = IB_EVENT_QP_FATAL;
-                       break;
-               case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
-                       event.event = IB_EVENT_PATH_MIG_ERR;
-                       break;
-               case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-                       event.event = IB_EVENT_QP_REQ_ERR;
-                       break;
-               case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
-                       event.event = IB_EVENT_QP_ACCESS_ERR;
-                       break;
-               default:
-                       printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
-                              "on QP %06x\n", type, qp->qpn);
-                       return;
-               }
-
-               ibqp->event_handler(&event, ibqp->qp_context);
-       }
-}
-
-static int send_wqe_overhead(enum ib_qp_type type)
-{
-       /*
-        * UD WQEs must have a datagram segment.
-        * RC and UC WQEs might have a remote address segment.
-        * MLX WQEs need two extra inline data segments (for the UD
-        * header and space for the ICRC).
-        */
-       switch (type) {
-       case IB_QPT_UD:
-               return sizeof (struct mlx4_wqe_ctrl_seg) +
-                       sizeof (struct mlx4_wqe_datagram_seg);
-       case IB_QPT_UC:
-               return sizeof (struct mlx4_wqe_ctrl_seg) +
-                       sizeof (struct mlx4_wqe_raddr_seg);
-       case IB_QPT_RC:
-               return sizeof (struct mlx4_wqe_ctrl_seg) +
-                       sizeof (struct mlx4_wqe_atomic_seg) +
-                       sizeof (struct mlx4_wqe_raddr_seg);
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               return sizeof (struct mlx4_wqe_ctrl_seg) +
-                       ALIGN(MLX4_IB_UD_HEADER_SIZE +
-                             DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
-                                          MLX4_INLINE_ALIGN) *
-                             sizeof (struct mlx4_wqe_inline_seg),
-                             sizeof (struct mlx4_wqe_data_seg)) +
-                       ALIGN(4 +
-                             sizeof (struct mlx4_wqe_inline_seg),
-                             sizeof (struct mlx4_wqe_data_seg));
-       default:
-               return sizeof (struct mlx4_wqe_ctrl_seg);
-       }
-}
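
For the MLX transports (SMI/GSI) the overhead covers the UD header and the ICRC placeholder, both carried as inline data. A rough standalone version of that calculation; the 16-byte control/data segment sizes and the 4-byte inline segment header are assumed values for illustration, only the 72-byte MLX4_IB_UD_HEADER_SIZE comes from this file:

        #include <stdio.h>

        #define ALIGN_UP(x, a)     (((x) + (a) - 1) / (a) * (a))
        #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

        int main(void)
        {
                const int ctrl_seg     = 16;    /* assumed control segment size  */
                const int data_seg     = 16;    /* assumed data segment size     */
                const int inline_seg   = 4;     /* assumed inline segment header */
                const int inline_align = 64;    /* inline alignment boundary     */
                const int ud_header    = 72;    /* MLX4_IB_UD_HEADER_SIZE        */

                /* one inline header per 64-byte chunk the UD header can span */
                int hdr = ALIGN_UP(ud_header +
                                   DIV_ROUND_UP(ud_header, inline_align) * inline_seg,
                                   data_seg);
                /* 4 bytes reserved for the ICRC plus its own inline header */
                int icrc = ALIGN_UP(4 + inline_seg, data_seg);

                printf("MLX WQE overhead = %d + %d + %d = %d bytes\n",
                       ctrl_seg, hdr, icrc, ctrl_seg + hdr + icrc);
                return 0;
        }

With these assumed sizes the overhead comes out to 16 + 80 + 16 = 112 bytes per SMI/GSI send WQE.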
-
-static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
-                      int is_user, int has_srq, struct mlx4_ib_qp *qp)
-{
-       /* Sanity check RQ size before proceeding */
-       if ((int)cap->max_recv_wr  > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
-           (int)cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
-               return -EINVAL;
-
-       if (has_srq) {
-               /* QPs attached to an SRQ should have no RQ */
-               if (cap->max_recv_wr)
-                       return -EINVAL;
-
-               qp->rq.wqe_cnt = qp->rq.max_gs = 0;
-       } else {
-               /* HW requires >= 1 RQ entry with >= 1 gather entry */
-               if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
-                       return -EINVAL;
-
-               qp->rq.wqe_cnt   = roundup_pow_of_two(max(1U, cap->max_recv_wr));
-               qp->rq.max_gs    = roundup_pow_of_two(max(1U, cap->max_recv_sge));
-               qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
-       }
-
-       /* leave userspace return values as they were, so as not to break ABI */
-       if (is_user) {
-               cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
-               cap->max_recv_sge = qp->rq.max_gs;
-       } else {
-               cap->max_recv_wr  = qp->rq.max_post =
-                       min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
-               cap->max_recv_sge = min(qp->rq.max_gs,
-                                       min(dev->dev->caps.max_sq_sg,
-                                       dev->dev->caps.max_rq_sg));
-       }
-       /* We don't support inline sends for kernel QPs (yet) */
-
-       return 0;
-}
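
set_rq_size() rounds both the RQ depth and the gather-entry count up to powers of two, then derives the WQE stride from the gather count. A small worked sketch of that arithmetic, with local stand-ins for roundup_pow_of_two()/ilog2() and an assumed 16-byte data segment:

        #include <stdio.h>

        static unsigned roundup_pow2(unsigned v)
        {
                unsigned r = 1;
                while (r < v)
                        r <<= 1;
                return r;
        }

        static unsigned ilog2_u(unsigned v)     /* v is a power of two here */
        {
                unsigned l = 0;
                while (v > 1) {
                        v >>= 1;
                        l++;
                }
                return l;
        }

        int main(void)
        {
                unsigned max_recv_wr = 100, max_recv_sge = 3;
                unsigned data_seg = 16;         /* assumed data segment size */

                unsigned wqe_cnt   = roundup_pow2(max_recv_wr  ? max_recv_wr  : 1);
                unsigned max_gs    = roundup_pow2(max_recv_sge ? max_recv_sge : 1);
                unsigned wqe_shift = ilog2_u(max_gs * data_seg);

                /* 100 WRs -> 128 WQEs, 3 SGEs -> 4, stride 2^6 = 64 bytes */
                printf("wqe_cnt=%u max_gs=%u wqe_shift=%u\n",
                       wqe_cnt, max_gs, wqe_shift);
                return 0;
        }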
-
-static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
-                             enum ib_qp_type type, struct mlx4_ib_qp *qp)
-{
-       /* Sanity check SQ size before proceeding */
-       if ((int)cap->max_send_wr       > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE  ||
-           (int)cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
-           (int)cap->max_inline_data + send_wqe_overhead(type) +
-           (int)sizeof(struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
-               return -EINVAL;
-
-       /*
-        * For MLX transport we need 2 extra S/G entries:
-        * one for the header and one for the checksum at the end
-        */
-       if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
-           (int)cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
-               return -EINVAL;
-
-       qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
-                                                       sizeof (struct mlx4_wqe_data_seg),
-                                                       cap->max_inline_data +
-                                                       sizeof (struct mlx4_wqe_inline_seg)) +
-                                                   send_wqe_overhead(type)));
-       qp->sq.wqe_shift = max(MLX4_IB_SQ_MIN_WQE_SHIFT, qp->sq.wqe_shift);
-       qp->sq.max_gs    = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
-               sizeof (struct mlx4_wqe_data_seg);
-
-       /*
-        * We need to leave 2 KB + 1 WQE of headroom in the SQ to
-        * allow HW to prefetch.
-        */
-       qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift);
-       qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);
-
-       qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
-               (qp->sq.wqe_cnt << qp->sq.wqe_shift);
-       if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
-               qp->rq.offset = 0;
-               qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
-       } else {
-               qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
-               qp->sq.offset = 0;
-       }
-
-       cap->max_send_wr = qp->sq.max_post =
-               min(qp->sq.wqe_cnt - qp->sq_spare_wqes,
-                       dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE);
-       cap->max_send_sge = min(qp->sq.max_gs,
-                               min(dev->dev->caps.max_sq_sg,
-                                       dev->dev->caps.max_rq_sg));
-       /* We don't support inline sends for kernel QPs (yet) */
-       cap->max_inline_data = 0;
-
-       return 0;
-}
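
set_kernel_sq_size() also decides how the single QP buffer is laid out: the queue with the larger stride is placed at offset 0 and the other follows it. A standalone sketch of that layout decision with hypothetical geometries (nothing below comes from the real capability limits):

        #include <stdio.h>

        int main(void)
        {
                unsigned rq_wqe_cnt = 128, rq_wqe_shift = 6;    /* 64-byte RQ WQEs  */
                unsigned sq_wqe_cnt = 256, sq_wqe_shift = 7;    /* 128-byte SQ WQEs */
                unsigned rq_offset, sq_offset;

                unsigned buf_size = (rq_wqe_cnt << rq_wqe_shift) +
                                    (sq_wqe_cnt << sq_wqe_shift);

                /* larger-stride queue first, as in the code above */
                if (rq_wqe_shift > sq_wqe_shift) {
                        rq_offset = 0;
                        sq_offset = rq_wqe_cnt << rq_wqe_shift;
                } else {
                        sq_offset = 0;
                        rq_offset = sq_wqe_cnt << sq_wqe_shift;
                }

                printf("buf_size=%u rq_offset=%u sq_offset=%u\n",
                       buf_size, rq_offset, sq_offset);
                return 0;
        }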
-
-static int set_user_sq_size(struct mlx4_ib_dev *dev,
-                           struct mlx4_ib_qp *qp,
-                           struct mlx4_ib_create_qp *ucmd)
-{
-       /* Sanity check SQ size before proceeding */
-       if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes       ||
-           ucmd->log_sq_stride >
-               ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
-           ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
-               return -EINVAL;
-
-       qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;
-       qp->sq.wqe_shift = ucmd->log_sq_stride;
-
-       qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
-               (qp->sq.wqe_cnt << qp->sq.wqe_shift);
-
-       return 0;
-}
-
-static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
-                           struct ib_qp_init_attr *init_attr,
-                           struct ib_udata *udata, u32 sqpn, struct mlx4_ib_qp *qp)
-{
-       int err;
-
-       mutex_init(&qp->mutex);
-       spin_lock_init(&qp->sq.lock);
-       spin_lock_init(&qp->rq.lock);
-
-       qp->state        = XIB_QPS_RESET;
-       qp->atomic_rd_en = 0;
-       qp->resp_depth   = 0;
-
-       qp->rq.head         = 0;
-       qp->rq.tail         = 0;
-       qp->sq.head         = 0;
-       qp->sq.tail         = 0;
-
-       err = set_rq_size(dev, &init_attr->cap, !!pd->p_uctx, !!init_attr->srq, qp);
-       if (err)
-               goto err;
-
-       if (pd->p_uctx) {
-               struct mlx4_ib_create_qp ucmd;
-
-               if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
-                       err = -EFAULT;
-                       goto err;
-               }
-
-               qp->sq_no_prefetch = ucmd.sq_no_prefetch;
-
-               err = set_user_sq_size(dev, qp, &ucmd);
-               if (err)
-                       goto err;
-
-               qp->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,
-                                      qp->buf_size, 0, FALSE);
-               if (IS_ERR(qp->umem)) {
-                       err = PTR_ERR(qp->umem);
-                       goto err;
-               }
-
-               err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
-                                   ilog2(qp->umem->page_size), &qp->mtt);
-               if (err)
-                       goto err_buf;
-
-               err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
-               if (err)
-                       goto err_mtt;
-
-               if (!init_attr->srq) {
-                       err = mlx4_ib_db_map_user(to_mucontext(pd->p_uctx),
-                                                 ucmd.db_addr, &qp->db);
-                       if (err)
-                               goto err_mtt;
-               }
-       } else {
-               qp->sq_no_prefetch = 0;
-
-               err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
-               if (err)
-                       goto err;
-
-               if (!init_attr->srq) {
-                       err = mlx4_ib_db_alloc(dev, &qp->db, 0);
-                       if (err)
-                               goto err;
-
-                       *qp->db.db = 0;
-               }
-
-               if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
-                       err = -ENOMEM;
-                       goto err_db;
-               }
-
-               err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
-                                   &qp->mtt);
-               if (err)
-                       goto err_buf;
-
-               err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
-               if (err)
-                       goto err_mtt;
-
-               qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-               qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-
-               if (!qp->sq.wrid || !qp->rq.wrid) {
-                       err = -ENOMEM;
-                       goto err_wrid;
-               }
-       }
-
-       if (!sqpn)
-               err = mlx4_qp_reserve_range(dev->dev, 1, 1, &sqpn);
-       if (err)
-               goto err_wrid;
-
-       err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
-       if (err)
-               goto err_wrid;
-
-       /*
-        * Hardware wants QPN written in big-endian order (after
-        * shifting) for send doorbell.  Precompute this value to save
-        * a little bit when posting sends.
-        */
-       qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
-
-       if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
-               qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
-       else
-               qp->sq_signal_bits = 0;
-
-       qp->mqp.event = mlx4_ib_qp_event;
-
-       return 0;
-
-err_wrid:
-       if (pd->p_uctx) {
-               if (!init_attr->srq)
-                       mlx4_ib_db_unmap_user(to_mucontext(pd->p_uctx),
-                                             &qp->db);
-       } else {
-               kfree(qp->sq.wrid);
-               kfree(qp->rq.wrid);
-       }
-
-err_mtt:
-       mlx4_mtt_cleanup(dev->dev, &qp->mtt);
-
-err_buf:
-       if (pd->p_uctx)
-               ib_umem_release(qp->umem);
-       else
-               mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
-
-err_db:
-       if (!pd->p_uctx && !init_attr->srq)
-               mlx4_ib_db_free(dev, &qp->db);
-
-err:
-       return err;
-}
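
create_qp_common() precomputes the doorbell value so the send path only writes one ready-made dword: the QPN is shifted into place and byte-swapped once. A tiny standalone illustration for a hypothetical QPN; swab32() below is a local byte-swap stand-in for the kernel helper:

        #include <stdint.h>
        #include <stdio.h>

        static uint32_t swab32(uint32_t v)      /* local byte-swap stand-in */
        {
                return (v >> 24) | ((v >> 8) & 0x0000ff00) |
                       ((v << 8) & 0x00ff0000) | (v << 24);
        }

        int main(void)
        {
                uint32_t qpn = 0x41;                       /* hypothetical QP number */
                uint32_t doorbell_qpn = swab32(qpn << 8);

                /* 0x41 -> 0x00410000: the QPN already in the byte order the
                 * send doorbell expects, on a little-endian host */
                printf("qpn=%#x doorbell_qpn=%#010x\n",
                       (unsigned)qpn, (unsigned)doorbell_qpn);
                return 0;
        }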
-
-static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
-{
-       switch (state) {
-       case XIB_QPS_RESET:     return MLX4_QP_STATE_RST;
-       case XIB_QPS_INIT:      return MLX4_QP_STATE_INIT;
-       case XIB_QPS_RTR:       return MLX4_QP_STATE_RTR;
-       case XIB_QPS_RTS:       return MLX4_QP_STATE_RTS;
-       case XIB_QPS_SQD:       return MLX4_QP_STATE_SQD;
-       case XIB_QPS_SQE:       return MLX4_QP_STATE_SQER;
-       case XIB_QPS_ERR:       return MLX4_QP_STATE_ERR;
-       default:                return -1;
-       }
-}
-
-static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
-{
-       if (send_cq == recv_cq)
-               spin_lock_irq(&send_cq->lock);
-       else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
-               spin_lock_irq(&send_cq->lock);
-               spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
-       } else {
-               spin_lock_irq(&recv_cq->lock);
-               spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
-       }
-}
-
-static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
-{
-       if (send_cq == recv_cq)
-               spin_unlock_irq(&send_cq->lock);
-       else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
-               spin_unlock(&recv_cq->lock);
-               spin_unlock_irq(&send_cq->lock);
-       } else {
-               spin_unlock(&send_cq->lock);
-               spin_unlock_irq(&recv_cq->lock);
-       }
-}
-
-static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
-                             int is_user)
-{
-       struct mlx4_ib_cq *send_cq, *recv_cq;
-
-       if (qp->state != XIB_QPS_RESET)
-               if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
-                                  MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
-                       printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
-                              qp->mqp.qpn);
-
-       send_cq = to_mcq(qp->ibqp.send_cq);
-       recv_cq = to_mcq(qp->ibqp.recv_cq);
-
-       mlx4_ib_lock_cqs(send_cq, recv_cq);
-
-       if (!is_user) {
-               __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
-                                qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
-               if (send_cq != recv_cq)
-                       __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
-       }
-
-       mlx4_qp_remove(dev->dev, &qp->mqp);
-
-       mlx4_ib_unlock_cqs(send_cq, recv_cq);
-
-       mlx4_qp_free(dev->dev, &qp->mqp);
-       mlx4_mtt_cleanup(dev->dev, &qp->mtt);
-
-       if (is_user) {
-               if (!qp->ibqp.srq)
-                       mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.p_uctx),
-                                             &qp->db);
-               ib_umem_release(qp->umem);
-       } else {
-               kfree(qp->sq.wrid);
-               kfree(qp->rq.wrid);
-               mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
-               if (!qp->ibqp.srq)
-                       mlx4_ib_db_free(dev, &qp->db);
-       }
-}
-
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
-                               struct ib_qp_init_attr *init_attr,
-                               struct ib_udata *udata)
-{
-       struct mlx4_ib_dev *dev = to_mdev(pd->device);
-       struct mlx4_ib_sqp *sqp;
-       struct mlx4_ib_qp *qp;
-       int err;
-
-       switch (init_attr->qp_type) {
-       case IB_QPT_RC:
-       case IB_QPT_UC:
-       case IB_QPT_UD:
-       {
-               qp = kzalloc(sizeof *qp, GFP_KERNEL);
-               if (!qp)
-                       return ERR_PTR(-ENOMEM);
-
-               err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
-               if (err) {
-                       kfree(qp);
-                       return ERR_PTR(err);
-               }
-
-               qp->ibqp.qp_num = qp->mqp.qpn;
-
-               break;
-       }
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-       {
-               /* Userspace is not allowed to create special QPs: */
-               if (pd->p_uctx)
-                       return ERR_PTR(-EINVAL);
-
-               sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
-               if (!sqp)
-                       return ERR_PTR(-ENOMEM);
-
-               qp = &sqp->qp;
-
-               err = create_qp_common(dev, pd, init_attr, udata,
-                                      dev->dev->caps.sqp_start +
-                                      (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
-                                      init_attr->port_num - 1,
-                                      qp);
-               if (err) {
-                       kfree(sqp);
-                       return ERR_PTR(err);
-               }
-
-               qp->port        = init_attr->port_num;
-               qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
-
-               break;
-       }
-       default:
-               /* Don't support raw QPs */
-               return ERR_PTR(-EINVAL);
-       }
-
-       return &qp->ibqp;
-}
-
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
-{
-       struct mlx4_ib_dev *dev = to_mdev(qp->device);
-       struct mlx4_ib_qp *mqp = to_mqp(qp);
-
-       if (is_qp0(dev, mqp))
-               mlx4_CLOSE_PORT(dev->dev, mqp->port);
-
-       destroy_qp_common(dev, mqp, !!qp->pd->p_uctx);
-
-       if (is_sqp(dev, mqp))
-               kfree(to_msqp(mqp));
-       else
-               kfree(mqp);
-
-       return 0;
-}
-
-static int to_mlx4_st(enum ib_qp_type type)
-{
-       switch (type) {
-       case IB_QPT_RC:         return MLX4_QP_ST_RC;
-       case IB_QPT_UC:         return MLX4_QP_ST_UC;
-       case IB_QPT_UD:         return MLX4_QP_ST_UD;
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:        return MLX4_QP_ST_MLX;
-       default:                return -1;
-       }
-}
-
-static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
-                                  int attr_mask)
-{
-       u8 dest_rd_atomic;
-       u32 access_flags;
-       u32 hw_access_flags = 0;
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               dest_rd_atomic = attr->max_dest_rd_atomic;
-       else
-               dest_rd_atomic = qp->resp_depth;
-
-       if (attr_mask & IB_QP_ACCESS_FLAGS)
-               access_flags = attr->qp_access_flags;
-       else
-               access_flags = qp->atomic_rd_en;
-
-       if (!dest_rd_atomic)
-               access_flags &= IB_ACCESS_REMOTE_WRITE;
-
-       if (access_flags & IB_ACCESS_REMOTE_READ)
-               hw_access_flags |= MLX4_QP_BIT_RRE;
-       if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
-               hw_access_flags |= MLX4_QP_BIT_RAE;
-       if (access_flags & IB_ACCESS_REMOTE_WRITE)
-               hw_access_flags |= MLX4_QP_BIT_RWE;
-
-       return cpu_to_be32(hw_access_flags);
-}
-
-static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
-                           int attr_mask)
-{
-       if (attr_mask & IB_QP_PKEY_INDEX)
-               sqp->pkey_index = attr->pkey_index;
-       if (attr_mask & IB_QP_QKEY)
-               sqp->qkey = attr->qkey;
-       if (attr_mask & IB_QP_SQ_PSN)
-               sqp->send_psn = attr->sq_psn;
-}
-
-static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
-{
-       path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
-}
-
-static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
-                        struct mlx4_qp_path *path, u8 port)
-{
-       path->grh_mylmc     = ah->src_path_bits & 0x7f;
-       path->rlid          = cpu_to_be16(ah->dlid);
-       if (ah->static_rate) {
-               path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
-               while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
-                      !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
-                       --path->static_rate;
-       } else
-               path->static_rate = 0;
-       path->counter_index = 0xff;
-
-       if (ah->ah_flags & IB_AH_GRH) {
-               if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
-                       printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
-                              ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
-                       return -1;
-               }
-
-               path->grh_mylmc |= 1 << 7;
-               path->mgid_index = ah->grh.sgid_index;
-               path->hop_limit  = ah->grh.hop_limit;
-               path->tclass_flowlabel =
-                       cpu_to_be32((ah->grh.traffic_class << 20) |
-                                   (ah->grh.flow_label));
-               memcpy(path->rgid, ah->grh.dgid.raw, 16);
-       }
-
-       path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
-               ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
-
-       return 0;
-}
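
mlx4_set_path() packs the port selector into bit 6 of sched_queue and the SL into bits 2-5, on top of the 0x83 default from the enum near the top of this file; to_ib_ah_attr() further down unpacks the same fields. A small encode/decode round trip for hypothetical values:

        #include <stdint.h>
        #include <stdio.h>

        #define DEFAULT_SCHED_QUEUE 0x83        /* MLX4_IB_DEFAULT_SCHED_QUEUE */

        int main(void)
        {
                uint8_t port = 2, sl = 5;       /* hypothetical path parameters */

                /* encode, as in mlx4_set_path() */
                uint8_t sched_queue = (uint8_t)(DEFAULT_SCHED_QUEUE |
                                                ((port - 1) << 6) |
                                                ((sl & 0xf) << 2));

                /* decode, as in to_ib_ah_attr() */
                unsigned dec_port = (sched_queue & 0x40) ? 2 : 1;
                unsigned dec_sl   = (sched_queue >> 2) & 0xf;

                printf("sched_queue=%#x -> port=%u sl=%u\n",
                       (unsigned)sched_queue, dec_port, dec_sl);
                return 0;
        }

mlx4_set_sched() above rewrites only the port bit (mask 0xbf), which is why a port change in SQD can be applied without disturbing the rest of the path.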
-
-static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
-                              const struct ib_qp_attr *attr, int attr_mask,
-                              enum ib_qp_state cur_state, enum ib_qp_state new_state)
-{
-       struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
-       struct mlx4_ib_qp *qp = to_mqp(ibqp);
-       struct mlx4_qp_context *context;
-       enum mlx4_qp_optpar optpar = 0;
-       int sqd_event;
-       int err = -EINVAL;
-
-       context = kzalloc(sizeof *context, GFP_KERNEL);
-       if (!context)
-               return -ENOMEM;
-
-       context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
-                                    (to_mlx4_st(ibqp->qp_type) << 16));
-       context->flags     |= cpu_to_be32(1 << 8); /* DE? */
-
-       if (!(attr_mask & IB_QP_PATH_MIG_STATE))
-               context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
-       else {
-               optpar |= MLX4_QP_OPTPAR_PM_STATE;
-               switch (attr->path_mig_state) {
-               case IB_MIG_MIGRATED:
-                       context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
-                       break;
-               case IB_MIG_REARM:
-                       context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
-                       break;
-               case IB_MIG_ARMED:
-                       context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
-                       break;
-               }
-       }
-
-       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
-           ibqp->qp_type == IB_QPT_UD)
-               context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
-       else if (attr_mask & IB_QP_PATH_MTU) {
-               if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
-                       printk(KERN_ERR "path MTU (%u) is invalid\n",
-                              attr->path_mtu);
-                       goto out;
-               }
-               context->mtu_msgmax = (u8)((attr->path_mtu << 5) |
-                       ilog2(dev->dev->caps.max_msg_sz));
-       }
-
-       if (qp->rq.wqe_cnt)
-               context->rq_size_stride = (u8)(ilog2(qp->rq.wqe_cnt) << 3);
-       context->rq_size_stride |= qp->rq.wqe_shift - 4;
-
-       if (qp->sq.wqe_cnt)
-               context->sq_size_stride = (u8)(ilog2(qp->sq.wqe_cnt) << 3);
-       context->sq_size_stride |= qp->sq.wqe_shift - 4;
-
-       if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT)
-               context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
-
-       if (qp->ibqp.p_uctx)
-               context->usr_page = cpu_to_be32(to_mucontext(ibqp->p_uctx)->uar.index);
-       else
-               context->usr_page = cpu_to_be32(dev->priv_uar.index);
-
-       if (attr_mask & IB_QP_DEST_QPN)
-               context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
-
-       if (attr_mask & IB_QP_PORT) {
-               if (cur_state == XIB_QPS_SQD && new_state == XIB_QPS_SQD &&
-                   !(attr_mask & IB_QP_AV)) {
-                       mlx4_set_sched(&context->pri_path, attr->port_num);
-                       optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
-               }
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX) {
-               context->pri_path.pkey_index = (u8)attr->pkey_index;
-               optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
-       }
-
-       if (attr_mask & IB_QP_AV) {
-               if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
-                                 attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
-                       goto out;
-
-               optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
-                          MLX4_QP_OPTPAR_SCHED_QUEUE);
-       }
-
-       if (attr_mask & IB_QP_TIMEOUT) {
-               context->pri_path.ackto = attr->timeout << 3;
-               optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
-       }
-
-       if (attr_mask & IB_QP_ALT_PATH) {
-               if (attr->alt_port_num == 0 ||
-                   attr->alt_port_num > dev->dev->caps.num_ports)
-                       goto out;
-
-               if (attr->alt_pkey_index >=
-                   dev->dev->caps.pkey_table_len[attr->alt_port_num])
-                       goto out;
-
-               if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
-                                 attr->alt_port_num))
-                       goto out;
-
-               context->alt_path.pkey_index = (u8)attr->alt_pkey_index;
-               context->alt_path.ackto = attr->alt_timeout << 3;
-               optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
-       }
-
-       context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
-       context->params1    = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
-
-       if (attr_mask & IB_QP_RNR_RETRY) {
-               context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
-               optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
-       }
-
-       if (attr_mask & IB_QP_RETRY_CNT) {
-               context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
-               optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
-       }
-
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
-               if (attr->max_rd_atomic)
-                       context->params1 |=
-                               cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
-               optpar |= MLX4_QP_OPTPAR_SRA_MAX;
-       }
-
-       if (attr_mask & IB_QP_SQ_PSN)
-               context->next_send_psn = cpu_to_be32(attr->sq_psn);
-
-       context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-               if (attr->max_dest_rd_atomic)
-                       context->params2 |=
-                               cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
-               optpar |= MLX4_QP_OPTPAR_RRA_MAX;
-       }
-
-       if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
-               context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
-               optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
-       }
-
-       if (ibqp->srq)
-               context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
-
-       if (attr_mask & IB_QP_MIN_RNR_TIMER) {
-               context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
-               optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
-       }
-       if (attr_mask & IB_QP_RQ_PSN)
-               context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
-
-       context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
-
-       if (attr_mask & IB_QP_QKEY) {
-               context->qkey = cpu_to_be32(attr->qkey);
-               optpar |= MLX4_QP_OPTPAR_Q_KEY;
-       }
-
-       if (ibqp->srq)
-               context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
-
-       if (!ibqp->srq && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT)
-               context->db_rec_addr = cpu_to_be64(qp->db.dma.da);
-
-       if (cur_state == XIB_QPS_INIT &&
-           new_state == XIB_QPS_RTR  &&
-           (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
-            ibqp->qp_type == IB_QPT_UD)) {
-               context->pri_path.sched_queue = (qp->port - 1) << 6;
-               if (is_qp0(dev, qp))
-                       context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
-               else
-                       context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
-       }
-
-       if (cur_state == XIB_QPS_RTS && new_state == XIB_QPS_SQD        &&
-           attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
-               sqd_event = 1;
-       else
-               sqd_event = 0;
-
-       /*
-        * Before passing a kernel QP to the HW, make sure that the
-        * ownership bits of the send queue are set and the SQ
-        * headroom is stamped so that the hardware doesn't start
-        * processing stale work requests.
-        */
-       if (!ibqp->p_uctx && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT) {
-               struct mlx4_wqe_ctrl_seg *ctrl;
-               int i;
-
-               for (i = 0; i < qp->sq.wqe_cnt; ++i) {
-                       ctrl = get_send_wqe(qp, i);
-                       ctrl->owner_opcode = cpu_to_be32(1 << 31);
-
-                       stamp_send_wqe(qp, i);
-               }
-       }
-
-       err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
-                            to_mlx4_state(new_state), context, optpar,
-                            sqd_event, &qp->mqp);
-       if (err)
-               goto out;
-
-       qp->state = new_state;
-
-       if (attr_mask & IB_QP_ACCESS_FLAGS)
-               qp->atomic_rd_en = (u8)attr->qp_access_flags;
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               qp->resp_depth = attr->max_dest_rd_atomic;
-       if (attr_mask & IB_QP_PORT)
-               qp->port = attr->port_num;
-       if (attr_mask & IB_QP_ALT_PATH)
-               qp->alt_port = attr->alt_port_num;
-
-       if (is_sqp(dev, qp))
-               store_sqp_attrs(to_msqp(qp), attr, attr_mask);
-
-       /*
-        * If we moved QP0 to RTR, bring the IB link up; if we moved
-        * QP0 to RESET or ERROR, bring the link back down.
-        */
-       if (is_qp0(dev, qp)) {
-               if (cur_state != XIB_QPS_RTR && new_state == XIB_QPS_RTR)
-                       if (mlx4_INIT_PORT(dev->dev, qp->port))
-                               printk(KERN_WARNING "INIT_PORT failed for port %d\n",
-                                      qp->port);
-
-               if (cur_state != XIB_QPS_RESET && cur_state != XIB_QPS_ERR &&
-                   (new_state == XIB_QPS_RESET || new_state == XIB_QPS_ERR))
-                       mlx4_CLOSE_PORT(dev->dev, qp->port);
-       }
-
-       /*
-        * If we moved a kernel QP to RESET, clean up all old CQ
-        * entries and reinitialize the QP.
-        */
-       if (new_state == XIB_QPS_RESET && !ibqp->p_uctx) {
-               mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
-                                ibqp->srq ? to_msrq(ibqp->srq): NULL);
-               if (ibqp->send_cq != ibqp->recv_cq)
-                       mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
-
-               qp->rq.head = 0;
-               qp->rq.tail = 0;
-               qp->sq.head = 0;
-               qp->sq.tail = 0;
-               if (!ibqp->srq)
-                       *qp->db.db  = 0;
-       }
-
-out:
-       kfree(context);
-       return err;
-}
-
-static struct ib_qp_attr mlx4_ib_qp_attr;
-static int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1];
-
-void mlx4_ib_qp_init()
-{
-       memset( &mlx4_ib_qp_attr, 0, sizeof(mlx4_ib_qp_attr) );
-       mlx4_ib_qp_attr.port_num = 1;
-
-       memset( &mlx4_ib_qp_attr_mask_table, 0, sizeof(mlx4_ib_qp_attr_mask_table) );
-       mlx4_ib_qp_attr_mask_table[IB_QPT_UD]  = (IB_QP_PKEY_INDEX              |
-                               IB_QP_PORT                      |
-                               IB_QP_QKEY);
-       mlx4_ib_qp_attr_mask_table[IB_QPT_UC]  = (IB_QP_PKEY_INDEX              |
-                               IB_QP_PORT                      |
-                               IB_QP_ACCESS_FLAGS);
-       mlx4_ib_qp_attr_mask_table[IB_QPT_RC]  = (IB_QP_PKEY_INDEX              |
-                               IB_QP_PORT                      |
-                               IB_QP_ACCESS_FLAGS);
-       mlx4_ib_qp_attr_mask_table[IB_QPT_SMI] = (IB_QP_PKEY_INDEX              |
-                               IB_QP_QKEY);
-       mlx4_ib_qp_attr_mask_table[IB_QPT_GSI] = (IB_QP_PKEY_INDEX              |
-                               IB_QP_QKEY);
-}
-
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                     int attr_mask, struct ib_udata *udata)
-{
-       struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
-       struct mlx4_ib_qp *qp = to_mqp(ibqp);
-       enum ib_qp_state cur_state, new_state;
-       int err = -EINVAL;
-
-       UNUSED_PARAM(udata);
-       
-       mutex_lock(&qp->mutex);
-
-       cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
-       new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-       if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
-               goto out;
-
-       if ((attr_mask & IB_QP_PORT) &&
-           (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
-               goto out;
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX) {
-               int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
-               if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])
-                       goto out;
-       }
-
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
-           attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
-               goto out;
-       }
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
-           attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
-               goto out;
-       }
-
-       if (cur_state == new_state && cur_state == XIB_QPS_RESET) {
-               err = 0;
-               goto out;
-       }
-
-       if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_ERR) {
-               err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
-                                         mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
-                                         XIB_QPS_RESET, XIB_QPS_INIT);
-               if (err)
-                       goto out;
-               cur_state = XIB_QPS_INIT;
-       }
-
-       err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
-
-out:
-       mutex_unlock(&qp->mutex);
-       return err;
-}
-
-static enum ib_wr_opcode to_wr_opcode(struct _ib_send_wr *wr)
-{
-
-       enum ib_wr_opcode opcode = -1;          /* set from wr->wr_type below */

-
-       switch (wr->wr_type) {
-               case WR_SEND: 
-                       opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_SEND_WITH_IMM : IB_WR_SEND;
-                       break;
-               case WR_RDMA_WRITE:     
-                       opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_RDMA_WRITE_WITH_IMM : IB_WR_RDMA_WRITE;
-                       break;
-               case WR_RDMA_READ:              opcode = IB_WR_RDMA_READ; break;
-               case WR_COMPARE_SWAP:           opcode = IB_WR_ATOMIC_CMP_AND_SWP; break;
-               case WR_FETCH_ADD:                      opcode = IB_WR_ATOMIC_FETCH_AND_ADD; break;
-       }
-       return opcode;
-}
-
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr,
-                           void *wqe)
-{
-       enum ib_wr_opcode opcode = to_wr_opcode(wr);
-       struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
-       struct mlx4_wqe_mlx_seg *mlx = wqe;
-       struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof *mlx);
-       struct mlx4_ib_ah *ah = to_mah((struct ib_ah *)wr->dgrm.ud.h_av);
-       __be16 pkey;
-       int send_size;
-       int header_size;
-       int spc;
-       u32 i;
-
-       send_size = 0;
-       for (i = 0; i < wr->num_ds; ++i)
-               send_size += wr->ds_array[i].length;
-
-       ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);
-
-       sqp->ud_header.lrh.service_level   =
-               (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);
-       sqp->ud_header.lrh.destination_lid = ah->av.dlid;
-       sqp->ud_header.lrh.source_lid      = cpu_to_be16(ah->av.g_slid & 0x7f);
-       if (mlx4_ib_ah_grh_present(ah)) {
-               sqp->ud_header.grh.traffic_class =
-                       (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff);
-               sqp->ud_header.grh.flow_label    =
-                       ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
-               sqp->ud_header.grh.hop_limit     = ah->av.hop_limit;
-               ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >> 24),
-                                 ah->av.gid_index, &sqp->ud_header.grh.source_gid);
-               memcpy(sqp->ud_header.grh.destination_gid.raw,
-                      ah->av.dgid, 16);
-       }
-
-       mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
-       mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
-                                 (sqp->ud_header.lrh.destination_lid ==
-                                  XIB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
-                                 (sqp->ud_header.lrh.service_level << 8));
-       mlx->rlid   = sqp->ud_header.lrh.destination_lid;
-
-       switch (opcode) {
-       case IB_WR_SEND:
-               sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;
-               sqp->ud_header.immediate_present = 0;
-               break;
-       case IB_WR_SEND_WITH_IMM:
-               sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
-               sqp->ud_header.immediate_present = 1;
-               sqp->ud_header.immediate_data    = wr->immediate_data;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
-       if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
-               sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
-       sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt & IB_SEND_OPT_SOLICITED));
-       if (!sqp->qp.ibqp.qp_num)
-               ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
-       else
-               ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->dgrm.ud.pkey_index, &pkey);
-       sqp->ud_header.bth.pkey = pkey;
-       sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;
-       sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
-       sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ?
-               cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;
-       sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
-
-       header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
-
-#if 0
-       {
-               printk(KERN_ERR "built UD header of size %d:\n", header_size);
-               for (i = 0; i < header_size / 4; ++i) {
-                       if (i % 8 == 0)
-                               printk("  [%02x] ", i * 4);
-                       printk(" %08x",
-                              be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
-                       if ((i + 1) % 8 == 0)
-                               printk("\n");
-               }
-               printk("\n");
-       }
-#endif
-
-       /*
-        * Inline data segments may not cross a 64 byte boundary.  If
-        * our UD header is bigger than the space available up to the
-        * next 64 byte boundary in the WQE, use two inline data
-        * segments to hold the UD header.
-        */
-       spc = MLX4_INLINE_ALIGN -
-               ((u32)(ULONG_PTR)(inl + 1) & (MLX4_INLINE_ALIGN - 1));
-       if (header_size <= spc) {
-               inl->byte_count = cpu_to_be32(1 << 31 | header_size);
-               memcpy(inl + 1, sqp->header_buf, header_size);
-               i = 1;
-       } else {
-               inl->byte_count = cpu_to_be32(1 << 31 | spc);
-               memcpy(inl + 1, sqp->header_buf, spc);
-
-               inl = (void*)((u8*)(inl + 1) + spc);
-               memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
-               /*
-                * Need a barrier here to make sure all the data is
-                * visible before the byte_count field is set.
-                * Otherwise the HCA prefetcher could grab the 64-byte
-                * chunk with this inline segment and get a valid (!=
-                * 0xffffffff) byte count but stale data, and end up
-                * generating a packet with bad headers.
-                *
-                * The first inline segment's byte_count field doesn't
-                * need a barrier, because it comes after a
-                * control/MLX segment and therefore is at an offset
-                * of 16 mod 64.
-                */
-               wmb();
-               inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
-               i = 2;
-       }
-
-       return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
-}
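
build_mlx_header() places the UD header inline, and inline segments must not cross a 64-byte boundary, so the header may be split into two segments. A standalone sketch of the space calculation; the 64-byte boundary and the 72-byte header size come from the surrounding code, while the 16-byte MLX (control) segment and 4-byte inline header are assumed for illustration:

        #include <stdio.h>

        #define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

        int main(void)
        {
                const int inline_align = 64;    /* inline alignment boundary     */
                const int inline_hdr   = 4;     /* assumed inline segment header */
                const int header_size  = 72;    /* MLX4_IB_UD_HEADER_SIZE        */

                /* first inline payload byte: after the 16-byte MLX segment
                 * and the inline header, relative to the WQE start */
                int payload_off = 16 + inline_hdr;

                /* room left before the next 64-byte boundary */
                int spc = inline_align - (payload_off & (inline_align - 1));

                int nseg = (header_size <= spc) ? 1 : 2;
                int consumed = ALIGN_UP(nseg * inline_hdr + header_size, 16);

                printf("spc=%d -> %d inline segment(s), WQE space %d bytes\n",
                       spc, nseg, consumed);
                return 0;
        }

With a 72-byte header and 44 bytes of room the header is split, and the function's return value corresponds to 80 bytes (two inline headers plus the header, rounded up to 16).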
-
-static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
-{
-       unsigned cur;
-       struct mlx4_ib_cq *cq;
-
-       cur = wq->head - wq->tail;
-       if (likely((int)cur + nreq < wq->max_post))
-               return 0;
-
-       cq = to_mcq(ib_cq);
-       spin_lock(&cq->lock);
-       cur = wq->head - wq->tail;
-       spin_unlock(&cq->lock);
-
-       return (int)cur + nreq >= wq->max_post;
-}
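
mlx4_wq_overflow() relies on head and tail being free-running unsigned counters: head - tail is the current occupancy even after either counter wraps past 2^32. A tiny standalone demonstration of that property:

        #include <stdio.h>

        int main(void)
        {
                unsigned head = 5, tail = 0xfffffffdu;  /* head wrapped, tail not */
                unsigned max_post = 16;
                int nreq = 4;

                unsigned cur = head - tail;     /* 8: modular arithmetic does the rest */

                printf("cur=%u overflow=%d\n", cur,
                       (int)cur + nreq >= (int)max_post);
                return 0;
        }

The CQ lock is taken only on the slow path, to re-read the occupancy when the unlocked check fails, which keeps the common case lock-free.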
-
-static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
-                                         u64 remote_addr, __be32 rkey)
-{
-       rseg->raddr    = cpu_to_be64(remote_addr);
-       rseg->rkey     = rkey;
-       rseg->reserved = 0;
-}
-
-static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, ib_send_wr_t *wr)
-{
-       if (wr->wr_type == WR_COMPARE_SWAP) {
-               aseg->swap_add = wr->remote_ops.atomic2;
-               aseg->compare  = wr->remote_ops.atomic1;
-       } else {
-               aseg->swap_add = wr->remote_ops.atomic1;
-               aseg->compare  = 0;
-       }
-
-}
-
-static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
-                            ib_send_wr_t *wr)
-{
-       memcpy(dseg->av, &to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, sizeof (struct mlx4_av));
-       dseg->dqpn = wr->dgrm.ud.remote_qp;
-       dseg->qkey = wr->dgrm.ud.remote_qkey;
-}
-
-static void set_mlx_icrc_seg(void *dseg)
-{
-       u32 *t = dseg;
-       struct mlx4_wqe_inline_seg *iseg = dseg;
-
-       t[1] = 0;
-
-       /*
-        * Need a barrier here before writing the byte_count field to
-        * make sure that all the data is visible before the
-        * byte_count field is set.  Otherwise, if the segment begins
-        * a new cacheline, the HCA prefetcher could grab the 64-byte
-        * chunk and get a valid (!= 0xffffffff) byte count but
-        * stale data, and end up sending the wrong data.
-        */
-       wmb();
-
-       iseg->byte_count = cpu_to_be32((1 << 31) | 4);
-}
-
-static void set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg)
-{
-       dseg->lkey       = cpu_to_be32(sg->lkey);
-       dseg->addr       = cpu_to_be64(sg->vaddr);
-
-       /*
-        * Need a barrier here before writing the byte_count field to
-        * make sure that all the data is visible before the
-        * byte_count field is set.  Otherwise, if the segment begins
-        * a new cacheline, the HCA prefetcher could grab the 64-byte
-        * chunk and get a valid (!= 0xffffffff) byte count but
-        * stale data, and end up sending the wrong data.
-        */
-       wmb();
-
-       dseg->byte_count = cpu_to_be32(sg->length);
-}
-
-static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg)
-{
-       dseg->byte_count = cpu_to_be32(sg->length);
-       dseg->lkey       = cpu_to_be32(sg->lkey);
-       dseg->addr       = cpu_to_be64(sg->vaddr);
-}
-
-int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,
-                     ib_send_wr_t **bad_wr)
-{
-       enum ib_wr_opcode opcode;
-       struct mlx4_ib_qp *qp = to_mqp(ibqp);
-       u8 *wqe;
-       struct mlx4_wqe_ctrl_seg *ctrl;
-       struct mlx4_wqe_data_seg *dseg;
-       unsigned long flags;
-       int nreq;
-       int err = 0;
-       int ind;
-       int size;
-       int i;
-
-       spin_lock_irqsave(&qp->sq.lock, &flags);
-
-       ind = qp->sq.head;
-
-       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
-               if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
-                       err = -ENOMEM;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               if (unlikely(wr->num_ds > (u32)qp->sq.max_gs)) {
-                       err = -EINVAL;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
-               ctrl = (void*)wqe;
-               qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
-               opcode = to_wr_opcode(wr);
-
-               ctrl->srcrb_flags =
-                       (wr->send_opt & IB_SEND_OPT_SIGNALED ?
-                        cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
-                       (wr->send_opt & IB_SEND_OPT_SOLICITED ?
-                        cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
-                       (wr->send_opt & IB_SEND_OPT_TX_IP_CSUM ?
-                        cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM) : 0) |
-                       (wr->send_opt & IB_SEND_OPT_TX_TCP_UDP_CSUM ?
-                        cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
-                       qp->sq_signal_bits;
-
-               if (opcode == IB_WR_SEND_WITH_IMM ||
-                   opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-                       ctrl->imm = wr->immediate_data;
-               else
-                       ctrl->imm = 0;
-
-               wqe += sizeof *ctrl;
-               size = sizeof *ctrl / 16;
-
-               switch (ibqp->qp_type) {
-               case IB_QPT_RC:
-               case IB_QPT_UC:
-                       switch (opcode) {
-                       case IB_WR_ATOMIC_CMP_AND_SWP:
-                       case IB_WR_ATOMIC_FETCH_AND_ADD:
-                               set_raddr_seg((void*)wqe, wr->remote_ops.vaddr,
-                                             wr->remote_ops.rkey);
-                               wqe  += sizeof (struct mlx4_wqe_raddr_seg);
-
-                               set_atomic_seg((void*)wqe, wr);
-                               wqe  += sizeof (struct mlx4_wqe_atomic_seg);
-
-                               size += (sizeof (struct mlx4_wqe_raddr_seg) +
-                                        sizeof (struct mlx4_wqe_atomic_seg)) / 16;
-
-                               break;
-
-                       case IB_WR_RDMA_READ:
-                       case IB_WR_RDMA_WRITE:
-                       case IB_WR_RDMA_WRITE_WITH_IMM:
-                               set_raddr_seg((void*)wqe, wr->remote_ops.vaddr,
-                                             wr->remote_ops.rkey);
-                               wqe  += sizeof (struct mlx4_wqe_raddr_seg);
-                               size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
-                               break;
-
-                       default:
-                               /* No extra segments required for sends */
-                               break;
-                       }
-                       break;
-
-               case IB_QPT_UD:
-                       set_datagram_seg((void*)wqe, wr);
-                       wqe  += sizeof (struct mlx4_wqe_datagram_seg);
-                       size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
-                       break;
-
-               case IB_QPT_SMI:
-               case IB_QPT_GSI:
-                       err = build_mlx_header(to_msqp(qp), wr, ctrl);
-                       if (err < 0) {
-                               if (bad_wr)
-                                       *bad_wr = wr;
-                               goto out;
-                       }
-                       wqe  += err;
-                       size += err / 16;
-
-                       err = 0;
-                       break;
-
-               default:
-                       break;
-               }
-
-               /*
-                * Write data segments in reverse order, so as to
-                * overwrite cacheline stamp last within each
-                * cacheline.  This avoids issues with WQE
-                * prefetching.
-                */
-
-               dseg = (void*)wqe;
-               dseg += wr->num_ds - 1;
-               size += wr->num_ds * (sizeof (struct mlx4_wqe_data_seg) / 16);
-
-               /* Add one more inline data segment for ICRC for MLX sends */
-               if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
-                            qp->ibqp.qp_type == IB_QPT_GSI)) {
-                       set_mlx_icrc_seg(dseg + 1);
-                       size += sizeof (struct mlx4_wqe_data_seg) / 16;
-               }
-
-               for (i = wr->num_ds - 1; i >= 0; --i, --dseg)
-                       set_data_seg(dseg, wr->ds_array + i);
-
-               ctrl->fence_size = (u8)((wr->send_opt & IB_SEND_OPT_FENCE ?
-                                   MLX4_WQE_CTRL_FENCE : 0) | size);
-
-               /*
-                * Make sure descriptor is fully written before
-                * setting ownership bit (because HW can start
-                * executing as soon as we do).
-                */
-               wmb();
-
-               if (opcode < 0 || opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
-                       err = -EINVAL;
-                       goto out;
-               }
-
-               ctrl->owner_opcode = mlx4_ib_opcode[opcode] |
-                       (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
-
-               /*
-                * We can improve latency by not stamping the last
-                * send queue WQE until after ringing the doorbell, so
-                * only stamp here if there are still more WQEs to post.
-                */
-               if (wr->p_next)
-                       stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
-                                      (qp->sq.wqe_cnt - 1));
-
-               ++ind;
-       }
-
-out:
-       if (likely(nreq)) {
-               qp->sq.head += nreq;
-
-               /*
-                * Make sure that descriptors are written before
-                * doorbell record.
-                */
-               wmb();
-
-               writel(qp->doorbell_qpn,
-                      (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
-
-#if 0
-               if (qp->mqp.qpn == 0x41)
-                       DbgPrint( "[MLX4_BUS] mlx4_ib_post_send : qtype %d, qpn %#x, nreq %d, sq.head %#x, wqe_ix %d, db %p \n", 
-                               ibqp->qp_type, qp->mqp.qpn, nreq, qp->sq.head, ind, 
-                               (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL );
-#endif         
-               /*
-                * Make sure doorbells don't leak out of SQ spinlock
-                * and reach the HCA out of order.
-                */
-               mmiowb();
-
-               stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
-                              (qp->sq.wqe_cnt - 1));
-       }
-
-       spin_unlock_irqrestore(&qp->sq.lock, flags);
-
-       return err;
-}
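
The ownership bit written into owner_opcode is simply (ind & qp->sq.wqe_cnt): because wqe_cnt is a power of two, that bit flips each time the producer index completes a pass over the ring, which lets the HCA tell freshly posted WQEs from ones left over from the previous lap. A minimal standalone illustration:

        #include <stdio.h>

        int main(void)
        {
                unsigned wqe_cnt = 4;   /* hypothetical SQ depth (power of two) */
                unsigned ind;

                for (ind = 0; ind < 3 * wqe_cnt; ind++)
                        printf("ind=%2u slot=%u owner=%d\n",
                               ind, ind & (wqe_cnt - 1), !!(ind & wqe_cnt));
                return 0;
        }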
-
-int mlx4_ib_post_recv(struct ib_qp *ibqp, ib_recv_wr_t *wr,
-                     ib_recv_wr_t **bad_wr)
-{
-       struct mlx4_ib_qp *qp = to_mqp(ibqp);
-       struct mlx4_wqe_data_seg *scat;
-       unsigned long flags;
-       int err = 0;
-       int nreq;
-       int ind;
-       int i;
-
-       spin_lock_irqsave(&qp->rq.lock, &flags);
-
-       ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
-
-       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
-               if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {
-                       err = -ENOMEM;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               if (unlikely(wr->num_ds > (u32)qp->rq.max_gs)) {
-                       err = -EINVAL;
-                       if (bad_wr)
-                               *bad_wr = wr;
-                       goto out;
-               }
-
-               scat = get_recv_wqe(qp, ind);
-
-               for (i = 0; i < (int)wr->num_ds; ++i)
-                       __set_data_seg(scat + i, wr->ds_array + i);
-
-               if (i < qp->rq.max_gs) {
-                       scat[i].byte_count = 0;
-                       scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);
-                       scat[i].addr       = 0;
-               }
-
-               qp->rq.wrid[ind] = wr->wr_id;
-
-               ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
-       }
-
-out:
-       if (likely(nreq)) {
-               qp->rq.head += nreq;
-
-               /*
-                * Make sure that descriptors are written before
-                * doorbell record.
-                */
-               wmb();
-
-               *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
-
-#if 0
-               if (qp->mqp.qpn == 0x41)
-                       DbgPrint( "[MLX4_BUS] mlx4_ib_post_recv : qtype %d, qpn %#x, nreq %d, rq.head %#x, wqe_ix %d, db_obj %p, db %p \n", 
-                               ibqp->qp_type, qp->mqp.qpn, nreq, qp->rq.head, ind, &qp->db, qp->db.db );
-#endif         
-       }
-
-       spin_unlock_irqrestore(&qp->rq.lock, flags);
-
-       return err;
-}
-
-static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
-{
-       switch (mlx4_state) {
-       case MLX4_QP_STATE_RST:      return XIB_QPS_RESET;
-       case MLX4_QP_STATE_INIT:     return XIB_QPS_INIT;
-       case MLX4_QP_STATE_RTR:      return XIB_QPS_RTR;
-       case MLX4_QP_STATE_RTS:      return XIB_QPS_RTS;
-       case MLX4_QP_STATE_SQ_DRAINING:
-       case MLX4_QP_STATE_SQD:      return XIB_QPS_SQD;
-       case MLX4_QP_STATE_SQER:     return XIB_QPS_SQE;
-       case MLX4_QP_STATE_ERR:      return XIB_QPS_ERR;
-       default:                     return -1;
-       }
-}
-
-static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
-{
-       switch (mlx4_mig_state) {
-       case MLX4_QP_PM_ARMED:          return IB_MIG_ARMED;
-       case MLX4_QP_PM_REARM:          return IB_MIG_REARM;
-       case MLX4_QP_PM_MIGRATED:       return IB_MIG_MIGRATED;
-       default: return -1;
-       }
-}
-
-static int to_ib_qp_access_flags(int mlx4_flags)
-{
-       int ib_flags = 0;
-
-       if (mlx4_flags & MLX4_QP_BIT_RRE)
-               ib_flags |= IB_ACCESS_REMOTE_READ;
-       if (mlx4_flags & MLX4_QP_BIT_RWE)
-               ib_flags |= IB_ACCESS_REMOTE_WRITE;
-       if (mlx4_flags & MLX4_QP_BIT_RAE)
-               ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
-
-       return ib_flags;
-}
-
-static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
-                               struct mlx4_qp_path *path)
-{
-       memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
-       ib_ah_attr->port_num      = path->sched_queue & 0x40 ? 2 : 1;
-
-       if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
-               return;
-
-       ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
-       ib_ah_attr->sl            = (path->sched_queue >> 2) & 0xf;
-       ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
-       ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;
-       ib_ah_attr->ah_flags      = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
-       if (ib_ah_attr->ah_flags) {
-               ib_ah_attr->grh.sgid_index = path->mgid_index;
-               ib_ah_attr->grh.hop_limit  = path->hop_limit;
-               ib_ah_attr->grh.traffic_class =
-                       (u8)((be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff);
-               ib_ah_attr->grh.flow_label =
-                       be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
-               memcpy(ib_ah_attr->grh.dgid.raw,
-                       path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
-       }
-}
-
-int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
-                    struct ib_qp_init_attr *qp_init_attr)
-{
-       struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
-       struct mlx4_ib_qp *qp = to_mqp(ibqp);
-       struct mlx4_qp_context context;
-       int mlx4_state;
-       int err;
-
-       UNUSED_PARAM(qp_attr_mask);
-
-       if (qp->state == XIB_QPS_RESET) {
-               qp_attr->qp_state = XIB_QPS_RESET;
-               goto done;
-       }
-
-       err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
-       if (err)
-               return -EINVAL;
-
-       mlx4_state = be32_to_cpu(context.flags) >> 28;
-
-       qp_attr->qp_state            = to_ib_qp_state(mlx4_state);
-       qp_attr->path_mtu            = context.mtu_msgmax >> 5;
-       qp_attr->path_mig_state      =
-               to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
-       qp_attr->qkey                = be32_to_cpu(context.qkey);
-       qp_attr->rq_psn              = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
-       qp_attr->sq_psn              = be32_to_cpu(context.next_send_psn) & 0xffffff;
-       qp_attr->dest_qp_num         = be32_to_cpu(context.remote_qpn) & 0xffffff;
-       qp_attr->qp_access_flags     =
-               to_ib_qp_access_flags(be32_to_cpu(context.params2));
-
-       if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
-               to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
-               to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
-               qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
-               qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;
-       }
-
-       qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
-       if (qp_attr->qp_state == XIB_QPS_INIT)
-               qp_attr->port_num = qp->port;
-       else
-               qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
-
-       /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
-       qp_attr->sq_draining = (u8)(mlx4_state == MLX4_QP_STATE_SQ_DRAINING);
-
-       qp_attr->max_rd_atomic = (u8)(1 << ((be32_to_cpu(context.params1) >> 21) & 0x7));
-
-       qp_attr->max_dest_rd_atomic =
-               (u8)(1 << ((be32_to_cpu(context.params2) >> 21) & 0x7));
-       qp_attr->min_rnr_timer      =
-               (u8)((be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f);
-       qp_attr->timeout            = context.pri_path.ackto >> 3;
-       qp_attr->retry_cnt          = (u8)((be32_to_cpu(context.params1) >> 16) & 0x7);
-       qp_attr->rnr_retry          = (u8)((be32_to_cpu(context.params1) >> 13) & 0x7);
-       qp_attr->alt_timeout        = context.alt_path.ackto >> 3;
-
-done:
-       qp_attr->cur_qp_state        = qp_attr->qp_state;
-       qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
-       qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
-
-       if (!ibqp->p_uctx) {
-               qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;
-               qp_attr->cap.max_send_sge = qp->sq.max_gs;
-       } else {
-               qp_attr->cap.max_send_wr  = 0;
-               qp_attr->cap.max_send_sge = 0;
-       }
-
-       /*
-        * We don't support inline sends for kernel QPs (yet), and we
-        * don't know what userspace's value should be.
-        */
-       qp_attr->cap.max_inline_data = 0;
-
-       qp_init_attr->cap            = qp_attr->cap;
-
-       return 0;
-}
-
+/*\r
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.\r
+ *\r
+ * This software is available to you under a choice of one of two\r
+ * licenses.  You may choose to be licensed under the terms of the GNU\r
+ * General Public License (GPL) Version 2, available from the file\r
+ * COPYING in the main directory of this source tree, or the\r
+ * OpenIB.org BSD license below:\r
+ *\r
+ *     Redistribution and use in source and binary forms, with or\r
+ *     without modification, are permitted provided that the following\r
+ *     conditions are met:\r
+ *\r
+ *      - Redistributions of source code must retain the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer.\r
+ *\r
+ *      - Redistributions in binary form must reproduce the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer in the documentation and/or other materials\r
+ *        provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ */\r
+\r
+#include "mlx4_ib.h"\r
+#include "ib_cache.h"\r
+#include "ib_pack.h"\r
+#include "qp.h"\r
+#include "user.h"\r
+\r
+enum {\r
+       MLX4_IB_ACK_REQ_FREQ    = 8,\r
+};\r
+\r
+enum {\r
+       MLX4_IB_DEFAULT_SCHED_QUEUE     = 0x83,\r
+       MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f\r
+};\r
+\r
+enum {\r
+       /*\r
+        * Largest possible UD header: send with GRH and immediate data.\r
+        */\r
+       MLX4_IB_UD_HEADER_SIZE          = 72\r
+};\r
+\r
+struct mlx4_ib_sqp {\r
+       struct mlx4_ib_qp       qp;\r
+       int                     pkey_index;\r
+       u32                     qkey;\r
+       u32                     send_psn;\r
+       struct ib_ud_header     ud_header;\r
+       u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];\r
+};\r
+\r
+enum {\r
+       MLX4_IB_MIN_SQ_STRIDE = 6\r
+};\r
+\r
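+/*\r
+ * Translation table, indexed by ib_wr_opcode, giving the hardware\r
+ * opcode that is written into the WQE control segment for each verb.\r
+ */\r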
+static const __be32 mlx4_ib_opcode[] = {\r
+       __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),         /*      [IB_WR_RDMA_WRITE]                      */\r
+       __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),     /*      [IB_WR_RDMA_WRITE_WITH_IMM] */\r
+       __constant_cpu_to_be32(MLX4_OPCODE_SEND),                       /*      [IB_WR_SEND]                            */\r
+       __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),           /*      [IB_WR_SEND_WITH_IMM]           */\r
+       __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),          /*      [IB_WR_RDMA_READ]                       */\r
+       __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),          /*      [IB_WR_ATOMIC_CMP_AND_SWP]      */\r
+       __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),          /*      [IB_WR_ATOMIC_FETCH_AND_ADD]*/\r
+};\r
+\r
+static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)\r
+{\r
+       return container_of(mqp, struct mlx4_ib_sqp, qp);\r
+}\r
+\r
+static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)\r
+{\r
+       return qp->mqp.qpn >= dev->dev->caps.sqp_start &&\r
+               qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;\r
+}\r
+\r
+static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)\r
+{\r
+       return qp->mqp.qpn >= dev->dev->caps.sqp_start &&\r
+               qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;\r
+}\r
+\r
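+/*\r
+ * Resolve an offset within the QP buffer: a single-chunk ("direct")\r
+ * allocation is addressed directly, otherwise the offset is split\r
+ * into a page index and an offset within that page.\r
+ */\r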
+static void *get_wqe(struct mlx4_ib_qp *qp, int offset)\r
+{\r
+       if (qp->buf.nbufs == 1)\r
+               return qp->buf.u.direct.buf + offset;\r
+       else\r
+               return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +\r
+                       (offset & (PAGE_SIZE - 1));\r
+}\r
+\r
+static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)\r
+{\r
+       return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));\r
+}\r
+\r
+static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)\r
+{\r
+       return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));\r
+}\r
+\r
+/*\r
+ * Stamp a SQ WQE so that it is invalid if prefetched by marking the\r
+ * first four bytes of every 64 byte chunk with 0xffffffff, except for\r
+ * the very first chunk of the WQE.\r
+ */\r
+static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)\r
+{\r
+       u32 *wqe = get_send_wqe(qp, n);\r
+       int i;\r
+\r
+       for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)\r
+               wqe[i] = 0xffffffff;\r
+}\r
+\r
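+/*\r
+ * Low-level QP async event handler: translate the mlx4 event type\r
+ * into an ib_event_rec_t and report it through the QP's own\r
+ * event_handler callback.\r
+ */\r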
+static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)\r
+{\r
+       ib_event_rec_t event;\r
+       struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;\r
+\r
+       if (type == MLX4_EVENT_TYPE_PATH_MIG)\r
+               to_mibqp(qp)->port = to_mibqp(qp)->alt_port;\r
+\r
+       switch (type) {\r
+       case MLX4_EVENT_TYPE_PATH_MIG:\r
+               event.type = IB_EVENT_PATH_MIG;\r
+               break;\r
+       case MLX4_EVENT_TYPE_COMM_EST:\r
+               event.type = IB_EVENT_COMM_EST;\r
+               break;\r
+       case MLX4_EVENT_TYPE_SQ_DRAINED:\r
+               event.type = IB_EVENT_SQ_DRAINED;\r
+               break;\r
+       case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:\r
+               event.type = IB_EVENT_QP_LAST_WQE_REACHED;\r
+               break;\r
+       case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:\r
+               event.type = IB_EVENT_QP_FATAL;\r
+               break;\r
+       case MLX4_EVENT_TYPE_PATH_MIG_FAILED:\r
+               event.type = IB_EVENT_PATH_MIG_ERR;\r
+               break;\r
+       case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:\r
+               event.type = IB_EVENT_QP_REQ_ERR;\r
+               break;\r
+       case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:\r
+               event.type = IB_EVENT_QP_ACCESS_ERR;\r
+               break;\r
+       default:\r
+               printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "\r
+                      "on QP %06x\n", type, qp->qpn);\r
+               return;\r
+       }\r
+\r
+       event.context = ibqp->qp_context;\r
+       ibqp->event_handler(&event);\r
+}\r
+\r
+static int send_wqe_overhead(enum ib_qp_type type)\r
+{\r
+       /*\r
+        * UD WQEs must have a datagram segment.\r
+        * RC and UC WQEs might have a remote address segment.\r
+        * MLX WQEs need two extra inline data segments (for the UD\r
+        * header and space for the ICRC).\r
+        */\r
+       switch (type) {\r
+       case IB_QPT_UD:\r
+               return sizeof (struct mlx4_wqe_ctrl_seg) +\r
+                       sizeof (struct mlx4_wqe_datagram_seg);\r
+       case IB_QPT_UC:\r
+               return sizeof (struct mlx4_wqe_ctrl_seg) +\r
+                       sizeof (struct mlx4_wqe_raddr_seg);\r
+       case IB_QPT_RC:\r
+               return sizeof (struct mlx4_wqe_ctrl_seg) +\r
+                       sizeof (struct mlx4_wqe_atomic_seg) +\r
+                       sizeof (struct mlx4_wqe_raddr_seg);\r
+       case IB_QPT_SMI:\r
+       case IB_QPT_GSI:\r
+               return sizeof (struct mlx4_wqe_ctrl_seg) +\r
+                       ALIGN(MLX4_IB_UD_HEADER_SIZE +\r
+                             DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,\r
+                                          MLX4_INLINE_ALIGN) *\r
+                             sizeof (struct mlx4_wqe_inline_seg),\r
+                             sizeof (struct mlx4_wqe_data_seg)) +\r
+                       ALIGN(4 +\r
+                             sizeof (struct mlx4_wqe_inline_seg),\r
+                             sizeof (struct mlx4_wqe_data_seg));\r
+       default:\r
+               return sizeof (struct mlx4_wqe_ctrl_seg);\r
+       }\r
+}\r
+\r
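+/*\r
+ * Size the receive queue from the requested caps.  QPs attached to an\r
+ * SRQ get no RQ at all; otherwise wqe_cnt and max_gs are rounded up\r
+ * to powers of two and the WQE stride is derived from max_gs.\r
+ */\r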
+static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,\r
+                      int is_user, int has_srq, struct mlx4_ib_qp *qp)\r
+{\r
+       /* Sanity check RQ size before proceeding */\r
+       if ((int)cap->max_recv_wr  > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||\r
+           (int)cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))\r
+               return -EINVAL;\r
+\r
+       if (has_srq) {\r
+               /* QPs attached to an SRQ should have no RQ */\r
+               if (cap->max_recv_wr)\r
+                       return -EINVAL;\r
+\r
+               qp->rq.wqe_cnt = qp->rq.max_gs = 0;\r
+       } else {\r
+               /* HW requires >= 1 RQ entry with >= 1 gather entry */\r
+               if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))\r
+                       return -EINVAL;\r
+\r
+               qp->rq.wqe_cnt   = roundup_pow_of_two(max(1U, cap->max_recv_wr));\r
+               qp->rq.max_gs    = roundup_pow_of_two(max(1U, cap->max_recv_sge));\r
+               qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));\r
+       }\r
+\r
+       /* leave userspace return values as they were, so as not to break ABI */\r
+       if (is_user) {\r
+               cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;\r
+               cap->max_recv_sge = qp->rq.max_gs;\r
+       } else {\r
+               cap->max_recv_wr  = qp->rq.max_post =\r
+                       min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);\r
+               cap->max_recv_sge = min(qp->rq.max_gs,\r
+                                       min(dev->dev->caps.max_sq_sg,\r
+                                       dev->dev->caps.max_rq_sg));\r
+       }\r
+       /* We don't support inline sends for kernel QPs (yet) */\r
+\r
+       return 0;\r
+}\r
+\r
+static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,\r
+                             enum ib_qp_type type, struct mlx4_ib_qp *qp)\r
+{\r
+       /* Sanity check SQ size before proceeding */\r
+       if ((int)cap->max_send_wr       > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE  ||\r
+           (int)cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||\r
+           (int)cap->max_inline_data + send_wqe_overhead(type) +\r
+           (int)sizeof(struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)\r
+               return -EINVAL;\r
+\r
+       /*\r
+        * For MLX transport we need 2 extra S/G entries:\r
+        * one for the header and one for the checksum at the end\r
+        */\r
+       if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&\r
+           (int)cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)\r
+               return -EINVAL;\r
+\r
+       qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *\r
+                                                       sizeof (struct mlx4_wqe_data_seg),\r
+                                                       cap->max_inline_data +\r
+                                                       sizeof (struct mlx4_wqe_inline_seg)) +\r
+                                                   send_wqe_overhead(type)));\r
+       qp->sq.wqe_shift = max(MLX4_IB_SQ_MIN_WQE_SHIFT, qp->sq.wqe_shift);\r
+       qp->sq.max_gs    = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /\r
+               sizeof (struct mlx4_wqe_data_seg);\r
+\r
+       /*\r
+        * We need to leave 2 KB + 1 WQE of headroom in the SQ to\r
+        * allow HW to prefetch.\r
+        */\r
+       qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift);\r
+       qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);\r
+\r
+       qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +\r
+               (qp->sq.wqe_cnt << qp->sq.wqe_shift);\r
+       if (qp->rq.wqe_shift > qp->sq.wqe_shift) {\r
+               qp->rq.offset = 0;\r
+               qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;\r
+       } else {\r
+               qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;\r
+               qp->sq.offset = 0;\r
+       }\r
+\r
+       cap->max_send_wr = qp->sq.max_post =\r
+               min(qp->sq.wqe_cnt - qp->sq_spare_wqes,\r
+                       dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE);\r
+       cap->max_send_sge = min(qp->sq.max_gs,\r
+                               min(dev->dev->caps.max_sq_sg,\r
+                                       dev->dev->caps.max_rq_sg));\r
+       /* We don't support inline sends for kernel QPs (yet) */\r
+       cap->max_inline_data = 0;\r
+\r
+       return 0;\r
+}\r
+\r
+static int set_user_sq_size(struct mlx4_ib_dev *dev,\r
+                           struct mlx4_ib_qp *qp,\r
+                           struct mlx4_ib_create_qp *ucmd)\r
+{\r
+       /* Sanity check SQ size before proceeding */\r
+       if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes       ||\r
+           ucmd->log_sq_stride >\r
+               ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||\r
+           ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)\r
+               return -EINVAL;\r
+\r
+       qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;\r
+       qp->sq.wqe_shift = ucmd->log_sq_stride;\r
+\r
+       qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +\r
+               (qp->sq.wqe_cnt << qp->sq.wqe_shift);\r
+\r
+       return 0;\r
+}\r
+\r
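+/*\r
+ * Common QP creation path.  Userspace QPs are backed by an ib_umem\r
+ * registration and a user-mapped doorbell record; kernel QPs get a\r
+ * driver-allocated buffer, doorbell record and wrid arrays.  In both\r
+ * cases the MTT is written and the QPN is reserved/allocated before\r
+ * the per-QP event callback is hooked up.\r
+ */\r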
+static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,\r
+                           struct ib_qp_init_attr *init_attr,\r
+                           struct ib_udata *udata, u32 sqpn, struct mlx4_ib_qp *qp)\r
+{\r
+       int err;\r
+\r
+       mutex_init(&qp->mutex);\r
+       spin_lock_init(&qp->sq.lock);\r
+       spin_lock_init(&qp->rq.lock);\r
+\r
+       qp->state        = XIB_QPS_RESET;\r
+       qp->atomic_rd_en = 0;\r
+       qp->resp_depth   = 0;\r
+\r
+       qp->rq.head         = 0;\r
+       qp->rq.tail         = 0;\r
+       qp->sq.head         = 0;\r
+       qp->sq.tail         = 0;\r
+\r
+       err = set_rq_size(dev, &init_attr->cap, !!pd->p_uctx, !!init_attr->srq, qp);\r
+       if (err)\r
+               goto err;\r
+\r
+       if (pd->p_uctx) {\r
+               struct mlx4_ib_create_qp ucmd;\r
+\r
+               if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {\r
+                       err = -EFAULT;\r
+                       goto err;\r
+               }\r
+\r
+               qp->sq_no_prefetch = ucmd.sq_no_prefetch;\r
+\r
+               err = set_user_sq_size(dev, qp, &ucmd);\r
+               if (err)\r
+                       goto err;\r
+\r
+               qp->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,\r
+                                      qp->buf_size, 0, FALSE);\r
+               if (IS_ERR(qp->umem)) {\r
+                       err = PTR_ERR(qp->umem);\r
+                       goto err;\r
+               }\r
+\r
+               err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),\r
+                                   ilog2(qp->umem->page_size), &qp->mtt);\r
+               if (err)\r
+                       goto err_buf;\r
+\r
+               err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);\r
+               if (err)\r
+                       goto err_mtt;\r
+\r
+               if (!init_attr->srq) {\r
+                       err = mlx4_ib_db_map_user(to_mucontext(pd->p_uctx),\r
+                                                 ucmd.db_addr, &qp->db);\r
+                       if (err)\r
+                               goto err_mtt;\r
+               }\r
+       } else {\r
+               qp->sq_no_prefetch = 0;\r
+\r
+               err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);\r
+               if (err)\r
+                       goto err;\r
+\r
+               if (!init_attr->srq) {\r
+                       err = mlx4_ib_db_alloc(dev, &qp->db, 0);\r
+                       if (err)\r
+                               goto err;\r
+\r
+                       *qp->db.db = 0;\r
+               }\r
+\r
+               if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {\r
+                       err = -ENOMEM;\r
+                       goto err_db;\r
+               }\r
+\r
+               err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,\r
+                                   &qp->mtt);\r
+               if (err)\r
+                       goto err_buf;\r
+\r
+               err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);\r
+               if (err)\r
+                       goto err_mtt;\r
+\r
+               qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);\r
+               qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);\r
+\r
+               if (!qp->sq.wrid || !qp->rq.wrid) {\r
+                       err = -ENOMEM;\r
+                       goto err_wrid;\r
+               }\r
+       }\r
+\r
+       if (!sqpn)\r
+               err = mlx4_qp_reserve_range(dev->dev, 1, 1, &sqpn);\r
+       if (err)\r
+               goto err_wrid;\r
+\r
+       err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);\r
+       if (err)\r
+               goto err_wrid;\r
+\r
+       /*\r
+        * Hardware wants QPN written in big-endian order (after\r
+        * shifting) for send doorbell.  Precompute this value to save\r
+        * a little bit when posting sends.\r
+        */\r
+       qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);\r
+\r
+       if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)\r
+               qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);\r
+       else\r
+               qp->sq_signal_bits = 0;\r
+\r
+       qp->mqp.event = mlx4_ib_qp_event;\r
+\r
+       return 0;\r
+\r
+err_wrid:\r
+       if (pd->p_uctx) {\r
+               if (!init_attr->srq)\r
+                       mlx4_ib_db_unmap_user(to_mucontext(pd->p_uctx),\r
+                                             &qp->db);\r
+       } else {\r
+               kfree(qp->sq.wrid);\r
+               kfree(qp->rq.wrid);\r
+       }\r
+\r
+err_mtt:\r
+       mlx4_mtt_cleanup(dev->dev, &qp->mtt);\r
+\r
+err_buf:\r
+       if (pd->p_uctx)\r
+               ib_umem_release(qp->umem);\r
+       else\r
+               mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);\r
+\r
+err_db:\r
+       if (!pd->p_uctx && !init_attr->srq)\r
+               mlx4_ib_db_free(dev, &qp->db);\r
+\r
+err:\r
+       return err;\r
+}\r
+\r
+static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)\r
+{\r
+       switch (state) {\r
+       case XIB_QPS_RESET:     return MLX4_QP_STATE_RST;\r
+       case XIB_QPS_INIT:      return MLX4_QP_STATE_INIT;\r
+       case XIB_QPS_RTR:       return MLX4_QP_STATE_RTR;\r
+       case XIB_QPS_RTS:       return MLX4_QP_STATE_RTS;\r
+       case XIB_QPS_SQD:       return MLX4_QP_STATE_SQD;\r
+       case XIB_QPS_SQE:       return MLX4_QP_STATE_SQER;\r
+       case XIB_QPS_ERR:       return MLX4_QP_STATE_ERR;\r
+       default:                return -1;\r
+       }\r
+}\r
+\r
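+/*\r
+ * Lock the send and receive CQs in a fixed order (by CQN) so that two\r
+ * QPs sharing CQs cannot deadlock; the matching unlock below releases\r
+ * them in the reverse order.\r
+ */\r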
+static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)\r
+{\r
+       if (send_cq == recv_cq)\r
+               spin_lock_irq(&send_cq->lock);\r
+       else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {\r
+               spin_lock_irq(&send_cq->lock);\r
+               spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);\r
+       } else {\r
+               spin_lock_irq(&recv_cq->lock);\r
+               spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);\r
+       }\r
+}\r
+\r
+static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)\r
+{\r
+       if (send_cq == recv_cq)\r
+               spin_unlock_irq(&send_cq->lock);\r
+       else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {\r
+               spin_unlock(&recv_cq->lock);\r
+               spin_unlock_irq(&send_cq->lock);\r
+       } else {\r
+               spin_unlock(&send_cq->lock);\r
+               spin_unlock_irq(&recv_cq->lock);\r
+       }\r
+}\r
+\r
+static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,\r
+                             int is_user)\r
+{\r
+       struct mlx4_ib_cq *send_cq, *recv_cq;\r
+\r
+       if (qp->state != XIB_QPS_RESET)\r
+               if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),\r
+                                  MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))\r
+                       printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",\r
+                              qp->mqp.qpn);\r
+\r
+       send_cq = to_mcq(qp->ibqp.send_cq);\r
+       recv_cq = to_mcq(qp->ibqp.recv_cq);\r
+\r
+       mlx4_ib_lock_cqs(send_cq, recv_cq);\r
+\r
+       if (!is_user) {\r
+               __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,\r
+                                qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);\r
+               if (send_cq != recv_cq)\r
+                       __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);\r
+       }\r
+\r
+       mlx4_qp_remove(dev->dev, &qp->mqp);\r
+\r
+       mlx4_ib_unlock_cqs(send_cq, recv_cq);\r
+\r
+       mlx4_qp_free(dev->dev, &qp->mqp);\r
+       mlx4_mtt_cleanup(dev->dev, &qp->mtt);\r
+\r
+       if (is_user) {\r
+               if (!qp->ibqp.srq)\r
+                       mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.p_uctx),\r
+                                             &qp->db);\r
+               ib_umem_release(qp->umem);\r
+       } else {\r
+               kfree(qp->sq.wrid);\r
+               kfree(qp->rq.wrid);\r
+               mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);\r
+               if (!qp->ibqp.srq)\r
+                       mlx4_ib_db_free(dev, &qp->db);\r
+       }\r
+}\r
+\r
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,\r
+                               struct ib_qp_init_attr *init_attr,\r
+                               struct ib_udata *udata)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(pd->device);\r
+       struct mlx4_ib_sqp *sqp;\r
+       struct mlx4_ib_qp *qp;\r
+       int err;\r
+\r
+       switch (init_attr->qp_type) {\r
+       case IB_QPT_RC:\r
+       case IB_QPT_UC:\r
+       case IB_QPT_UD:\r
+       {\r
+               qp = kzalloc(sizeof *qp, GFP_KERNEL);\r
+               if (!qp)\r
+                       return ERR_PTR(-ENOMEM);\r
+\r
+               err = create_qp_common(dev, pd, init_attr, udata, 0, qp);\r
+               if (err) {\r
+                       kfree(qp);\r
+                       return ERR_PTR(err);\r
+               }\r
+\r
+               qp->ibqp.qp_num = qp->mqp.qpn;\r
+\r
+               break;\r
+       }\r
+       case IB_QPT_SMI:\r
+       case IB_QPT_GSI:\r
+       {\r
+               /* Userspace is not allowed to create special QPs: */\r
+               if (pd->p_uctx)\r
+                       return ERR_PTR(-EINVAL);\r
+\r
+               sqp = kzalloc(sizeof *sqp, GFP_KERNEL);\r
+               if (!sqp)\r
+                       return ERR_PTR(-ENOMEM);\r
+\r
+               qp = &sqp->qp;\r
+\r
+               err = create_qp_common(dev, pd, init_attr, udata,\r
+                                      dev->dev->caps.sqp_start +\r
+                                      (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +\r
+                                      init_attr->port_num - 1,\r
+                                      qp);\r
+               if (err) {\r
+                       kfree(sqp);\r
+                       return ERR_PTR(err);\r
+               }\r
+\r
+               qp->port        = init_attr->port_num;\r
+               qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;\r
+\r
+               break;\r
+       }\r
+       default:\r
+               /* Don't support raw QPs */\r
+               return ERR_PTR(-EINVAL);\r
+       }\r
+\r
+       return &qp->ibqp;\r
+}\r
+\r
+int mlx4_ib_destroy_qp(struct ib_qp *qp)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(qp->device);\r
+       struct mlx4_ib_qp *mqp = to_mqp(qp);\r
+\r
+       if (is_qp0(dev, mqp))\r
+               mlx4_CLOSE_PORT(dev->dev, mqp->port);\r
+\r
+       destroy_qp_common(dev, mqp, !!qp->pd->p_uctx);\r
+\r
+       if (is_sqp(dev, mqp))\r
+               kfree(to_msqp(mqp));\r
+       else\r
+               kfree(mqp);\r
+\r
+       return 0;\r
+}\r
+\r
+static int to_mlx4_st(enum ib_qp_type type)\r
+{\r
+       switch (type) {\r
+       case IB_QPT_RC:         return MLX4_QP_ST_RC;\r
+       case IB_QPT_UC:         return MLX4_QP_ST_UC;\r
+       case IB_QPT_UD:         return MLX4_QP_ST_UD;\r
+       case IB_QPT_SMI:\r
+       case IB_QPT_GSI:        return MLX4_QP_ST_MLX;\r
+       default:                return -1;\r
+       }\r
+}\r
+\r
+static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,\r
+                                  int attr_mask)\r
+{\r
+       u8 dest_rd_atomic;\r
+       u32 access_flags;\r
+       u32 hw_access_flags = 0;\r
+\r
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)\r
+               dest_rd_atomic = attr->max_dest_rd_atomic;\r
+       else\r
+               dest_rd_atomic = qp->resp_depth;\r
+\r
+       if (attr_mask & IB_QP_ACCESS_FLAGS)\r
+               access_flags = attr->qp_access_flags;\r
+       else\r
+               access_flags = qp->atomic_rd_en;\r
+\r
+       if (!dest_rd_atomic)\r
+               access_flags &= IB_ACCESS_REMOTE_WRITE;\r
+\r
+       if (access_flags & IB_ACCESS_REMOTE_READ)\r
+               hw_access_flags |= MLX4_QP_BIT_RRE;\r
+       if (access_flags & IB_ACCESS_REMOTE_ATOMIC)\r
+               hw_access_flags |= MLX4_QP_BIT_RAE;\r
+       if (access_flags & IB_ACCESS_REMOTE_WRITE)\r
+               hw_access_flags |= MLX4_QP_BIT_RWE;\r
+\r
+       return cpu_to_be32(hw_access_flags);\r
+}\r
+\r
+static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,\r
+                           int attr_mask)\r
+{\r
+       if (attr_mask & IB_QP_PKEY_INDEX)\r
+               sqp->pkey_index = attr->pkey_index;\r
+       if (attr_mask & IB_QP_QKEY)\r
+               sqp->qkey = attr->qkey;\r
+       if (attr_mask & IB_QP_SQ_PSN)\r
+               sqp->send_psn = attr->sq_psn;\r
+}\r
+\r
+static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)\r
+{\r
+       path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);\r
+}\r
+\r
+static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,\r
+                        struct mlx4_qp_path *path, u8 port)\r
+{\r
+       path->grh_mylmc     = ah->src_path_bits & 0x7f;\r
+       path->rlid          = cpu_to_be16(ah->dlid);\r
+       if (ah->static_rate) {\r
+               path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;\r
+               while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&\r
+                      !(1 << path->static_rate & dev->dev->caps.stat_rate_support))\r
+                       --path->static_rate;\r
+       } else\r
+               path->static_rate = 0;\r
+       path->counter_index = 0xff;\r
+\r
+       if (ah->ah_flags & IB_AH_GRH) {\r
+               if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {\r
+                       printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",\r
+                              ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);\r
+                       return -1;\r
+               }\r
+\r
+               path->grh_mylmc |= 1 << 7;\r
+               path->mgid_index = ah->grh.sgid_index;\r
+               path->hop_limit  = ah->grh.hop_limit;\r
+               path->tclass_flowlabel =\r
+                       cpu_to_be32((ah->grh.traffic_class << 20) |\r
+                                   (ah->grh.flow_label));\r
+               memcpy(path->rgid, ah->grh.dgid.raw, 16);\r
+       }\r
+\r
+       path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |\r
+               ((port - 1) << 6) | ((ah->sl & 0xf) << 2);\r
+\r
+       return 0;\r
+}\r
+\r
+static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,\r
+                              const struct ib_qp_attr *attr, int attr_mask,\r
+                              enum ib_qp_state cur_state, enum ib_qp_state new_state)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(ibqp->device);\r
+       struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
+       struct mlx4_qp_context *context;\r
+       enum mlx4_qp_optpar optpar = 0;\r
+       int sqd_event;\r
+       int err = -EINVAL;\r
+\r
+       context = kzalloc(sizeof *context, GFP_KERNEL);\r
+       if (!context)\r
+               return -ENOMEM;\r
+\r
+       context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |\r
+                                    (to_mlx4_st(ibqp->qp_type) << 16));\r
+       context->flags     |= cpu_to_be32(1 << 8); /* DE? */\r
+\r
+       if (!(attr_mask & IB_QP_PATH_MIG_STATE))\r
+               context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);\r
+       else {\r
+               optpar |= MLX4_QP_OPTPAR_PM_STATE;\r
+               switch (attr->path_mig_state) {\r
+               case IB_MIG_MIGRATED:\r
+                       context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);\r
+                       break;\r
+               case IB_MIG_REARM:\r
+                       context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);\r
+                       break;\r
+               case IB_MIG_ARMED:\r
+                       context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);\r
+                       break;\r
+               }\r
+       }\r
+\r
+       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||\r
+           ibqp->qp_type == IB_QPT_UD)\r
+               context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;\r
+       else if (attr_mask & IB_QP_PATH_MTU) {\r
+               if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {\r
+                       printk(KERN_ERR "path MTU (%u) is invalid\n",\r
+                              attr->path_mtu);\r
+                       goto out;\r
+               }\r
+               context->mtu_msgmax = (u8)((attr->path_mtu << 5) |\r
+                       ilog2(dev->dev->caps.max_msg_sz));\r
+       }\r
+\r
+       if (qp->rq.wqe_cnt)\r
+               context->rq_size_stride = (u8)(ilog2(qp->rq.wqe_cnt) << 3);\r
+       context->rq_size_stride |= qp->rq.wqe_shift - 4;\r
+\r
+       if (qp->sq.wqe_cnt)\r
+               context->sq_size_stride = (u8)(ilog2(qp->sq.wqe_cnt) << 3);\r
+       context->sq_size_stride |= qp->sq.wqe_shift - 4;\r
+\r
+       if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT)\r
+               context->sq_size_stride |= !!qp->sq_no_prefetch << 7;\r
+\r
+       if (qp->ibqp.p_uctx)\r
+               context->usr_page = cpu_to_be32(to_mucontext(ibqp->p_uctx)->uar.index);\r
+       else\r
+               context->usr_page = cpu_to_be32(dev->priv_uar.index);\r
+\r
+       if (attr_mask & IB_QP_DEST_QPN)\r
+               context->remote_qpn = cpu_to_be32(attr->dest_qp_num);\r
+\r
+       if (attr_mask & IB_QP_PORT) {\r
+               if (cur_state == XIB_QPS_SQD && new_state == XIB_QPS_SQD &&\r
+                   !(attr_mask & IB_QP_AV)) {\r
+                       mlx4_set_sched(&context->pri_path, attr->port_num);\r
+                       optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;\r
+               }\r
+       }\r
+\r
+       if (attr_mask & IB_QP_PKEY_INDEX) {\r
+               context->pri_path.pkey_index = (u8)attr->pkey_index;\r
+               optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;\r
+       }\r
+\r
+       if (attr_mask & IB_QP_AV) {\r
+               if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,\r
+                                 attr_mask & IB_QP_PORT ? attr->port_num : qp->port))\r
+                       goto out;\r
+\r
+               optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |\r
+                          MLX4_QP_OPTPAR_SCHED_QUEUE);\r
+       }\r
+\r
+       if (attr_mask & IB_QP_TIMEOUT) {\r
+               context->pri_path.ackto = attr->timeout << 3;\r
+               optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;\r
+       }\r
+\r
+       if (attr_mask & IB_QP_ALT_PATH) {\r
+               if (attr->alt_port_num == 0 ||\r
+                   attr->alt_port_num > dev->dev->caps.num_ports)\r
+                       goto out;\r
+\r
+               if (attr->alt_pkey_index >=\r
+                   dev->dev->caps.pkey_table_len[attr->alt_port_num])\r
+                       goto out;\r
+\r
+               if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,\r
+                                 attr->alt_port_num))\r
+                       goto out;\r
+\r
+               context->alt_path.pkey_index = (u8)attr->alt_pkey_index;\r
+               context->alt_path.ackto = attr->alt_timeout << 3;\r
+               optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;\r
+       }\r
+\r
+       context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pdn);\r
+       context->params1    = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);\r
+\r
+       if (attr_mask & IB_QP_RNR_RETRY) {\r
+               context->params1 |= cpu_to_be32(attr->rnr_retry << 13);\r
+               optpar |= MLX4_QP_OPTPAR_RNR_RETRY;\r
+       }\r
+\r
+       if (attr_mask & IB_QP_RETRY_CNT) {\r
+               context->params1 |= cpu_to_be32(attr->retry_cnt << 16);\r
+               optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;\r
+       }\r
+\r
+       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {\r
+               if (attr->max_rd_atomic)\r
+                       context->params1 |=\r
+                               cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);\r
+               optpar |= MLX4_QP_OPTPAR_SRA_MAX;\r
+       }\r
+\r
+       if (attr_mask & IB_QP_SQ_PSN)\r
+               context->next_send_psn = cpu_to_be32(attr->sq_psn);\r
+\r
+       context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);\r
+\r
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {\r
+               if (attr->max_dest_rd_atomic)\r
+                       context->params2 |=\r
+                               cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);\r
+               optpar |= MLX4_QP_OPTPAR_RRA_MAX;\r
+       }\r
+\r
+       if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {\r
+               context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);\r
+               optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;\r
+       }\r
+\r
+       if (ibqp->srq)\r
+               context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);\r
+\r
+       if (attr_mask & IB_QP_MIN_RNR_TIMER) {\r
+               context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);\r
+               optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;\r
+       }\r
+       if (attr_mask & IB_QP_RQ_PSN)\r
+               context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);\r
+\r
+       context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);\r
+\r
+       if (attr_mask & IB_QP_QKEY) {\r
+               context->qkey = cpu_to_be32(attr->qkey);\r
+               optpar |= MLX4_QP_OPTPAR_Q_KEY;\r
+       }\r
+\r
+       if (ibqp->srq)\r
+               context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);\r
+\r
+       if (!ibqp->srq && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT)\r
+               context->db_rec_addr = cpu_to_be64(qp->db.dma.da);\r
+\r
+       if (cur_state == XIB_QPS_INIT &&\r
+           new_state == XIB_QPS_RTR  &&\r
+           (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||\r
+            ibqp->qp_type == IB_QPT_UD)) {\r
+               context->pri_path.sched_queue = (qp->port - 1) << 6;\r
+               if (is_qp0(dev, qp))\r
+                       context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;\r
+               else\r
+                       context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;\r
+       }\r
+\r
+       if (cur_state == XIB_QPS_RTS && new_state == XIB_QPS_SQD        &&\r
+           attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)\r
+               sqd_event = 1;\r
+       else\r
+               sqd_event = 0;\r
+\r
+       /*\r
+        * Before passing a kernel QP to the HW, make sure that the\r
+        * ownership bits of the send queue are set and the SQ\r
+        * headroom is stamped so that the hardware doesn't start\r
+        * processing stale work requests.\r
+        */\r
+       if (!ibqp->p_uctx && cur_state == XIB_QPS_RESET && new_state == XIB_QPS_INIT) {\r
+               struct mlx4_wqe_ctrl_seg *ctrl;\r
+               int i;\r
+\r
+               for (i = 0; i < qp->sq.wqe_cnt; ++i) {\r
+                       ctrl = get_send_wqe(qp, i);\r
+                       ctrl->owner_opcode = cpu_to_be32(1 << 31);\r
+\r
+                       stamp_send_wqe(qp, i);\r
+               }\r
+       }\r
+\r
+       err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),\r
+                            to_mlx4_state(new_state), context, optpar,\r
+                            sqd_event, &qp->mqp);\r
+       if (err)\r
+               goto out;\r
+\r
+       qp->state = new_state;\r
+\r
+       if (attr_mask & IB_QP_ACCESS_FLAGS)\r
+               qp->atomic_rd_en = (u8)attr->qp_access_flags;\r
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)\r
+               qp->resp_depth = attr->max_dest_rd_atomic;\r
+       if (attr_mask & IB_QP_PORT)\r
+               qp->port = attr->port_num;\r
+       if (attr_mask & IB_QP_ALT_PATH)\r
+               qp->alt_port = attr->alt_port_num;\r
+\r
+       if (is_sqp(dev, qp))\r
+               store_sqp_attrs(to_msqp(qp), attr, attr_mask);\r
+\r
+       /*\r
+        * If we moved QP0 to RTR, bring the IB link up; if we moved\r
+        * QP0 to RESET or ERROR, bring the link back down.\r
+        */\r
+       if (is_qp0(dev, qp)) {\r
+               if (cur_state != XIB_QPS_RTR && new_state == XIB_QPS_RTR)\r
+                       if (mlx4_INIT_PORT(dev->dev, qp->port))\r
+                               printk(KERN_WARNING "INIT_PORT failed for port %d\n",\r
+                                      qp->port);\r
+\r
+               if (cur_state != XIB_QPS_RESET && cur_state != XIB_QPS_ERR &&\r
+                   (new_state == XIB_QPS_RESET || new_state == XIB_QPS_ERR))\r
+                       mlx4_CLOSE_PORT(dev->dev, qp->port);\r
+       }\r
+\r
+       /*\r
+        * If we moved a kernel QP to RESET, clean up all old CQ\r
+        * entries and reinitialize the QP.\r
+        */\r
+       if (new_state == XIB_QPS_RESET && !ibqp->p_uctx) {\r
+               mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,\r
+                                ibqp->srq ? to_msrq(ibqp->srq): NULL);\r
+               if (ibqp->send_cq != ibqp->recv_cq)\r
+                       mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);\r
+\r
+               qp->rq.head = 0;\r
+               qp->rq.tail = 0;\r
+               qp->sq.head = 0;\r
+               qp->sq.tail = 0;\r
+               if (!ibqp->srq)\r
+                       *qp->db.db  = 0;\r
+       }\r
+\r
+out:\r
+       kfree(context);\r
+       return err;\r
+}\r
+\r
+static struct ib_qp_attr mlx4_ib_qp_attr;\r
+static int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1];\r
+\r
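+/*\r
+ * Attribute masks needed to take each QP type from RESET to INIT;\r
+ * used by mlx4_ib_modify_qp() to insert an implicit INIT step when a\r
+ * caller asks for a direct RESET->ERR transition.\r
+ */\r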
+void mlx4_ib_qp_init(void)\r
+{\r
+       memset( &mlx4_ib_qp_attr, 0, sizeof(mlx4_ib_qp_attr) );\r
+       mlx4_ib_qp_attr.port_num = 1;\r
+\r
+       memset( &mlx4_ib_qp_attr_mask_table, 0, sizeof(mlx4_ib_qp_attr_mask_table) );\r
+       mlx4_ib_qp_attr_mask_table[IB_QPT_UD]  = (IB_QP_PKEY_INDEX              |\r
+                               IB_QP_PORT                      |\r
+                               IB_QP_QKEY);\r
+       mlx4_ib_qp_attr_mask_table[IB_QPT_UC]  = (IB_QP_PKEY_INDEX              |\r
+                               IB_QP_PORT                      |\r
+                               IB_QP_ACCESS_FLAGS);\r
+       mlx4_ib_qp_attr_mask_table[IB_QPT_RC]  = (IB_QP_PKEY_INDEX              |\r
+                               IB_QP_PORT                      |\r
+                               IB_QP_ACCESS_FLAGS);\r
+       mlx4_ib_qp_attr_mask_table[IB_QPT_SMI] = (IB_QP_PKEY_INDEX              |\r
+                               IB_QP_QKEY);\r
+       mlx4_ib_qp_attr_mask_table[IB_QPT_GSI] = (IB_QP_PKEY_INDEX              |\r
+                               IB_QP_QKEY);\r
+}\r
+\r
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,\r
+                     int attr_mask, struct ib_udata *udata)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(ibqp->device);\r
+       struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
+       enum ib_qp_state cur_state, new_state;\r
+       int err = -EINVAL;\r
+\r
+       UNUSED_PARAM(udata);\r
+       \r
+       mutex_lock(&qp->mutex);\r
+\r
+       cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;\r
+       new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;\r
+\r
+       if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))\r
+               goto out;\r
+\r
+       if ((attr_mask & IB_QP_PORT) &&\r
+           (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {\r
+               goto out;\r
+       }\r
+\r
+       if (attr_mask & IB_QP_PKEY_INDEX) {\r
+               int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;\r
+               if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])\r
+                       goto out;\r
+       }\r
+\r
+       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&\r
+           attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {\r
+               goto out;\r
+       }\r
+\r
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&\r
+           attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {\r
+               goto out;\r
+       }\r
+\r
+       if (cur_state == new_state && cur_state == XIB_QPS_RESET) {\r
+               err = 0;\r
+               goto out;\r
+       }\r
+\r
+       if (cur_state == XIB_QPS_RESET && new_state == XIB_QPS_ERR) {\r
+               err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,\r
+                                         mlx4_ib_qp_attr_mask_table[ibqp->qp_type],\r
+                                         XIB_QPS_RESET, XIB_QPS_INIT);\r
+               if (err)\r
+                       goto out;\r
+               cur_state = XIB_QPS_INIT;\r
+       }\r
+\r
+       err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);\r
+\r
+out:\r
+       mutex_unlock(&qp->mutex);\r
+       return err;\r
+}\r
+\r
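+/*\r
+ * Map an IBAL work request (wr_type plus send_opt flags) onto the\r
+ * ib_wr_opcode values used by the low-level send path.\r
+ */\r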
+static enum ib_wr_opcode to_wr_opcode(struct _ib_send_wr *wr)\r
+{\r
+       enum ib_wr_opcode opcode = -1;\r
+\r
+       switch (wr->wr_type) {\r
+               case WR_SEND: \r
+                       opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_SEND_WITH_IMM : IB_WR_SEND;\r
+                       break;\r
+               case WR_RDMA_WRITE:     \r
+                       opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? IB_WR_RDMA_WRITE_WITH_IMM : IB_WR_RDMA_WRITE;\r
+                       break;\r
+               case WR_RDMA_READ:              opcode = IB_WR_RDMA_READ; break;\r
+               case WR_COMPARE_SWAP:           opcode = IB_WR_ATOMIC_CMP_AND_SWP; break;\r
+               case WR_FETCH_ADD:                      opcode = IB_WR_ATOMIC_FETCH_AND_ADD; break;\r
+       }\r
+       return opcode;\r
+}\r
+\r
+static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr,\r
+                           void *wqe)\r
+{\r
+       enum ib_wr_opcode opcode = to_wr_opcode(wr);\r
+       struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;\r
+       struct mlx4_wqe_mlx_seg *mlx = wqe;\r
+       struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof *mlx);\r
+       struct mlx4_ib_ah *ah = to_mah((struct ib_ah *)wr->dgrm.ud.h_av);\r
+       __be16 pkey;\r
+       int send_size;\r
+       int header_size;\r
+       int spc;\r
+       u32 i;\r
+\r
+       send_size = 0;\r
+       for (i = 0; i < wr->num_ds; ++i)\r
+               send_size += wr->ds_array[i].length;\r
+\r
+       ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);\r
+\r
+       sqp->ud_header.lrh.service_level   =\r
+               (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);\r
+       sqp->ud_header.lrh.destination_lid = ah->av.dlid;\r
+       sqp->ud_header.lrh.source_lid      = cpu_to_be16(ah->av.g_slid & 0x7f);\r
+       if (mlx4_ib_ah_grh_present(ah)) {\r
+               sqp->ud_header.grh.traffic_class =\r
+                       (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff);\r
+               sqp->ud_header.grh.flow_label    =\r
+                       ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);\r
+               sqp->ud_header.grh.hop_limit     = ah->av.hop_limit;\r
+               ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >> 24),\r
+                                 ah->av.gid_index, &sqp->ud_header.grh.source_gid);\r
+               memcpy(sqp->ud_header.grh.destination_gid.raw,\r
+                      ah->av.dgid, 16);\r
+       }\r
+\r
+       mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);\r
+       mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |\r
+                                 (sqp->ud_header.lrh.destination_lid ==\r
+                                  XIB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |\r
+                                 (sqp->ud_header.lrh.service_level << 8));\r
+       mlx->rlid   = sqp->ud_header.lrh.destination_lid;\r
+\r
+       switch (opcode) {\r
+       case IB_WR_SEND:\r
+               sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;\r
+               sqp->ud_header.immediate_present = 0;\r
+               break;\r
+       case IB_WR_SEND_WITH_IMM:\r
+               sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;\r
+               sqp->ud_header.immediate_present = 1;\r
+               sqp->ud_header.immediate_data    = wr->immediate_data;\r
+               break;\r
+       default:\r
+               return -EINVAL;\r
+       }\r
+\r
+       sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;\r
+       if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)\r
+               sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;\r
+       sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt & IB_SEND_OPT_SOLICITED));\r
+       if (!sqp->qp.ibqp.qp_num)\r
+               ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);\r
+       else\r
+               ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->dgrm.ud.pkey_index, &pkey);\r
+       sqp->ud_header.bth.pkey = pkey;\r
+       sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;\r
+       sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));\r
+       sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ?\r
+               cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;\r
+       sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);\r
+\r
+       header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);\r
+\r
+#if 0\r
+       {\r
+               printk(KERN_ERR "built UD header of size %d:\n", header_size);\r
+               for (i = 0; i < header_size / 4; ++i) {\r
+                       if (i % 8 == 0)\r
+                               printk("  [%02x] ", i * 4);\r
+                       printk(" %08x",\r
+                              be32_to_cpu(((__be32 *) sqp->header_buf)[i]));\r
+                       if ((i + 1) % 8 == 0)\r
+                               printk("\n");\r
+               }\r
+               printk("\n");\r
+       }\r
+#endif\r
+\r
+       /*\r
+        * Inline data segments may not cross a 64 byte boundary.  If\r
+        * our UD header is bigger than the space available up to the\r
+        * next 64 byte boundary in the WQE, use two inline data\r
+        * segments to hold the UD header.\r
+        */\r
+       spc = MLX4_INLINE_ALIGN -\r
+               ((u32)(ULONG_PTR)(inl + 1) & (MLX4_INLINE_ALIGN - 1));\r
+       if (header_size <= spc) {\r
+               inl->byte_count = cpu_to_be32(1 << 31 | header_size);\r
+               memcpy(inl + 1, sqp->header_buf, header_size);\r
+               i = 1;\r
+       } else {\r
+               inl->byte_count = cpu_to_be32(1 << 31 | spc);\r
+               memcpy(inl + 1, sqp->header_buf, spc);\r
+\r
+               inl = (void*)((u8*)(inl + 1) + spc);\r
+               memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);\r
+               /*\r
+                * Need a barrier here to make sure all the data is\r
+                * visible before the byte_count field is set.\r
+                * Otherwise the HCA prefetcher could grab the 64-byte\r
+                * chunk with this inline segment and get a valid (!=\r
+                * 0xffffffff) byte count but stale data, and end up\r
+                * generating a packet with bad headers.\r
+                *\r
+                * The first inline segment's byte_count field doesn't\r
+                * need a barrier, because it comes after a\r
+                * control/MLX segment and therefore is at an offset\r
+                * of 16 mod 64.\r
+                */\r
+               wmb();\r
+               inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));\r
+               i = 2;\r
+       }\r
+\r
+       return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);\r
+}\r
+\r
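+/*\r
+ * Overflow check for a work queue: the unlocked head/tail comparison\r
+ * is the fast path; if the queue looks full, recompute the count\r
+ * under the CQ lock, since polling the CQ is what advances wq->tail.\r
+ */\r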
+static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)\r
+{\r
+       unsigned cur;\r
+       struct mlx4_ib_cq *cq;\r
+\r
+       cur = wq->head - wq->tail;\r
+       if (likely((int)cur + nreq < wq->max_post))\r
+               return 0;\r
+\r
+       cq = to_mcq(ib_cq);\r
+       spin_lock(&cq->lock);\r
+       cur = wq->head - wq->tail;\r
+       spin_unlock(&cq->lock);\r
+\r
+       return (int)cur + nreq >= wq->max_post;\r
+}\r
+\r
+static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,\r
+                                         u64 remote_addr, __be32 rkey)\r
+{\r
+       rseg->raddr    = cpu_to_be64(remote_addr);\r
+       rseg->rkey     = rkey;\r
+       rseg->reserved = 0;\r
+}\r
+\r
+static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, ib_send_wr_t *wr)\r
+{\r
+       if (wr->wr_type == WR_COMPARE_SWAP) {\r
+               aseg->swap_add = wr->remote_ops.atomic2;\r
+               aseg->compare  = wr->remote_ops.atomic1;\r
+       } else {\r
+               aseg->swap_add = wr->remote_ops.atomic1;\r
+               aseg->compare  = 0;\r
+       }\r
+\r
+}\r
+\r
+static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,\r
+                            ib_send_wr_t *wr)\r
+{\r
+       memcpy(dseg->av, &to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, sizeof (struct mlx4_av));\r
+       dseg->dqpn = wr->dgrm.ud.remote_qp;\r
+       dseg->qkey = wr->dgrm.ud.remote_qkey;\r
+}\r
+\r
+static void set_mlx_icrc_seg(void *dseg)\r
+{\r
+       u32 *t = dseg;\r
+       struct mlx4_wqe_inline_seg *iseg = dseg;\r
+\r
+       t[1] = 0;\r
+\r
+       /*\r
+        * Need a barrier here before writing the byte_count field to\r
+        * make sure that all the data is visible before the\r
+        * byte_count field is set.  Otherwise, if the segment begins\r
+        * a new cacheline, the HCA prefetcher could grab the 64-byte\r
+        * chunk and get a valid (!= 0xffffffff) byte count but\r
+        * stale data, and end up sending the wrong data.\r
+        */\r
+       wmb();\r
+\r
+       iseg->byte_count = cpu_to_be32((1 << 31) | 4);\r
+}\r
+\r
+static void set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg)\r
+{\r
+       dseg->lkey       = cpu_to_be32(sg->lkey);\r
+       dseg->addr       = cpu_to_be64(sg->vaddr);\r
+\r
+       /*\r
+        * Need a barrier here before writing the byte_count field to\r
+        * make sure that all the data is visible before the\r
+        * byte_count field is set.  Otherwise, if the segment begins\r
+        * a new cacheline, the HCA prefetcher could grab the 64-byte\r
+        * chunk and get a valid (!= 0xffffffff) byte count but\r
+        * stale data, and end up sending the wrong data.\r
+        */\r
+       wmb();\r
+\r
+       dseg->byte_count = cpu_to_be32(sg->length);\r
+}\r
+\r
+static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, ib_local_ds_t *sg)\r
+{\r
+       dseg->byte_count = cpu_to_be32(sg->length);\r
+       dseg->lkey       = cpu_to_be32(sg->lkey);\r
+       dseg->addr       = cpu_to_be64(sg->vaddr);\r
+}\r
+\r
+int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,\r
+                     ib_send_wr_t **bad_wr)\r
+{\r
+       enum ib_wr_opcode opcode;\r
+       struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
+       u8 *wqe;\r
+       struct mlx4_wqe_ctrl_seg *ctrl;\r
+       struct mlx4_wqe_data_seg *dseg;\r
+       unsigned long flags;\r
+       int nreq;\r
+       int err = 0;\r
+       int ind;\r
+       int size;\r
+       int i;\r
+\r
+       spin_lock_irqsave(&qp->sq.lock, &flags);\r
+\r
+       ind = qp->sq.head;\r
+\r
+       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
+               if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {\r
+                       err = -ENOMEM;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+               if (unlikely(wr->num_ds > (u32)qp->sq.max_gs)) {\r
+                       err = -EINVAL;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+               wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));\r
+               ctrl = (void*)wqe;\r
+               qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;\r
+               opcode = to_wr_opcode(wr);\r
+\r
+               ctrl->srcrb_flags =\r
+                       (wr->send_opt & IB_SEND_OPT_SIGNALED ?\r
+                        cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |\r
+                       (wr->send_opt & IB_SEND_OPT_SOLICITED ?\r
+                        cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |\r
+                       (wr->send_opt & IB_SEND_OPT_TX_IP_CSUM ?\r
+                        cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM) : 0) |\r
+                       (wr->send_opt & IB_SEND_OPT_TX_TCP_UDP_CSUM ?\r
+                        cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |\r
+                       qp->sq_signal_bits;\r
+\r
+               if (opcode == IB_WR_SEND_WITH_IMM ||\r
+                   opcode == IB_WR_RDMA_WRITE_WITH_IMM)\r
+                       ctrl->imm = wr->immediate_data;\r
+               else\r
+                       ctrl->imm = 0;\r
+\r
+               wqe += sizeof *ctrl;\r
+               size = sizeof *ctrl / 16;\r
+\r
+               switch (ibqp->qp_type) {\r
+               case IB_QPT_RC:\r
+               case IB_QPT_UC:\r
+                       switch (opcode) {\r
+                       case IB_WR_ATOMIC_CMP_AND_SWP:\r
+                       case IB_WR_ATOMIC_FETCH_AND_ADD:\r
+                               set_raddr_seg((void*)wqe, wr->remote_ops.vaddr,\r
+                                             wr->remote_ops.rkey);\r
+                               wqe  += sizeof (struct mlx4_wqe_raddr_seg);\r
+\r
+                               set_atomic_seg((void*)wqe, wr);\r
+                               wqe  += sizeof (struct mlx4_wqe_atomic_seg);\r
+\r
+                               size += (sizeof (struct mlx4_wqe_raddr_seg) +\r
+                                        sizeof (struct mlx4_wqe_atomic_seg)) / 16;\r
+\r
+                               break;\r
+\r
+                       case IB_WR_RDMA_READ:\r
+                       case IB_WR_RDMA_WRITE:\r
+                       case IB_WR_RDMA_WRITE_WITH_IMM:\r
+                               set_raddr_seg((void*)wqe, wr->remote_ops.vaddr,\r
+                                             wr->remote_ops.rkey);\r
+                               wqe  += sizeof (struct mlx4_wqe_raddr_seg);\r
+                               size += sizeof (struct mlx4_wqe_raddr_seg) / 16;\r
+                               break;\r
+\r
+                       default:\r
+                               /* No extra segments required for sends */\r
+                               break;\r
+                       }\r
+                       break;\r
+\r
+               case IB_QPT_UD:\r
+                       set_datagram_seg((void*)wqe, wr);\r
+                       wqe  += sizeof (struct mlx4_wqe_datagram_seg);\r
+                       size += sizeof (struct mlx4_wqe_datagram_seg) / 16;\r
+                       break;\r
+\r
+               case IB_QPT_SMI:\r
+               case IB_QPT_GSI:\r
+                       err = build_mlx_header(to_msqp(qp), wr, ctrl);\r
+                       if (err < 0) {\r
+                               if (bad_wr)\r
+                                       *bad_wr = wr;\r
+                               goto out;\r
+                       }\r
+                       wqe  += err;\r
+                       size += err / 16;\r
+\r
+                       err = 0;\r
+                       break;\r
+\r
+               default:\r
+                       break;\r
+               }\r
+\r
+               /*\r
+                * Write data segments in reverse order, so as to\r
+                * overwrite cacheline stamp last within each\r
+                * cacheline.  This avoids issues with WQE\r
+                * prefetching.\r
+                */\r
+\r
+               dseg = (void*)wqe;\r
+               dseg += wr->num_ds - 1;\r
+               size += wr->num_ds * (sizeof (struct mlx4_wqe_data_seg) / 16);\r
+\r
+               /* Add one more inline data segment for ICRC for MLX sends */\r
+               if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||\r
+                            qp->ibqp.qp_type == IB_QPT_GSI)) {\r
+                       set_mlx_icrc_seg(dseg + 1);\r
+                       size += sizeof (struct mlx4_wqe_data_seg) / 16;\r
+               }\r
+\r
+               for (i = wr->num_ds - 1; i >= 0; --i, --dseg)\r
+                       set_data_seg(dseg, wr->ds_array + i);\r
+\r
+               ctrl->fence_size = (u8)((wr->send_opt & IB_SEND_OPT_FENCE ?\r
+                                   MLX4_WQE_CTRL_FENCE : 0) | size);\r
+\r
+               /*\r
+                * Make sure descriptor is fully written before\r
+                * setting ownership bit (because HW can start\r
+                * executing as soon as we do).\r
+                */\r
+               wmb();\r
+\r
+               if (opcode < 0 || opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {\r
+                       err = -EINVAL;\r
+                       goto out;\r
+               }\r
+\r
+               ctrl->owner_opcode = mlx4_ib_opcode[opcode] |\r
+                       (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);\r
+\r
+               /*\r
+                * We can improve latency by not stamping the last\r
+                * send queue WQE until after ringing the doorbell, so\r
+                * only stamp here if there are still more WQEs to post.\r
+                */\r
+               if (wr->p_next)\r
+                       stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &\r
+                                      (qp->sq.wqe_cnt - 1));\r
+\r
+               ++ind;\r
+       }\r
+\r
+out:\r
+       if (likely(nreq)) {\r
+               qp->sq.head += nreq;\r
+\r
+               /*\r
+                * Make sure that descriptors are written before\r
+                * doorbell record.\r
+                */\r
+               wmb();\r
+\r
+               writel(qp->doorbell_qpn,\r
+                      (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);\r
+\r
+#if 0\r
+               if (qp->mqp.qpn == 0x41)\r
+                       DbgPrint( "[MLX4_BUS] mlx4_ib_post_send : qtype %d, qpn %#x, nreq %d, sq.head %#x, wqe_ix %d, db %p \n", \r
+                               ibqp->qp_type, qp->mqp.qpn, nreq, qp->sq.head, ind, \r
+                               (u8*)to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL );\r
+#endif         \r
+               /*\r
+                * Make sure doorbells don't leak out of SQ spinlock\r
+                * and reach the HCA out of order.\r
+                */\r
+               mmiowb();\r
+\r
+               stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &\r
+                              (qp->sq.wqe_cnt - 1));\r
+       }\r
+\r
+       spin_unlock_irqrestore(&qp->sq.lock, flags);\r
+\r
+       return err;\r
+}\r
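
One detail worth calling out in the loop above is the ownership bit: because qp->sq.wqe_cnt is a power of two, `ind & qp->sq.wqe_cnt` is zero for one full pass through the ring and non-zero for the next, so bit 31 of owner_opcode alternates on successive wraps and lets the HCA tell freshly written WQEs from stale ones. A small standalone sketch of that toggle (wqe_cnt chosen arbitrarily):

    #include <stdio.h>

    int main(void)
    {
        unsigned wqe_cnt = 4;    /* power of two, as the masking above requires */

        for (unsigned ind = 0; ind < 3 * wqe_cnt; ind++) {
            unsigned slot  = ind & (wqe_cnt - 1);       /* ring slot used */
            unsigned owner = (ind & wqe_cnt) ? 1 : 0;   /* value of bit 31 */
            printf("ind=%2u slot=%u owner=%u\n", ind, slot, owner);
        }
        return 0;
    }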
+\r
+int mlx4_ib_post_recv(struct ib_qp *ibqp, ib_recv_wr_t *wr,\r
+                     ib_recv_wr_t **bad_wr)\r
+{\r
+       struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
+       struct mlx4_wqe_data_seg *scat;\r
+       unsigned long flags;\r
+       int err = 0;\r
+       int nreq;\r
+       int ind;\r
+       int i;\r
+\r
+       spin_lock_irqsave(&qp->rq.lock, &flags);\r
+\r
+       ind = qp->rq.head & (qp->rq.wqe_cnt - 1);\r
+\r
+       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
+               if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {\r
+                       err = -ENOMEM;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+               if (unlikely(wr->num_ds > (u32)qp->rq.max_gs)) {\r
+                       err = -EINVAL;\r
+                       if (bad_wr)\r
+                               *bad_wr = wr;\r
+                       goto out;\r
+               }\r
+\r
+               scat = get_recv_wqe(qp, ind);\r
+\r
+               for (i = 0; i < (int)wr->num_ds; ++i)\r
+                       __set_data_seg(scat + i, wr->ds_array + i);\r
+\r
+               if (i < qp->rq.max_gs) {\r
+                       scat[i].byte_count = 0;\r
+                       scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);\r
+                       scat[i].addr       = 0;\r
+               }\r
+\r
+               qp->rq.wrid[ind] = wr->wr_id;\r
+\r
+               ind = (ind + 1) & (qp->rq.wqe_cnt - 1);\r
+       }\r
+\r
+out:\r
+       if (likely(nreq)) {\r
+               qp->rq.head += nreq;\r
+\r
+               /*\r
+                * Make sure that descriptors are written before\r
+                * doorbell record.\r
+                */\r
+               wmb();\r
+\r
+               *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);\r
+\r
+#if 0\r
+               if (qp->mqp.qpn == 0x41)\r
+                       DbgPrint( "[MLX4_BUS] mlx4_ib_post_recv : qtype %d, qpn %#x, nreq %d, rq.head %#x, wqe_ix %d, db_obj %p, db %p \n", \r
+                               ibqp->qp_type, qp->mqp.qpn, nreq, qp->rq.head, ind, &qp->db, qp->db.db );\r
+#endif         \r
+       }\r
+\r
+       spin_unlock_irqrestore(&qp->rq.lock, flags);\r
+\r
+       return err;\r
+}\r
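
For reference, a consumer hands this routine a singly linked list of ib_recv_wr_t structures chained through p_next, each describing its scatter list via num_ds and ds_array. The sketch below uses stripped-down stand-in types that contain only the members dereferenced above; it illustrates the calling convention and is not the real IBAL definitions.

    #include <stdint.h>
    #include <stdio.h>

    /* stand-ins: only the members mlx4_ib_post_recv() actually touches */
    typedef struct local_ds {
        uint64_t vaddr;    /* registered buffer address */
        uint32_t length;
        uint32_t lkey;
    } local_ds_t;

    typedef struct recv_wr {
        struct recv_wr *p_next;
        uint64_t        wr_id;
        uint32_t        num_ds;
        local_ds_t     *ds_array;
    } recv_wr_t;

    int main(void)
    {
        local_ds_t sge[2] = {
            { 0x10000, 4096, 0x1234 },   /* hypothetical registered buffers */
            { 0x20000, 4096, 0x1234 },
        };
        recv_wr_t wr1 = { NULL, 2, 1, &sge[1] };
        recv_wr_t wr0 = { &wr1, 1, 1, &sge[0] };

        /* the driver walks the chain exactly like this */
        for (recv_wr_t *wr = &wr0; wr; wr = wr->p_next)
            printf("post wr_id=%llu with %u SGE(s)\n",
                   (unsigned long long)wr->wr_id, wr->num_ds);
        return 0;
    }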
+\r
+static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)\r
+{\r
+       switch (mlx4_state) {\r
+       case MLX4_QP_STATE_RST:      return XIB_QPS_RESET;\r
+       case MLX4_QP_STATE_INIT:     return XIB_QPS_INIT;\r
+       case MLX4_QP_STATE_RTR:      return XIB_QPS_RTR;\r
+       case MLX4_QP_STATE_RTS:      return XIB_QPS_RTS;\r
+       case MLX4_QP_STATE_SQ_DRAINING:\r
+       case MLX4_QP_STATE_SQD:      return XIB_QPS_SQD;\r
+       case MLX4_QP_STATE_SQER:     return XIB_QPS_SQE;\r
+       case MLX4_QP_STATE_ERR:      return XIB_QPS_ERR;\r
+       default:                     return -1;\r
+       }\r
+}\r
+\r
+static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)\r
+{\r
+       switch (mlx4_mig_state) {\r
+       case MLX4_QP_PM_ARMED:          return IB_MIG_ARMED;\r
+       case MLX4_QP_PM_REARM:          return IB_MIG_REARM;\r
+       case MLX4_QP_PM_MIGRATED:       return IB_MIG_MIGRATED;\r
+       default: return -1;\r
+       }\r
+}\r
+\r
+static int to_ib_qp_access_flags(int mlx4_flags)\r
+{\r
+       int ib_flags = 0;\r
+\r
+       if (mlx4_flags & MLX4_QP_BIT_RRE)\r
+               ib_flags |= IB_ACCESS_REMOTE_READ;\r
+       if (mlx4_flags & MLX4_QP_BIT_RWE)\r
+               ib_flags |= IB_ACCESS_REMOTE_WRITE;\r
+       if (mlx4_flags & MLX4_QP_BIT_RAE)\r
+               ib_flags |= IB_ACCESS_REMOTE_ATOMIC;\r
+\r
+       return ib_flags;\r
+}\r
+\r
+static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,\r
+                               struct mlx4_qp_path *path)\r
+{\r
+       memset(ib_ah_attr, 0, sizeof *ib_ah_attr);\r
+       ib_ah_attr->port_num      = path->sched_queue & 0x40 ? 2 : 1;\r
+\r
+       if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)\r
+               return;\r
+\r
+       ib_ah_attr->dlid          = be16_to_cpu(path->rlid);\r
+       ib_ah_attr->sl            = (path->sched_queue >> 2) & 0xf;\r
+       ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;\r
+       ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;\r
+       ib_ah_attr->ah_flags      = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;\r
+       if (ib_ah_attr->ah_flags) {\r
+               ib_ah_attr->grh.sgid_index = path->mgid_index;\r
+               ib_ah_attr->grh.hop_limit  = path->hop_limit;\r
+               ib_ah_attr->grh.traffic_class =\r
+                       (u8)((be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff);\r
+               ib_ah_attr->grh.flow_label =\r
+                       be32_to_cpu(path->tclass_flowlabel) & 0xfffff;\r
+               memcpy(ib_ah_attr->grh.dgid.raw,\r
+                       path->rgid, sizeof ib_ah_attr->grh.dgid.raw);\r
+       }\r
+}\r
+\r
+int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,\r
+                    struct ib_qp_init_attr *qp_init_attr)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(ibqp->device);\r
+       struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
+       struct mlx4_qp_context context;\r
+       int mlx4_state;\r
+       int err;\r
+\r
+       UNUSED_PARAM(qp_attr_mask);\r
+\r
+       if (qp->state == XIB_QPS_RESET) {\r
+               qp_attr->qp_state = XIB_QPS_RESET;\r
+               goto done;\r
+       }\r
+\r
+       err = mlx4_qp_query(dev->dev, &qp->mqp, &context);\r
+       if (err)\r
+               return -EINVAL;\r
+\r
+       mlx4_state = be32_to_cpu(context.flags) >> 28;\r
+\r
+       qp_attr->qp_state            = to_ib_qp_state(mlx4_state);\r
+       qp_attr->path_mtu            = context.mtu_msgmax >> 5;\r
+       qp_attr->path_mig_state      =\r
+               to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);\r
+       qp_attr->qkey                = be32_to_cpu(context.qkey);\r
+       qp_attr->rq_psn              = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;\r
+       qp_attr->sq_psn              = be32_to_cpu(context.next_send_psn) & 0xffffff;\r
+       qp_attr->dest_qp_num         = be32_to_cpu(context.remote_qpn) & 0xffffff;\r
+       qp_attr->qp_access_flags     =\r
+               to_ib_qp_access_flags(be32_to_cpu(context.params2));\r
+\r
+       if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {\r
+               to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);\r
+               to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);\r
+               qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;\r
+               qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;\r
+       }\r
+\r
+       qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;\r
+       if (qp_attr->qp_state == XIB_QPS_INIT)\r
+               qp_attr->port_num = qp->port;\r
+       else\r
+               qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;\r
+\r
+       /* qp_attr->en_sqd_async_notify is only applicable in modify qp */\r
+       qp_attr->sq_draining = (u8)(mlx4_state == MLX4_QP_STATE_SQ_DRAINING);\r
+\r
+       qp_attr->max_rd_atomic = (u8)(1 << ((be32_to_cpu(context.params1) >> 21) & 0x7));\r
+\r
+       qp_attr->max_dest_rd_atomic =\r
+               (u8)(1 << ((be32_to_cpu(context.params2) >> 21) & 0x7));\r
+       qp_attr->min_rnr_timer      =\r
+               (u8)((be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f);\r
+       qp_attr->timeout            = context.pri_path.ackto >> 3;\r
+       qp_attr->retry_cnt          = (u8)((be32_to_cpu(context.params1) >> 16) & 0x7);\r
+       qp_attr->rnr_retry          = (u8)((be32_to_cpu(context.params1) >> 13) & 0x7);\r
+       qp_attr->alt_timeout        = context.alt_path.ackto >> 3;\r
+\r
+done:\r
+       qp_attr->cur_qp_state        = qp_attr->qp_state;\r
+       qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;\r
+       qp_attr->cap.max_recv_sge    = qp->rq.max_gs;\r
+\r
+       if (!ibqp->p_uctx) {\r
+               qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;\r
+               qp_attr->cap.max_send_sge = qp->sq.max_gs;\r
+       } else {\r
+               qp_attr->cap.max_send_wr  = 0;\r
+               qp_attr->cap.max_send_sge = 0;\r
+       }\r
+\r
+       /*\r
+        * We don't support inline sends for kernel QPs (yet), and we\r
+        * don't know what userspace's value should be.\r
+        */\r
+       qp_attr->cap.max_inline_data = 0;\r
+\r
+       qp_init_attr->cap            = qp_attr->cap;\r
+\r
+       return 0;\r
+}\r
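
The read-atomic depth, retry count, and RNR retry recovered above are packed as small bit fields inside the params1 word, which is why each one is a shift-and-mask (and max_rd_atomic additionally a 1 << log2 decode). A standalone decode of a hypothetical, already byte-swapped params1 value:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t params1 = 0x00675000u;   /* hypothetical host-order value */

        unsigned max_rd_atomic = 1u << ((params1 >> 21) & 0x7);  /* 3-bit log2 field */
        unsigned retry_cnt     = (params1 >> 16) & 0x7;
        unsigned rnr_retry     = (params1 >> 13) & 0x7;

        printf("max_rd_atomic=%u retry_cnt=%u rnr_retry=%u\n",
               max_rd_atomic, retry_cnt, rnr_retry);   /* 8, 7, 2 */
        return 0;
    }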
+\r
index 2148584..4758305 100644 (file)
-/*
- * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "mlx4_ib.h"
-#include "qp.h"
-#include "srq.h"
-#include "user.h"
-
-static void *get_wqe(struct mlx4_ib_srq *srq, int n)
-{
-       int offset = n << srq->msrq.wqe_shift;
-
-       if (srq->buf.nbufs == 1)
-               return srq->buf.u.direct.buf + offset;
-       else
-               return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-                       (offset & (PAGE_SIZE - 1));
-}
-
-static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
-{
-       struct ib_event event;
-       struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
-
-       if (ibsrq->event_handler) {
-               event.device      = ibsrq->device;
-               event.element.srq = ibsrq;
-               switch (type) {
-               case MLX4_EVENT_TYPE_SRQ_LIMIT:
-                       event.event = IB_EVENT_SRQ_LIMIT_REACHED;
-                       break;
-               case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
-                       event.event = IB_EVENT_SRQ_ERR;
-                       break;
-               default:
-                       printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
-                              "on SRQ %06x\n", type, srq->srqn);
-                       return;
-               }
-
-               ibsrq->event_handler(&event, ibsrq->srq_context);
-       }
-}
-
-struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
-                                 struct ib_srq_init_attr *init_attr,
-                                 struct ib_udata *udata)
-{
-       struct mlx4_ib_dev *dev = to_mdev(pd->device);
-       struct mlx4_ib_srq *srq;
-       struct mlx4_wqe_srq_next_seg *next;
-       int desc_size;
-       int buf_size;
-       int err;
-       int i;
-       u32 cqn = 0;
-       u16 xrcd = 0;
-
-       /* Sanity check SRQ size before proceeding */
-       if ((int)init_attr->attr.max_wr  >= dev->dev->caps.max_srq_wqes ||
-           (int)init_attr->attr.max_sge >  dev->dev->caps.max_srq_sge)
-               return ERR_PTR(-EINVAL);
-
-       srq = kzalloc(sizeof *srq, GFP_KERNEL);
-       if (!srq)
-               return ERR_PTR(-ENOMEM);
-
-       mutex_init(&srq->mutex);
-       spin_lock_init(&srq->lock);
-       srq->msrq.max    = roundup_pow_of_two(init_attr->attr.max_wr + 1);
-       srq->msrq.max_gs = init_attr->attr.max_sge;
-
-       desc_size = max(32UL,
-                       roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) +
-                                          srq->msrq.max_gs *
-                                          sizeof (struct mlx4_wqe_data_seg)));
-       srq->msrq.wqe_shift = ilog2(desc_size);
-
-       buf_size = srq->msrq.max * desc_size;
-
-       if (pd->p_uctx) {
-               struct mlx4_ib_create_srq ucmd;
-
-               if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
-                       err = -EFAULT;
-                       goto err_srq;
-               }
-
-               srq->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,
-                                       buf_size, 0, FALSE);
-               if (IS_ERR(srq->umem)) {
-                       err = PTR_ERR(srq->umem);
-                       goto err_srq;
-               }
-
-               err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
-                                   ilog2(srq->umem->page_size), &srq->mtt);
-               if (err)
-                       goto err_buf;
-
-               err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem);
-               if (err)
-                       goto err_mtt;
-
-               err = mlx4_ib_db_map_user(to_mucontext(pd->p_uctx),
-                                         ucmd.db_addr, &srq->db);
-               if (err)
-                       goto err_mtt;
-       } else {
-               err = mlx4_ib_db_alloc(dev, &srq->db, 0);
-               if (err)
-                       goto err_srq;
-
-               *srq->db.db = 0;
-
-               if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
-                       err = -ENOMEM;
-                       goto err_db;
-               }
-
-               srq->head    = 0;
-               srq->tail    = srq->msrq.max - 1;
-               srq->wqe_ctr = 0;
-
-               for (i = 0; i < srq->msrq.max; ++i) {
-                       next = get_wqe(srq, i);
-                       next->next_wqe_index =
-                               cpu_to_be16((i + 1) & (srq->msrq.max - 1));
-               }
-
-               err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift,
-                                   &srq->mtt);
-               if (err)
-                       goto err_buf;
-
-               err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
-               if (err)
-                       goto err_mtt;
-
-               srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
-               if (!srq->wrid) {
-                       err = -ENOMEM;
-                       goto err_mtt;
-               }
-       }
-       err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcd, &srq->mtt,
-                            srq->db.dma.da, &srq->msrq);
-       if (err)
-               goto err_wrid;
-
-       srq->msrq.event = mlx4_ib_srq_event;
-
-       if (pd->p_uctx)
-               if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
-                       err = -EFAULT;
-                       goto err_wrid;
-               }
-
-       init_attr->attr.max_wr = srq->msrq.max - 1;
-
-       return &srq->ibsrq;
-
-err_wrid:
-       if (pd->p_uctx)
-               mlx4_ib_db_unmap_user(to_mucontext(pd->p_uctx), &srq->db);
-       else
-               kfree(srq->wrid);
-
-err_mtt:
-       mlx4_mtt_cleanup(dev->dev, &srq->mtt);
-
-err_buf:
-       if (pd->p_uctx)
-               ib_umem_release(srq->umem);
-       else
-               mlx4_buf_free(dev->dev, buf_size, &srq->buf);
-
-err_db:
-       if (!pd->p_uctx)
-               mlx4_ib_db_free(dev, &srq->db);
-
-err_srq:
-       kfree(srq);
-
-       return ERR_PTR(err);
-}
-
-int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                      enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
-{
-       struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
-       struct mlx4_ib_srq *srq = to_msrq(ibsrq);
-       int ret;
-
-       UNUSED_PARAM(udata);
-       
-       /* We don't support resizing SRQs (yet?) */
-       if (attr_mask & XIB_SRQ_MAX_WR)
-               return -ENOSYS;
-
-       if (attr_mask & XIB_SRQ_LIMIT) {
-               if ((int)attr->srq_limit >= srq->msrq.max)
-                       return -ERANGE;
-
-               mutex_lock(&srq->mutex);
-               ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
-               mutex_unlock(&srq->mutex);
-
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
-int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
-{
-       struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
-       struct mlx4_ib_srq *srq = to_msrq(ibsrq);
-       int ret;
-       int limit_watermark;
-
-       ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark);
-       if (ret)
-               return ret;
-
-       srq_attr->srq_limit = limit_watermark;
-       srq_attr->max_wr    = srq->msrq.max - 1;
-       srq_attr->max_sge   = srq->msrq.max_gs;
-
-       return 0;
-}
-
-int mlx4_ib_destroy_srq(struct ib_srq *srq)
-{
-       struct mlx4_ib_dev *dev = to_mdev(srq->device);
-       struct mlx4_ib_srq *msrq = to_msrq(srq);
-
-       mlx4_srq_invalidate(dev->dev, &msrq->msrq);
-       mlx4_srq_remove(dev->dev, &msrq->msrq);
-
-       mlx4_srq_free(dev->dev, &msrq->msrq);
-       mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
-
-       if (srq->p_uctx) {
-               mlx4_ib_db_unmap_user(to_mucontext(srq->p_uctx), &msrq->db);
-               ib_umem_release(msrq->umem);
-       } else {
-               kfree(msrq->wrid);
-               mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
-                             &msrq->buf);
-               mlx4_ib_db_free(dev, &msrq->db);
-       }
-
-       kfree(msrq);
-
-       return 0;
-}
-
-void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
-{
-       struct mlx4_wqe_srq_next_seg *next;
-
-       /* always called with interrupts disabled. */
-       spin_lock(&srq->lock);
-
-       next = get_wqe(srq, srq->tail);
-       next->next_wqe_index = cpu_to_be16(wqe_index);
-       srq->tail = wqe_index;
-
-       spin_unlock(&srq->lock);
-}
-
-int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, ib_recv_wr_t *wr,
-                         ib_recv_wr_t **bad_wr)
-{
-       struct mlx4_ib_srq *srq = to_msrq(ibsrq);
-       struct mlx4_wqe_srq_next_seg *next;
-       struct mlx4_wqe_data_seg *scat;
-       unsigned long flags;
-       int err = 0;
-       int nreq;
-       int i;
-
-       spin_lock_irqsave(&srq->lock, &flags);
-
-       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {
-               if (unlikely(wr->num_ds > (u32)srq->msrq.max_gs)) {
-                       err = -EINVAL;
-                       *bad_wr = wr;
-                       break;
-               }
-
-               if (unlikely(srq->head == srq->tail)) {
-                       err = -ENOMEM;
-                       *bad_wr = wr;
-                       break;
-               }
-
-               srq->wrid[srq->head] = wr->wr_id;
-
-               next      = get_wqe(srq, srq->head);
-               srq->head = be16_to_cpu(next->next_wqe_index);
-               scat      = (struct mlx4_wqe_data_seg *) (next + 1);
-
-               for (i = 0; i < (int)wr->num_ds; ++i) {
-                       scat[i].byte_count = cpu_to_be32(wr->ds_array[i].length);
-                       scat[i].lkey       = cpu_to_be32(wr->ds_array[i].lkey);
-                       scat[i].addr       = cpu_to_be64(wr->ds_array[i].vaddr);
-               }
-
-               if (i < srq->msrq.max_gs) {
-                       scat[i].byte_count = 0;
-                       scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);
-                       scat[i].addr       = 0;
-               }
-       }
-
-       if (likely(nreq)) {
-               srq->wqe_ctr = (u16)(srq->wqe_ctr + nreq);
-
-               /*
-                * Make sure that descriptors are written before
-                * doorbell record.
-                */
-               wmb();
-
-               *srq->db.db = cpu_to_be32(srq->wqe_ctr);
-       }
-
-       spin_unlock_irqrestore(&srq->lock, flags);
-
-       return err;
-}
+/*\r
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.\r
+ *\r
+ * This software is available to you under a choice of one of two\r
+ * licenses.  You may choose to be licensed under the terms of the GNU\r
+ * General Public License (GPL) Version 2, available from the file\r
+ * COPYING in the main directory of this source tree, or the\r
+ * OpenIB.org BSD license below:\r
+ *\r
+ *     Redistribution and use in source and binary forms, with or\r
+ *     without modification, are permitted provided that the following\r
+ *     conditions are met:\r
+ *\r
+ *      - Redistributions of source code must retain the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer.\r
+ *\r
+ *      - Redistributions in binary form must reproduce the above\r
+ *        copyright notice, this list of conditions and the following\r
+ *        disclaimer in the documentation and/or other materials\r
+ *        provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ */\r
+\r
+#include "mlx4_ib.h"\r
+#include "qp.h"\r
+#include "srq.h"\r
+#include "user.h"\r
+\r
+static void *get_wqe(struct mlx4_ib_srq *srq, int n)\r
+{\r
+       int offset = n << srq->msrq.wqe_shift;\r
+\r
+       if (srq->buf.nbufs == 1)\r
+               return srq->buf.u.direct.buf + offset;\r
+       else\r
+               return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf +\r
+                       (offset & (PAGE_SIZE - 1));\r
+}\r
+\r
+static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)\r
+{\r
+       ib_event_rec_t event;\r
+       struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;\r
+\r
+       switch (type) {\r
+       case MLX4_EVENT_TYPE_SRQ_LIMIT:\r
+               event.type = IB_EVENT_SRQ_LIMIT_REACHED;\r
+               break;\r
+       case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:\r
+               event.type = IB_EVENT_SRQ_ERR;\r
+               break;\r
+       default:\r
+               printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "\r
+                      "on SRQ %06x\n", type, srq->srqn);\r
+               return;\r
+       }\r
+\r
+       event.context = ibsrq->srq_context;\r
+       ibsrq->event_handler(&event);\r
+}\r
+\r
+struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,\r
+                                 struct ib_srq_init_attr *init_attr,\r
+                                 struct ib_udata *udata)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(pd->device);\r
+       struct mlx4_ib_srq *srq;\r
+       struct mlx4_wqe_srq_next_seg *next;\r
+       int desc_size;\r
+       int buf_size;\r
+       int err;\r
+       int i;\r
+       u32 cqn = 0;\r
+       u16 xrcd = 0;\r
+\r
+       /* Sanity check SRQ size before proceeding */\r
+       if ((int)init_attr->attr.max_wr  >= dev->dev->caps.max_srq_wqes ||\r
+           (int)init_attr->attr.max_sge >  dev->dev->caps.max_srq_sge)\r
+               return ERR_PTR(-EINVAL);\r
+\r
+       srq = kzalloc(sizeof *srq, GFP_KERNEL);\r
+       if (!srq)\r
+               return ERR_PTR(-ENOMEM);\r
+\r
+       mutex_init(&srq->mutex);\r
+       spin_lock_init(&srq->lock);\r
+       srq->msrq.max    = roundup_pow_of_two(init_attr->attr.max_wr + 1);\r
+       srq->msrq.max_gs = init_attr->attr.max_sge;\r
+\r
+       desc_size = max(32UL,\r
+                       roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) +\r
+                                          srq->msrq.max_gs *\r
+                                          sizeof (struct mlx4_wqe_data_seg)));\r
+       srq->msrq.wqe_shift = ilog2(desc_size);\r
+\r
+       buf_size = srq->msrq.max * desc_size;\r
+\r
+       if (pd->p_uctx) {\r
+               struct mlx4_ib_create_srq ucmd;\r
+\r
+               if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {\r
+                       err = -EFAULT;\r
+                       goto err_srq;\r
+               }\r
+\r
+               srq->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,\r
+                                       buf_size, 0, FALSE);\r
+               if (IS_ERR(srq->umem)) {\r
+                       err = PTR_ERR(srq->umem);\r
+                       goto err_srq;\r
+               }\r
+\r
+               err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),\r
+                                   ilog2(srq->umem->page_size), &srq->mtt);\r
+               if (err)\r
+                       goto err_buf;\r
+\r
+               err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem);\r
+               if (err)\r
+                       goto err_mtt;\r
+\r
+               err = mlx4_ib_db_map_user(to_mucontext(pd->p_uctx),\r
+                                         ucmd.db_addr, &srq->db);\r
+               if (err)\r
+                       goto err_mtt;\r
+       } else {\r
+               err = mlx4_ib_db_alloc(dev, &srq->db, 0);\r
+               if (err)\r
+                       goto err_srq;\r
+\r
+               *srq->db.db = 0;\r
+\r
+               if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {\r
+                       err = -ENOMEM;\r
+                       goto err_db;\r
+               }\r
+\r
+               srq->head    = 0;\r
+               srq->tail    = srq->msrq.max - 1;\r
+               srq->wqe_ctr = 0;\r
+\r
+               for (i = 0; i < srq->msrq.max; ++i) {\r
+                       next = get_wqe(srq, i);\r
+                       next->next_wqe_index =\r
+                               cpu_to_be16((i + 1) & (srq->msrq.max - 1));\r
+               }\r
+\r
+               err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift,\r
+                                   &srq->mtt);\r
+               if (err)\r
+                       goto err_buf;\r
+\r
+               err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);\r
+               if (err)\r
+                       goto err_mtt;\r
+\r
+               srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);\r
+               if (!srq->wrid) {\r
+                       err = -ENOMEM;\r
+                       goto err_mtt;\r
+               }\r
+       }\r
+       err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcd, &srq->mtt,\r
+                            srq->db.dma.da, &srq->msrq);\r
+       if (err)\r
+               goto err_wrid;\r
+\r
+       srq->msrq.event = mlx4_ib_srq_event;\r
+\r
+       if (pd->p_uctx)\r
+               if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {\r
+                       err = -EFAULT;\r
+                       goto err_wrid;\r
+               }\r
+\r
+       init_attr->attr.max_wr = srq->msrq.max - 1;\r
+\r
+       return &srq->ibsrq;\r
+\r
+err_wrid:\r
+       if (pd->p_uctx)\r
+               mlx4_ib_db_unmap_user(to_mucontext(pd->p_uctx), &srq->db);\r
+       else\r
+               kfree(srq->wrid);\r
+\r
+err_mtt:\r
+       mlx4_mtt_cleanup(dev->dev, &srq->mtt);\r
+\r
+err_buf:\r
+       if (pd->p_uctx)\r
+               ib_umem_release(srq->umem);\r
+       else\r
+               mlx4_buf_free(dev->dev, buf_size, &srq->buf);\r
+\r
+err_db:\r
+       if (!pd->p_uctx)\r
+               mlx4_ib_db_free(dev, &srq->db);\r
+\r
+err_srq:\r
+       kfree(srq);\r
+\r
+       return ERR_PTR(err);\r
+}\r
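
The descriptor sizing near the top of this function rounds each SRQ WQE up to a power of two of at least 32 bytes so that wqe_shift can be used for indexing, and allocates a power-of-two number of entries. A worked standalone version of that arithmetic, assuming 16-byte next and data segments (those struct sizes are not visible in this hunk):

    #include <stdio.h>

    /* assumed sizes of mlx4_wqe_srq_next_seg and mlx4_wqe_data_seg */
    #define NEXT_SEG_SIZE 16u
    #define DATA_SEG_SIZE 16u

    static unsigned roundup_pow_of_two(unsigned x)
    {
        unsigned r = 1;
        while (r < x)
            r <<= 1;
        return r;
    }

    static unsigned ilog2_u(unsigned x)
    {
        unsigned s = 0;
        while (x >>= 1)
            s++;
        return s;
    }

    int main(void)
    {
        unsigned max_wr = 100, max_sge = 4;    /* hypothetical consumer request */

        unsigned max       = roundup_pow_of_two(max_wr + 1);        /* 128   */
        unsigned desc_size = roundup_pow_of_two(NEXT_SEG_SIZE +
                                                max_sge * DATA_SEG_SIZE);
        if (desc_size < 32)
            desc_size = 32;                    /* the max(32UL, ...) above */
        unsigned wqe_shift = ilog2_u(desc_size);                     /* 7     */
        unsigned buf_size  = max * desc_size;                        /* 16384 */

        printf("max=%u desc_size=%u wqe_shift=%u buf_size=%u\n",
               max, desc_size, wqe_shift, buf_size);
        return 0;
    }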
+\r
+int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,\r
+                      enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);\r
+       struct mlx4_ib_srq *srq = to_msrq(ibsrq);\r
+       int ret;\r
+\r
+       UNUSED_PARAM(udata);\r
+       \r
+       /* We don't support resizing SRQs (yet?) */\r
+       if (attr_mask & XIB_SRQ_MAX_WR)\r
+               return -ENOSYS;\r
+\r
+       if (attr_mask & XIB_SRQ_LIMIT) {\r
+               if ((int)attr->srq_limit >= srq->msrq.max)\r
+                       return -ERANGE;\r
+\r
+               mutex_lock(&srq->mutex);\r
+               ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);\r
+               mutex_unlock(&srq->mutex);\r
+\r
+               if (ret)\r
+                       return ret;\r
+       }\r
+\r
+       return 0;\r
+}\r
+\r
+int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);\r
+       struct mlx4_ib_srq *srq = to_msrq(ibsrq);\r
+       int ret;\r
+       int limit_watermark;\r
+\r
+       ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark);\r
+       if (ret)\r
+               return ret;\r
+\r
+       srq_attr->srq_limit = limit_watermark;\r
+       srq_attr->max_wr    = srq->msrq.max - 1;\r
+       srq_attr->max_sge   = srq->msrq.max_gs;\r
+\r
+       return 0;\r
+}\r
+\r
+int mlx4_ib_destroy_srq(struct ib_srq *srq)\r
+{\r
+       struct mlx4_ib_dev *dev = to_mdev(srq->device);\r
+       struct mlx4_ib_srq *msrq = to_msrq(srq);\r
+\r
+       mlx4_srq_invalidate(dev->dev, &msrq->msrq);\r
+       mlx4_srq_remove(dev->dev, &msrq->msrq);\r
+\r
+       mlx4_srq_free(dev->dev, &msrq->msrq);\r
+       mlx4_mtt_cleanup(dev->dev, &msrq->mtt);\r
+\r
+       if (srq->p_uctx) {\r
+               mlx4_ib_db_unmap_user(to_mucontext(srq->p_uctx), &msrq->db);\r
+               ib_umem_release(msrq->umem);\r
+       } else {\r
+               kfree(msrq->wrid);\r
+               mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,\r
+                             &msrq->buf);\r
+               mlx4_ib_db_free(dev, &msrq->db);\r
+       }\r
+\r
+       kfree(msrq);\r
+\r
+       return 0;\r
+}\r
+\r
+void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)\r
+{\r
+       struct mlx4_wqe_srq_next_seg *next;\r
+\r
+       /* always called with interrupts disabled. */\r
+       spin_lock(&srq->lock);\r
+\r
+       next = get_wqe(srq, srq->tail);\r
+       next->next_wqe_index = cpu_to_be16(wqe_index);\r
+       srq->tail = wqe_index;\r
+\r
+       spin_unlock(&srq->lock);\r
+}\r
+\r
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, ib_recv_wr_t *wr,\r
+                         ib_recv_wr_t **bad_wr)\r
+{\r
+       struct mlx4_ib_srq *srq = to_msrq(ibsrq);\r
+       struct mlx4_wqe_srq_next_seg *next;\r
+       struct mlx4_wqe_data_seg *scat;\r
+       unsigned long flags;\r
+       int err = 0;\r
+       int nreq;\r
+       int i;\r
+\r
+       spin_lock_irqsave(&srq->lock, &flags);\r
+\r
+       for (nreq = 0; wr; ++nreq, wr = wr->p_next) {\r
+               if (unlikely(wr->num_ds > (u32)srq->msrq.max_gs)) {\r
+                       err = -EINVAL;\r
+                       *bad_wr = wr;\r
+                       break;\r
+               }\r
+\r
+               if (unlikely(srq->head == srq->tail)) {\r
+                       err = -ENOMEM;\r
+                       *bad_wr = wr;\r
+                       break;\r
+               }\r
+\r
+               srq->wrid[srq->head] = wr->wr_id;\r
+\r
+               next      = get_wqe(srq, srq->head);\r
+               srq->head = be16_to_cpu(next->next_wqe_index);\r
+               scat      = (struct mlx4_wqe_data_seg *) (next + 1);\r
+\r
+               for (i = 0; i < (int)wr->num_ds; ++i) {\r
+                       scat[i].byte_count = cpu_to_be32(wr->ds_array[i].length);\r
+                       scat[i].lkey       = cpu_to_be32(wr->ds_array[i].lkey);\r
+                       scat[i].addr       = cpu_to_be64(wr->ds_array[i].vaddr);\r
+               }\r
+\r
+               if (i < srq->msrq.max_gs) {\r
+                       scat[i].byte_count = 0;\r
+                       scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);\r
+                       scat[i].addr       = 0;\r
+               }\r
+       }\r
+\r
+       if (likely(nreq)) {\r
+               srq->wqe_ctr = (u16)(srq->wqe_ctr + nreq);\r
+\r
+               /*\r
+                * Make sure that descriptors are written before\r
+                * doorbell record.\r
+                */\r
+               wmb();\r
+\r
+               *srq->db.db = cpu_to_be32(srq->wqe_ctr);\r
+       }\r
+\r
+       spin_unlock_irqrestore(&srq->lock, flags);\r
+\r
+       return err;\r
+}\r
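
Taken together, mlx4_ib_post_srq_recv() and mlx4_ib_free_srq_wqe() above manage the SRQ as a free list threaded through each WQE's next_wqe_index: posting pops an index at head, completion pushes the finished index back at tail, and the head == tail check keeps one entry permanently on the list, which is why the create path reports max_wr as max - 1. A tiny index-only model of that list (no hardware state, sizes arbitrary):

    #include <stdio.h>

    #define SRQ_MAX 8u   /* arbitrary power-of-two ring size */

    static unsigned short next_index[SRQ_MAX];   /* models next_wqe_index */
    static unsigned head, tail;

    static int srq_pop(void)                     /* what posting a recv does */
    {
        if (head == tail)
            return -1;                           /* list "empty": -ENOMEM above */
        unsigned wqe = head;
        head = next_index[head];
        return (int)wqe;
    }

    static void srq_push(unsigned wqe)           /* what mlx4_ib_free_srq_wqe() does */
    {
        next_index[tail] = (unsigned short)wqe;
        tail = wqe;
    }

    int main(void)
    {
        /* initial circular threading, as in mlx4_ib_create_srq() */
        for (unsigned i = 0; i < SRQ_MAX; i++)
            next_index[i] = (unsigned short)((i + 1) & (SRQ_MAX - 1));
        head = 0;
        tail = SRQ_MAX - 1;

        int a = srq_pop(), b = srq_pop();
        printf("posted into WQEs %d and %d\n", a, b);    /* 0 and 1 */
        srq_push((unsigned)a);                           /* completion frees WQE 0 */
        printf("next free WQE is %d\n", srq_pop());      /* 2: freed entry sits at the back */
        return 0;
    }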
index 3dc8441..1c6e832 100644 (file)
-/*
- * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
- * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
- * Copyright (c) 2004 Intel Corporation.  All rights reserved.
- * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $
- */
-
-#if !defined(IB_VERBS_H)
-#define IB_VERBS_H
-
-#include <iba\ib_ci.h>
-
-union ib_gid {
-       u8      raw[16];
-       struct {
-               __be64  subnet_prefix;
-               __be64  interface_id;
-       } global;
-};
-
-#include "ib_verbs_ex.h"
-
-enum rdma_node_type {
-       /* IB values map to NodeInfo:NodeType. */
-       RDMA_NODE_IB_CA         = 1,
-       RDMA_NODE_IB_SWITCH,
-       RDMA_NODE_IB_ROUTER,
-       RDMA_NODE_RNIC
-};
-
-enum rdma_transport_type {
-       RDMA_TRANSPORT_IB,
-       RDMA_TRANSPORT_IWARP
-};
-
-enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__;
-
-enum ib_device_cap_flags {
-       IB_DEVICE_RESIZE_MAX_WR         = 1,
-       IB_DEVICE_BAD_PKEY_CNTR         = (1<<1),
-       IB_DEVICE_BAD_QKEY_CNTR         = (1<<2),
-       IB_DEVICE_RAW_MULTI             = (1<<3),
-       IB_DEVICE_AUTO_PATH_MIG         = (1<<4),
-       IB_DEVICE_CHANGE_PHY_PORT       = (1<<5),
-       IB_DEVICE_UD_AV_PORT_ENFORCE    = (1<<6),
-       IB_DEVICE_CURR_QP_STATE_MOD     = (1<<7),
-       IB_DEVICE_SHUTDOWN_PORT         = (1<<8),
-       IB_DEVICE_INIT_TYPE             = (1<<9),
-       IB_DEVICE_PORT_ACTIVE_EVENT     = (1<<10),
-       IB_DEVICE_SYS_IMAGE_GUID        = (1<<11),
-       IB_DEVICE_RC_RNR_NAK_GEN        = (1<<12),
-       IB_DEVICE_SRQ_RESIZE            = (1<<13),
-       IB_DEVICE_N_NOTIFY_CQ           = (1<<14),
-       IB_DEVICE_ZERO_STAG                     = (1<<15),
-       IB_DEVICE_SEND_W_INV            = (1<<16),
-       IB_DEVICE_MEM_WINDOW            = (1<<17),
-       IB_DEVICE_IPOIB_CSUM            = (1<<18)
-};
-
-enum ib_atomic_cap {
-       IB_ATOMIC_NON,
-       IB_ATOMIC_HCA,
-       IB_ATOMIC_GLOB
-};
-
-struct ib_device_attr {
-       u64                     fw_ver;
-       __be64                  sys_image_guid;
-       u64                     max_mr_size;
-       u64                     page_size_cap;
-       u32                     vendor_id;
-       u32                     vendor_part_id;
-       u32                     hw_ver;
-       int                     max_qp;
-       int                     max_qp_wr;
-       int                     device_cap_flags;
-       int                     max_sge;
-       int                     max_sge_rd;
-       int                     max_cq;
-       int                     max_cqe;
-       int                     max_mr;
-       int                     max_pd;
-       int                     max_qp_rd_atom;
-       int                     max_ee_rd_atom;
-       int                     max_res_rd_atom;
-       int                     max_qp_init_rd_atom;
-       int                     max_ee_init_rd_atom;
-       enum ib_atomic_cap      atomic_cap;
-       int                     max_ee;
-       int                     max_rdd;
-       int                     max_mw;
-       int                     max_raw_ipv6_qp;
-       int                     max_raw_ethy_qp;
-       int                     max_mcast_grp;
-       int                     max_mcast_qp_attach;
-       int                     max_total_mcast_qp_attach;
-       u64                     max_ah;
-       int                     max_fmr;
-       int                     max_map_per_fmr;
-       int                     max_srq;
-       int                     max_srq_wr;
-       int                     max_srq_sge;
-       u16                     max_pkeys;
-       u8                      local_ca_ack_delay;
-};
-
-enum ib_mtu {
-       IB_MTU_256  = 1,
-       IB_MTU_512  = 2,
-       IB_MTU_1024 = 3,
-       IB_MTU_2048 = 4,
-       IB_MTU_4096 = 5
-};
-
-static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
-{
-       switch (mtu) {
-       case IB_MTU_256:  return  256;
-       case IB_MTU_512:  return  512;
-       case IB_MTU_1024: return 1024;
-       case IB_MTU_2048: return 2048;
-       case IB_MTU_4096: return 4096;
-       default:          return -1;
-       }
-}
-
-enum ib_port_state {
-       IB_PORT_NOP             = 0,
-       IB_PORT_DOWN            = 1,
-       IB_PORT_INIT            = 2,
-       IB_PORT_ARMED           = 3,
-       IB_PORT_ACTIVE          = 4,
-       IB_PORT_ACTIVE_DEFER    = 5
-};
-
-enum ib_port_cap_flags {
-       IB_PORT_SM                              = 1 <<  1,
-       IB_PORT_NOTICE_SUP                      = 1 <<  2,
-       IB_PORT_TRAP_SUP                        = 1 <<  3,
-       IB_PORT_OPT_IPD_SUP                     = 1 <<  4,
-       IB_PORT_AUTO_MIGR_SUP                   = 1 <<  5,
-       IB_PORT_SL_MAP_SUP                      = 1 <<  6,
-       IB_PORT_MKEY_NVRAM                      = 1 <<  7,
-       IB_PORT_PKEY_NVRAM                      = 1 <<  8,
-       IB_PORT_LED_INFO_SUP                    = 1 <<  9,
-       IB_PORT_SM_DISABLED                     = 1 << 10,
-       IB_PORT_SYS_IMAGE_GUID_SUP              = 1 << 11,
-       IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP       = 1 << 12,
-       IB_PORT_CM_SUP                          = 1 << 16,
-       IB_PORT_SNMP_TUNNEL_SUP                 = 1 << 17,
-       IB_PORT_REINIT_SUP                      = 1 << 18,
-       IB_PORT_DEVICE_MGMT_SUP                 = 1 << 19,
-       IB_PORT_VENDOR_CLASS_SUP                = 1 << 20,
-       IB_PORT_DR_NOTICE_SUP                   = 1 << 21,
-       IB_PORT_CAP_MASK_NOTICE_SUP             = 1 << 22,
-       IB_PORT_BOOT_MGMT_SUP                   = 1 << 23,
-       IB_PORT_LINK_LATENCY_SUP                = 1 << 24,
-       IB_PORT_CLIENT_REG_SUP                  = 1 << 25
-};
-
-enum ib_port_width {
-       IB_WIDTH_1X     = 1,
-       IB_WIDTH_4X     = 2,
-       IB_WIDTH_8X     = 4,
-       IB_WIDTH_12X    = 8
-};
-
-static inline int ib_width_enum_to_int(enum ib_port_width width)
-{
-       switch (width) {
-       case IB_WIDTH_1X:  return  1;
-       case IB_WIDTH_4X:  return  4;
-       case IB_WIDTH_8X:  return  8;
-       case IB_WIDTH_12X: return 12;
-       default:          return -1;
-       }
-}
-
-struct ib_port_attr {
-       enum ib_port_state      state;
-       enum ib_mtu             max_mtu;
-       enum ib_mtu             active_mtu;
-       int                     gid_tbl_len;
-       u32                     port_cap_flags;
-       u32                     max_msg_sz;
-       u32                     bad_pkey_cntr;
-       u32                     qkey_viol_cntr;
-       u16                     pkey_tbl_len;
-       u16                     lid;
-       u16                     sm_lid;
-       u8                      lmc;
-       u8                      max_vl_num;
-       u8                      sm_sl;
-       u8                      subnet_timeout;
-       u8                      init_type_reply;
-       u8                      active_width;
-       u8                      active_speed;
-       u8                      phys_state;
-};
-
-enum ib_device_modify_flags {
-       IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0,
-       IB_DEVICE_MODIFY_NODE_DESC      = 1 << 1
-};
-
-struct ib_device_modify {
-       u64     sys_image_guid;
-       char    node_desc[64];
-};
-
-enum ib_port_modify_flags {
-       IB_PORT_SHUTDOWN                = 1,
-       IB_PORT_INIT_TYPE               = (1<<2),
-       IB_PORT_RESET_QKEY_CNTR         = (1<<3)
-};
-
-struct ib_port_modify {
-       u32     set_port_cap_mask;
-       u32     clr_port_cap_mask;
-       u8      init_type;
-};
-
-enum ib_event_type {
-       IB_EVENT_CQ_ERR                                                                 = IB_AE_CQ_ERROR,
-       IB_EVENT_QP_FATAL                                                               = IB_AE_QP_FATAL,
-       IB_EVENT_QP_REQ_ERR                                                     = IB_AE_WQ_REQ_ERROR,
-       IB_EVENT_QP_ACCESS_ERR                                  = IB_AE_WQ_ACCESS_ERROR,
-       IB_EVENT_COMM_EST                                                       = IB_AE_QP_COMM,
-       IB_EVENT_SQ_DRAINED                                             = IB_AE_SQ_DRAINED,
-       IB_EVENT_PATH_MIG                                                               = IB_AE_QP_APM,
-       IB_EVENT_PATH_MIG_ERR                                   = IB_AE_QP_APM_ERROR,
-       IB_EVENT_DEVICE_FATAL                                           = IB_AE_LOCAL_FATAL,
-       IB_EVENT_PORT_ACTIVE                                            = IB_AE_PORT_ACTIVE,
-       IB_EVENT_PORT_ERR                                                               = IB_AE_PORT_DOWN,
-       IB_EVENT_SRQ_LIMIT_REACHED                              = IB_AE_SRQ_LIMIT_REACHED,
-       IB_EVENT_SRQ_ERR                                                = IB_AE_SRQ_CATAS_ERROR,
-       IB_EVENT_QP_LAST_WQE_REACHED                    = IB_AE_SRQ_QP_LAST_WQE_REACHED,
-       IB_EVENT_LID_CHANGE                                                     = IB_AE_UNKNOWN + 1,
-       IB_EVENT_PKEY_CHANGE,
-       IB_EVENT_SM_CHANGE,
-       IB_EVENT_CLIENT_REREGISTER
-};
-
-struct ib_event {
-       struct ib_device        *device;
-       union {
-               struct ib_cq    *cq;
-               struct ib_qp    *qp;
-               struct ib_srq   *srq;
-               u8              port_num;
-       } element;
-       enum ib_event_type      event;
-       struct ib_event_ex      x;
-       };
-
-struct ib_event_handler {
-       struct ib_device *device;
-       void            (*handler)(struct ib_event_handler *, struct ib_event *);
-       void *            ctx;
-       struct list_head  list;
-};
-
-#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler, _ctx)           \
-       {                                                       \
-               (_ptr)->device  = _device;                      \
-               (_ptr)->handler = _handler;             \
-               (_ptr)->ctx = _ctx;             \
-               INIT_LIST_HEAD(&(_ptr)->list);                  \
-       }
-
-struct ib_global_route {
-       union ib_gid    dgid;
-       u32             flow_label;
-       u8              sgid_index;
-       u8              hop_limit;
-       u8              traffic_class;
-};
-
-struct ib_grh {
-       __be32          version_tclass_flow;
-       __be16          paylen;
-       u8              next_hdr;
-       u8              hop_limit;
-       union ib_gid    sgid;
-       union ib_gid    dgid;
-};
-
-enum {
-       IB_MULTICAST_QPN = 0xffffff
-};
-
-#define XIB_LID_PERMISSIVE     __constant_htons(0xFFFF)
-
-enum ib_ah_flags {
-       IB_AH_GRH       = 1
-};
-
-enum ib_rate {
-       IB_RATE_PORT_CURRENT = 0,
-       IB_RATE_2_5_GBPS = 2,
-       IB_RATE_5_GBPS   = 5,
-       IB_RATE_10_GBPS  = 3,
-       IB_RATE_20_GBPS  = 6,
-       IB_RATE_30_GBPS  = 4,
-       IB_RATE_40_GBPS  = 7,
-       IB_RATE_60_GBPS  = 8,
-       IB_RATE_80_GBPS  = 9,
-       IB_RATE_120_GBPS = 10
-};
-
-/**
- * ib_rate_to_mult - Convert the IB rate enum to a multiple of the
- * base rate of 2.5 Gbit/sec.  For example, IB_RATE_5_GBPS will be
- * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
- * @rate: rate to convert.
- */
-int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
-
-/**
- * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
- * enum.
- * @mult: multiple to convert.
- */
-enum ib_rate mult_to_ib_rate(int mult) __attribute_const__;
-
-struct ib_ah_attr {
-       struct ib_global_route  grh;
-       u16                     dlid;
-       u8                      sl;
-       u8                      src_path_bits;
-       u8                      static_rate;
-       u8                      ah_flags;
-       u8                      port_num;
-};
-
-enum ib_wc_status {
-       IB_WC_SUCCESS,
-       IB_WC_LOC_LEN_ERR,
-       IB_WC_LOC_QP_OP_ERR,
-       IB_WC_LOC_EEC_OP_ERR,
-       IB_WC_LOC_PROT_ERR,
-       IB_WC_WR_FLUSH_ERR,
-       IB_WC_MW_BIND_ERR,
-       IB_WC_BAD_RESP_ERR,
-       IB_WC_LOC_ACCESS_ERR,
-       IB_WC_REM_INV_REQ_ERR,
-       IB_WC_REM_ACCESS_ERR,
-       IB_WC_REM_OP_ERR,
-       IB_WC_RETRY_EXC_ERR,
-       IB_WC_RNR_RETRY_EXC_ERR,
-       IB_WC_LOC_RDD_VIOL_ERR,
-       IB_WC_REM_INV_RD_REQ_ERR,
-       IB_WC_REM_ABORT_ERR,
-       IB_WC_INV_EECN_ERR,
-       IB_WC_INV_EEC_STATE_ERR,
-       IB_WC_FATAL_ERR,
-       IB_WC_RESP_TIMEOUT_ERR,
-       IB_WC_GENERAL_ERR
-};
-
-enum ib_wc_opcode {
-       XIB_WC_SEND,
-       XIB_WC_RDMA_WRITE,
-       XIB_WC_RDMA_READ,
-       XIB_WC_COMP_SWAP,
-       XIB_WC_FETCH_ADD,
-       XIB_WC_BIND_MW,
-/*
- * Set value of XIB_WC_RECV so consumers can test if a completion is a
- * receive by testing (opcode & XIB_WC_RECV).
- */
-       XIB_WC_RECV                     = 1 << 7,
-       XIB_WC_RECV_RDMA_WITH_IMM
-};
-
-enum ib_wc_flags {
-       IB_WC_GRH               = 1,
-       IB_WC_WITH_IMM          = (1<<1),
-       IB_WC_FORWARD           = (1<<2)
-};
-
-struct ib_wc {
-       u64                     wr_id;
-       enum ib_wc_status       status;
-       enum ib_wc_opcode       opcode;
-       u32                     vendor_err;
-       u32                     byte_len;
-       struct ib_qp           *qp;
-       __be32                  imm_data;
-       u32                     src_qp;
-       int                     wc_flags;
-       u16                     pkey_index;
-       u16                     slid;
-       u8                      sl;
-       u8                      dlid_path_bits;
-       u8                      port_num;       /* valid only for DR SMPs on switches */
-};
-
-enum ib_cq_notify_flags {
-       IB_CQ_SOLICITED                 = 1 << 0,
-       IB_CQ_NEXT_COMP                 = 1 << 1,
-       IB_CQ_SOLICITED_MASK            = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP,
-       IB_CQ_REPORT_MISSED_EVENTS      = 1 << 2,
-};
-
-enum ib_srq_attr_mask {
-       XIB_SRQ_MAX_WR  = 1 << 0,
-       XIB_SRQ_LIMIT   = 1 << 1,
-};
-
-struct ib_srq_attr {
-       u32     max_wr;
-       u32     max_sge;
-       u32     srq_limit;
-};
-
-struct ib_srq_init_attr {
-       void                  (*event_handler)(struct ib_event *, void *);
-       void                   *srq_context;
-       struct ib_srq_attr      attr;
-};
-
-struct ib_qp_cap {
-       u32     max_send_wr;
-       u32     max_recv_wr;
-       u32     max_send_sge;
-       u32     max_recv_sge;
-       u32     max_inline_data;
-};
-
-enum ib_sig_type {
-       IB_SIGNAL_ALL_WR,
-       IB_SIGNAL_REQ_WR
-};
-
-enum ib_qp_type {
-       /*
-        * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
-        * here (and in that order) since the MAD layer uses them as
-        * indices into a 2-entry table.
-        */
-       IB_QPT_SMI,
-       IB_QPT_GSI,
-
-       IB_QPT_RC,
-       IB_QPT_UC,
-       IB_QPT_UD,
-       IB_QPT_RAW_IP_V6,
-       IB_QPT_RAW_ETY
-};
-
-struct ib_qp_init_attr {
-       void                  (*event_handler)(struct ib_event *, void *);
-       void                   *qp_context;
-       struct ib_cq           *send_cq;
-       struct ib_cq           *recv_cq;
-       struct ib_srq          *srq;
-       struct ib_qp_cap        cap;
-       enum ib_sig_type        sq_sig_type;
-       enum ib_qp_type         qp_type;
-       u8                      port_num; /* special QP types only */
-};
-
-enum ib_rnr_timeout {
-       IB_RNR_TIMER_655_36 =  0,
-       IB_RNR_TIMER_000_01 =  1,
-       IB_RNR_TIMER_000_02 =  2,
-       IB_RNR_TIMER_000_03 =  3,
-       IB_RNR_TIMER_000_04 =  4,
-       IB_RNR_TIMER_000_06 =  5,
-       IB_RNR_TIMER_000_08 =  6,
-       IB_RNR_TIMER_000_12 =  7,
-       IB_RNR_TIMER_000_16 =  8,
-       IB_RNR_TIMER_000_24 =  9,
-       IB_RNR_TIMER_000_32 = 10,
-       IB_RNR_TIMER_000_48 = 11,
-       IB_RNR_TIMER_000_64 = 12,
-       IB_RNR_TIMER_000_96 = 13,
-       IB_RNR_TIMER_001_28 = 14,
-       IB_RNR_TIMER_001_92 = 15,
-       IB_RNR_TIMER_002_56 = 16,
-       IB_RNR_TIMER_003_84 = 17,
-       IB_RNR_TIMER_005_12 = 18,
-       IB_RNR_TIMER_007_68 = 19,
-       IB_RNR_TIMER_010_24 = 20,
-       IB_RNR_TIMER_015_36 = 21,
-       IB_RNR_TIMER_020_48 = 22,
-       IB_RNR_TIMER_030_72 = 23,
-       IB_RNR_TIMER_040_96 = 24,
-       IB_RNR_TIMER_061_44 = 25,
-       IB_RNR_TIMER_081_92 = 26,
-       IB_RNR_TIMER_122_88 = 27,
-       IB_RNR_TIMER_163_84 = 28,
-       IB_RNR_TIMER_245_76 = 29,
-       IB_RNR_TIMER_327_68 = 30,
-       IB_RNR_TIMER_491_52 = 31
-};
-       
-enum ib_qp_attr_mask {
-       IB_QP_STATE                     = 1,
-       IB_QP_CUR_STATE                 = (1<<1),
-       IB_QP_EN_SQD_ASYNC_NOTIFY       = (1<<2),
-       IB_QP_ACCESS_FLAGS              = (1<<3),
-       IB_QP_PKEY_INDEX                = (1<<4),
-       IB_QP_PORT                      = (1<<5),
-       IB_QP_QKEY                      = (1<<6),
-       IB_QP_AV                        = (1<<7),
-       IB_QP_PATH_MTU                  = (1<<8),
-       IB_QP_TIMEOUT                   = (1<<9),
-       IB_QP_RETRY_CNT                 = (1<<10),
-       IB_QP_RNR_RETRY                 = (1<<11),
-       IB_QP_RQ_PSN                    = (1<<12),
-       IB_QP_MAX_QP_RD_ATOMIC          = (1<<13),
-       IB_QP_ALT_PATH                  = (1<<14),
-       IB_QP_MIN_RNR_TIMER             = (1<<15),
-       IB_QP_SQ_PSN                    = (1<<16),
-       IB_QP_MAX_DEST_RD_ATOMIC        = (1<<17),
-       IB_QP_PATH_MIG_STATE            = (1<<18),
-       IB_QP_CAP                       = (1<<19),
-       IB_QP_DEST_QPN                  = (1<<20)
-};
-
-enum ib_qp_state {
-       XIB_QPS_RESET,
-       XIB_QPS_INIT,
-       XIB_QPS_RTR,
-       XIB_QPS_RTS,
-       XIB_QPS_SQD,
-       XIB_QPS_SQE,
-       XIB_QPS_ERR
-};
-
-enum ib_mig_state {
-       IB_MIG_MIGRATED,
-       IB_MIG_REARM,
-       IB_MIG_ARMED
-};
-
-struct ib_qp_attr {
-       enum ib_qp_state        qp_state;
-       enum ib_qp_state        cur_qp_state;
-       enum ib_mtu             path_mtu;
-       enum ib_mig_state       path_mig_state;
-       u32                     qkey;
-       u32                     rq_psn;
-       u32                     sq_psn;
-       u32                     dest_qp_num;
-       int                     qp_access_flags;
-       struct ib_qp_cap        cap;
-       struct ib_ah_attr       ah_attr;
-       struct ib_ah_attr       alt_ah_attr;
-       u16                     pkey_index;
-       u16                     alt_pkey_index;
-       u8                      en_sqd_async_notify;
-       u8                      sq_draining;
-       u8                      max_rd_atomic;
-       u8                      max_dest_rd_atomic;
-       u8                      min_rnr_timer;
-       u8                      port_num;
-       u8                      timeout;
-       u8                      retry_cnt;
-       u8                      rnr_retry;
-       u8                      alt_port_num;
-       u8                      alt_timeout;
-};
-
-enum ib_wr_opcode {
-       IB_WR_RDMA_WRITE,
-       IB_WR_RDMA_WRITE_WITH_IMM,
-       IB_WR_SEND,
-       IB_WR_SEND_WITH_IMM,
-       IB_WR_RDMA_READ,
-       IB_WR_ATOMIC_CMP_AND_SWP,
-       IB_WR_ATOMIC_FETCH_AND_ADD
-};
-
-enum ib_send_flags {
-       IB_SEND_FENCE           = 1,
-       IB_SEND_SIGNALED        = (1<<1),
-       IB_SEND_SOLICITED       = (1<<2),
-       IB_SEND_INLINE          = (1<<3)
-};
-
-struct ib_sge {
-       u64     addr;
-       u32     length;
-       u32     lkey;
-};
-
-struct ib_send_wr {
-       struct ib_send_wr      *next;
-       u64                     wr_id;
-       struct ib_sge          *sg_list;
-       int                     num_sge;
-       enum ib_wr_opcode       opcode;
-       int                     send_flags;
-       __be32                  imm_data;
-       union {
-               struct {
-                       u64     remote_addr;
-                       u32     rkey;
-               } rdma;
-               struct {
-                       u64     remote_addr;
-                       u64     compare_add;
-                       u64     swap;
-                       u32     rkey;
-               } atomic;
-               struct {
-                       struct ib_ah *ah;
-                       u32     remote_qpn;
-                       u32     remote_qkey;
-                       u16     pkey_index; /* valid for GSI only */
-                       u8      port_num;   /* valid for DR SMPs on switch only */
-               } ud;
-       } wr;
-};
-
-struct ib_recv_wr {
-       struct ib_recv_wr      *next;
-       u64                     wr_id;
-       struct ib_sge          *sg_list;
-       int                     num_sge;
-};
-
-enum ib_access_flags {
-       IB_ACCESS_LOCAL_WRITE   = 1,
-       IB_ACCESS_REMOTE_WRITE  = (1<<1),
-       IB_ACCESS_REMOTE_READ   = (1<<2),
-       IB_ACCESS_REMOTE_ATOMIC = (1<<3),
-       IB_ACCESS_MW_BIND       = (1<<4)
-};
-
-struct ib_phys_buf {
-       u64      addr;
-       u64      size;
-};
-
-struct ib_mr_attr {
-       struct ib_pd    *pd;
-       u64             device_virt_addr;
-       u64             size;
-       int             mr_access_flags;
-       u32             lkey;
-       u32             rkey;
-};
-
-enum ib_mr_rereg_flags {
-       IB_MR_REREG_TRANS       = 1,
-       IB_MR_REREG_PD          = (1<<1),
-       IB_MR_REREG_ACCESS      = (1<<2)
-};
-
-struct ib_mw_bind {
-       struct ib_mr   *mr;
-       u64             wr_id;
-       u64             addr;
-       u32             length;
-       int             send_flags;
-       int             mw_access_flags;
-};
-
-struct ib_fmr_attr {
-       int     max_pages;
-       int     max_maps;
-       u8      page_shift;
-};
-struct ib_ucontext {
-       struct ib_device       *device;
-       int                     closing;
-       struct ib_ucontext_ex   x;
-};
-
-struct ib_udata {
-       void        *inbuf;
-       void        *outbuf;
-       size_t       inlen;
-       size_t       outlen;
-};
-
-#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)                      \
-       {                                                               \
-               (udata)->inbuf  = (void *) (ibuf);              \
-               (udata)->outbuf = (void *) (obuf);              \
-               (udata)->inlen  = (ilen);                               \
-               (udata)->outlen = (olen);                               \
-       }
-
-struct ib_pd {
-       struct ib_device       *device;
-       struct ib_ucontext     *p_uctx;
-       atomic_t                usecnt; /* count all resources */
-};
-
-struct ib_ah {
-       struct ib_device        *device;
-       struct ib_pd            *pd;
-       struct ib_ucontext      *p_uctx;
-};
-
-typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
-
-struct ib_cq {
-       struct ib_device       *device;
-       struct ib_ucontext     *p_uctx;
-       ib_comp_handler         comp_handler;
-       void                  (*event_handler)(struct ib_event *, void *);
-       void *                  cq_context;
-       int                     cqe;
-       atomic_t                usecnt; /* count number of work queues */
-       struct ib_cq_ex         x;
-};
-
-struct ib_srq {
-       struct ib_device       *device;
-       struct ib_pd           *pd;
-       struct ib_ucontext     *p_uctx;
-       void                  (*event_handler)(struct ib_event *, void *);
-       void                   *srq_context;
-       atomic_t                usecnt;
-       struct ib_srq_ex        x;
-};
-
-struct ib_qp {
-       struct ib_device       *device;
-       struct ib_pd           *pd;
-       struct ib_cq           *send_cq;
-       struct ib_cq           *recv_cq;
-       struct ib_srq          *srq;
-       struct ib_ucontext     *p_uctx;
-       void                  (*event_handler)(struct ib_event *, void *);
-       void                   *qp_context;
-       u32                     qp_num;
-       enum ib_qp_type         qp_type;
-       struct ib_qp_ex         x;
-};
-
-struct ib_mr {
-       struct ib_device  *device;
-       struct ib_pd      *pd;
-       struct ib_ucontext *p_uctx;
-       u32                lkey;
-       u32                rkey;
-       atomic_t           usecnt; /* count number of MWs */
-};
-
-struct ib_mw {
-       struct ib_device        *device;
-       struct ib_pd            *pd;
-       struct ib_ucontext  *p_uctx;
-       u32                     rkey;
-};
-
-struct ib_fmr {
-       struct ib_device        *device;
-       struct ib_pd            *pd;
-       struct list_head        list;
-       u32                     lkey;
-       u32                     rkey;
-};
-
-struct ib_mad;
-struct ib_grh;
-
-enum ib_process_mad_flags {
-       IB_MAD_IGNORE_MKEY      = 1,
-       IB_MAD_IGNORE_BKEY      = 2,
-       IB_MAD_IGNORE_ALL       = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY
-};
-
-enum ib_mad_result {
-       IB_MAD_RESULT_FAILURE  = 0,      /* (!SUCCESS is the important flag) */
-       IB_MAD_RESULT_SUCCESS  = 1 << 0, /* MAD was successfully processed   */
-       IB_MAD_RESULT_REPLY    = 1 << 1, /* Reply packet needs to be sent    */
-       IB_MAD_RESULT_CONSUMED = 1 << 2  /* Packet consumed: stop processing */
-};
-
-#define IB_DEVICE_NAME_MAX 64
-
-struct ib_cache {
-       rwlock_t                lock;
-       struct ib_event_handler event_handler;
-       struct ib_pkey_cache  **pkey_cache;
-       struct ib_gid_cache   **gid_cache;
-       u8                     *lmc_cache;
-       struct ib_cache_ex      x;
-};
-
-struct ib_dma_mapping_ops {
-       int             (*mapping_error)(struct ib_device *dev,
-                                        u64 dma_addr);
-       u64             (*map_single)(struct ib_device *dev,
-                                     void *ptr, size_t size,
-                                     enum dma_data_direction direction);
-       void            (*unmap_single)(struct ib_device *dev,
-                                       u64 addr, size_t size,
-                                       enum dma_data_direction direction);
-       u64             (*map_page)(struct ib_device *dev,
-                                   dma_addr_t page, unsigned long offset,
-                                   size_t size,
-                                   enum dma_data_direction direction);
-       void            (*unmap_page)(struct ib_device *dev,
-                                     u64 addr, size_t size,
-                                     enum dma_data_direction direction);
-       int             (*map_sg)(struct ib_device *dev,
-                                 struct scatterlist *sg, int nents,
-                                 enum dma_data_direction direction);
-       void            (*unmap_sg)(struct ib_device *dev,
-                                   struct scatterlist *sg, int nents,
-                                   enum dma_data_direction direction);
-       u64             (*dma_address)(struct ib_device *dev,
-                                      struct scatterlist *sg);
-       unsigned int    (*dma_len)(struct ib_device *dev,
-                                  struct scatterlist *sg);
-       void            (*sync_single_for_cpu)(struct ib_device *dev,
-                                              u64 dma_handle,
-                                              size_t size,
-                                              enum dma_data_direction dir);
-       void            (*sync_single_for_device)(struct ib_device *dev,
-                                                 u64 dma_handle,
-                                                 size_t size,
-                                                 enum dma_data_direction dir);
-       void            *(*alloc_coherent)(struct ib_device *dev,
-                                          size_t size,
-                                          u64 *dma_handle,
-                                          gfp_t flag);
-       void            (*free_coherent)(struct ib_device *dev,
-                                        size_t size, void *cpu_addr,
-                                        u64 dma_handle);
-};
-
-struct iw_cm_verbs;
-
-struct ib_device {
-       struct mlx4_dev                *dma_device;
-
-       char                          name[IB_DEVICE_NAME_MAX];
-
-       struct list_head              event_handler_list;
-       spinlock_t                    event_handler_lock;
-
-       struct list_head              core_list;
-       struct list_head              client_data_list;
-       spinlock_t                    client_data_lock;
-
-       struct ib_cache               cache;
-       int                          *pkey_tbl_len;
-       int                          *gid_tbl_len;
-
-       u32                           flags;
-
-       int                           num_comp_vectors;
-
-       struct iw_cm_verbs           *iwcm;
-
-       int                        (*query_device)(struct ib_device *device,
-                                                  struct ib_device_attr *device_attr);
-       int                        (*query_port)(struct ib_device *device,
-                                                u8 port_num,
-                                                struct ib_port_attr *port_attr);
-       int                        (*query_gid_chunk)(struct ib_device *device,
-                                               u8 port_num, int index,
-                                               union ib_gid gid[8], int size);
-       int                        (*query_pkey_chunk)(struct ib_device *device,
-                                                u8 port_num, u16 index, __be16 pkey[32], int size);
-       int                        (*modify_device)(struct ib_device *device,
-                                                   int device_modify_mask,
-                                                   struct ib_device_modify *device_modify);
-       int                        (*modify_port)(struct ib_device *device,
-                                                 u8 port_num, int port_modify_mask,
-                                                 struct ib_port_modify *port_modify);
-       struct ib_ucontext *       (*alloc_ucontext)(struct ib_device *device,
-                                                    struct ib_udata *udata);
-       int                        (*dealloc_ucontext)(struct ib_ucontext *context);
-       int                        (*mmap)(struct ib_ucontext *context,
-                                          struct vm_area_struct *vma);
-       struct ib_pd *             (*alloc_pd)(struct ib_device *device,
-                                              struct ib_ucontext *context,
-                                              struct ib_udata *udata);
-       int                        (*dealloc_pd)(struct ib_pd *pd);
-       struct ib_ah *             (*create_ah)(struct ib_pd *pd,
-                                               struct ib_ah_attr *ah_attr);
-       int                        (*modify_ah)(struct ib_ah *ah,
-                                               struct ib_ah_attr *ah_attr);
-       int                        (*query_ah)(struct ib_ah *ah,
-                                              struct ib_ah_attr *ah_attr);
-       int                        (*destroy_ah)(struct ib_ah *ah);
-       struct ib_srq *            (*create_srq)(struct ib_pd *pd,
-                                                struct ib_srq_init_attr *srq_init_attr,
-                                                struct ib_udata *udata);
-       int                        (*modify_srq)(struct ib_srq *srq,
-                                                struct ib_srq_attr *srq_attr,
-                                                enum ib_srq_attr_mask srq_attr_mask,
-                                                struct ib_udata *udata);
-       int                        (*query_srq)(struct ib_srq *srq,
-                                               struct ib_srq_attr *srq_attr);
-       int                        (*destroy_srq)(struct ib_srq *srq);
-       int                        (*post_srq_recv)(struct ib_srq *srq,
-                                                   ib_recv_wr_t *recv_wr,
-                                                   ib_recv_wr_t **bad_recv_wr);
-       struct ib_qp *             (*create_qp)(struct ib_pd *pd,
-                                               struct ib_qp_init_attr *qp_init_attr,
-                                               struct ib_udata *udata);
-       int                        (*modify_qp)(struct ib_qp *qp,
-                                               struct ib_qp_attr *qp_attr,
-                                               int qp_attr_mask,
-                                               struct ib_udata *udata);
-       int                        (*query_qp)(struct ib_qp *qp,
-                                              struct ib_qp_attr *qp_attr,
-                                              int qp_attr_mask,
-                                              struct ib_qp_init_attr *qp_init_attr);
-       int                        (*destroy_qp)(struct ib_qp *qp);
-       int                        (*post_send)(struct ib_qp *qp,
-                                               ib_send_wr_t *send_wr,
-                                               ib_send_wr_t **bad_send_wr);
-       int                        (*post_recv)(struct ib_qp *qp,
-                                               ib_recv_wr_t *recv_wr,
-                                               ib_recv_wr_t **bad_recv_wr);
-       struct ib_cq *             (*create_cq)(struct ib_device *device, int cqe,
-                                               int comp_vector,
-                                               struct ib_ucontext *context,
-                                               struct ib_udata *udata);
-       int                        (*modify_cq)(struct ib_cq *cq, u16 cq_count,
-                                               u16 cq_period);
-       int                        (*destroy_cq)(struct ib_cq *cq);
-       int                        (*resize_cq)(struct ib_cq *cq, int cqe,
-                                               struct ib_udata *udata);
-       int                        (*poll_cq)(struct ib_cq *ibcq,
-                                             ib_wc_t** const pp_free_wclist,
-                                             ib_wc_t** const pp_done_wclist);
-       int                        (*peek_cq)(struct ib_cq *cq, int wc_cnt);
-       int                        (*req_notify_cq)(struct ib_cq *cq,
-                                                   enum ib_cq_notify_flags flags);
-       int                        (*req_ncomp_notif)(struct ib_cq *cq,
-                                                     int wc_cnt);
-       struct ib_mr *             (*get_dma_mr)(struct ib_pd *pd,
-                                                int mr_access_flags);
-       struct ib_mr *             (*reg_phys_mr)(struct ib_pd *pd,
-                                                 struct ib_phys_buf *phys_buf_array,
-                                                 int num_phys_buf,
-                                                 int mr_access_flags,
-                                                 u64 *iova_start);
-       struct ib_mr *             (*reg_user_mr)(struct ib_pd *pd,
-                                                 u64 start, u64 length,
-                                                 u64 virt_addr,
-                                                 int mr_access_flags,
-                                                 struct ib_udata *udata);
-       int                        (*query_mr)(struct ib_mr *mr,
-                                              struct ib_mr_attr *mr_attr);
-       int                        (*dereg_mr)(struct ib_mr *mr);
-       int                        (*rereg_phys_mr)(struct ib_mr *mr,
-                                                   int mr_rereg_mask,
-                                                   struct ib_pd *pd,
-                                                   struct ib_phys_buf *phys_buf_array,
-                                                   int num_phys_buf,
-                                                   int mr_access_flags,
-                                                   u64 *iova_start);
-       struct ib_mw *             (*alloc_mw)(struct ib_pd *pd);
-       int                        (*bind_mw)(struct ib_qp *qp,
-                                             struct ib_mw *mw,
-                                             struct ib_mw_bind *mw_bind);
-       int                        (*dealloc_mw)(struct ib_mw *mw);
-       struct ib_fmr *            (*alloc_fmr)(struct ib_pd *pd,
-                                               int mr_access_flags,
-                                               struct ib_fmr_attr *fmr_attr);
-       int                        (*map_phys_fmr)(struct ib_fmr *fmr,
-                                                  u64 *page_list, int list_len,
-                                                  u64 iova);
-       int                        (*unmap_fmr)(struct list_head *fmr_list);
-       int                        (*dealloc_fmr)(struct ib_fmr *fmr);
-       int                        (*attach_mcast)(struct ib_qp *qp,
-                                                  union ib_gid *gid,
-                                                  u16 lid);
-       int                        (*detach_mcast)(struct ib_qp *qp,
-                                                  union ib_gid *gid,
-                                                  u16 lid);
-       int                        (*process_mad)(struct ib_device *device,
-                                                 int process_mad_flags,
-                                                 u8 port_num,
-                                                 ib_wc_t *in_wc,
-                                                 struct ib_grh *in_grh,
-                                                 struct ib_mad *in_mad,
-                                                 struct ib_mad *out_mad);
-
-       struct ib_dma_mapping_ops   *dma_ops;
-       struct list_head             port_list;
-
-       enum {
-               IB_DEV_UNINITIALIZED,
-               IB_DEV_REGISTERED,
-               IB_DEV_UNREGISTERED
-       }                            reg_state;
-
-       u64                          uverbs_cmd_mask;
-       int                          uverbs_abi_ver;
-
-       char                         node_desc[64];
-       __be64                       node_guid;
-       u8                           node_type;
-       u8                           phys_port_cnt;
-       struct ib_device_ex          x;
-};
-
-struct ib_client {
-       char  *name;
-       void (*add)   (struct ib_device *);
-       void (*remove)(struct ib_device *);
-
-       struct list_head list;
-};
-
-struct ib_device *ib_alloc_device(size_t size);
-void ib_dealloc_device(struct ib_device *device);
-
-int ib_register_device   (struct ib_device *device);
-void ib_unregister_device(struct ib_device *device);
-
-int ib_register_client   (struct ib_client *client);
-void ib_unregister_client(struct ib_client *client);
-
-void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
-void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
-                        void *data);
-
-static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
-{
-       if (len > udata->inlen)
-               return -EFAULT;
-       memcpy(dest, udata->inbuf, len);
-       return 0;
-}
-
-static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
-{
-       if (len > udata->outlen)
-               return -EFAULT;
-       memcpy(udata->outbuf, src, len);
-       return 0;
-}
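/*
 * Minimal sketch of how a verb implementation is expected to pair INIT_UDATA
 * (defined earlier in this header) with the copy helpers above.  The my_resp
 * type and the zeroed handle are placeholders, not part of this interface.
 */
struct my_resp {
	u32	handle;
};

static int my_reply_to_user(void *in_buf, size_t in_len, void *out_buf, size_t out_len)
{
	struct ib_udata	udata;
	struct my_resp	resp;

	INIT_UDATA(&udata, in_buf, out_buf, in_len, out_len);
	resp.handle = 0;	/* a real verb would fill this from the created object */
	return ib_copy_to_udata(&udata, &resp, sizeof resp);
}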
-
-/**
- * ib_modify_qp_is_ok - Check that the supplied attribute mask
- * contains all required attributes and no attributes not allowed for
- * the given QP state transition.
- * @cur_state: Current QP state
- * @next_state: Next QP state
- * @type: QP type
- * @mask: Mask of supplied QP attributes
- *
- * This function is a helper function that a low-level driver's
- * modify_qp method can use to validate the consumer's input.  It
- * checks that cur_state and next_state are valid QP states, that a
- * transition from cur_state to next_state is allowed by the IB spec,
- * and that the attribute mask supplied is allowed for the transition.
- */
-int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
-                      enum ib_qp_type type, enum ib_qp_attr_mask mask);
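/*
 * Hedged sketch of the validation a low-level driver's modify_qp method can
 * perform with ib_modify_qp_is_ok(); reading the current state from the QP
 * object is driver specific, so attr->cur_qp_state is used here only when
 * the consumer supplied it, with a placeholder fallback otherwise.
 */
static int my_modify_qp_checked(struct ib_qp *qp, struct ib_qp_attr *attr, int mask)
{
	enum ib_qp_state cur = (mask & IB_QP_CUR_STATE) ?
		attr->cur_qp_state : XIB_QPS_RTS;	/* placeholder; normally read from the QP */
	enum ib_qp_state next = (mask & IB_QP_STATE) ? attr->qp_state : cur;

	if (!ib_modify_qp_is_ok(cur, next, qp->qp_type, (enum ib_qp_attr_mask)mask))
		return -EINVAL;
	return qp->device->modify_qp(qp, attr, mask, NULL);
}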
-
-int ib_register_event_handler  (struct ib_event_handler *event_handler);
-int ib_unregister_event_handler(struct ib_event_handler *event_handler);
-void ib_dispatch_event(struct ib_event *event);
-
-int ib_query_device(struct ib_device *device,
-                   struct ib_device_attr *device_attr);
-
-int ib_query_port(struct ib_device *device,
-                 u8 port_num, struct ib_port_attr *port_attr);
-
-int ib_query_gid_chunk(struct ib_device *device,
-                u8 port_num, int index, union ib_gid gid[8], int size);
-
-int ib_query_pkey_chunk(struct ib_device *device,
-                 u8 port_num, u16 index, __be16 pkey[32], int size);
-
-int ib_modify_device(struct ib_device *device,
-                    int device_modify_mask,
-                    struct ib_device_modify *device_modify);
-
-int ib_modify_port(struct ib_device *device,
-                  u8 port_num, int port_modify_mask,
-                  struct ib_port_modify *port_modify);
-
-int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-               u8 *port_num, u16 *index);
-
-int ib_find_pkey(struct ib_device *device,
-                u8 port_num, __be16 pkey, u16 *index);
-
-/**
- * ib_alloc_pd - Allocates an unused protection domain.
- * @device: The device on which to allocate the protection domain.
- *
- * A protection domain object provides an association between QPs, shared
- * receive queues, address handles, memory regions, and memory windows.
- */
-struct ib_pd *ib_alloc_pd(struct ib_device *device);
-
-/**
- * ib_dealloc_pd - Deallocates a protection domain.
- * @pd: The protection domain to deallocate.
- */
-int ib_dealloc_pd(struct ib_pd *pd);
-
-/**
- * ib_create_ah - Creates an address handle for the given address vector.
- * @pd: The protection domain associated with the address handle.
- * @ah_attr: The attributes of the address vector.
- *
- * The address handle is used to reference a local or global destination
- * in all UD QP post sends.
- */
-struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
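/*
 * Example only: a minimal LID-routed (no GRH) address vector for a UD
 * destination.  The dlid, sl and port_num values come from the caller and
 * my_create_ud_ah is a placeholder name.
 */
static struct ib_ah *my_create_ud_ah(struct ib_pd *pd, u16 dlid, u8 sl, u8 port_num)
{
	struct ib_ah_attr attr;

	memset(&attr, 0, sizeof attr);		/* ah_flags == 0: no GRH */
	attr.dlid        = dlid;
	attr.sl          = sl;
	attr.static_rate = IB_RATE_PORT_CURRENT;
	attr.port_num    = port_num;
	return ib_create_ah(pd, &attr);
}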
-
-/**
- * ib_init_ah_from_wc - Initializes address handle attributes from a
- *   work completion.
- * @device: Device on which the received message arrived.
- * @port_num: Port on which the received message arrived.
- * @wc: Work completion associated with the received message.
- * @grh: References the received global route header.  This parameter is
- *   ignored unless the work completion indicates that the GRH is valid.
- * @ah_attr: Returned attributes that can be used when creating an address
- *   handle for replying to the message.
- */
-int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, ib_wc_t *wc,
-                      struct ib_grh *grh, struct ib_ah_attr *ah_attr);
-
-/**
- * ib_create_ah_from_wc - Creates an address handle associated with the
- *   sender of the specified work completion.
- * @pd: The protection domain associated with the address handle.
- * @wc: Work completion information associated with a received message.
- * @grh: References the received global route header.  This parameter is
- *   ignored unless the work completion indicates that the GRH is valid.
- * @port_num: The outbound port number to associate with the address.
- *
- * The address handle is used to reference a local or global destination
- * in all UD QP post sends.
- */
-struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, ib_wc_t *wc,
-                                  struct ib_grh *grh, u8 port_num);
-
-/**
- * ib_modify_ah - Modifies the address vector associated with an address
- *   handle.
- * @ah: The address handle to modify.
- * @ah_attr: The new address vector attributes to associate with the
- *   address handle.
- */
-int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-/**
- * ib_query_ah - Queries the address vector associated with an address
- *   handle.
- * @ah: The address handle to query.
- * @ah_attr: The address vector attributes associated with the address
- *   handle.
- */
-int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-/**
- * ib_destroy_ah - Destroys an address handle.
- * @ah: The address handle to destroy.
- */
-int ib_destroy_ah(struct ib_ah *ah);
-
-/**
- * ib_create_srq - Creates a SRQ associated with the specified protection
- *   domain.
- * @pd: The protection domain associated with the SRQ.
- * @srq_init_attr: A list of initial attributes required to create the
- *   SRQ.  If SRQ creation succeeds, then the attributes are updated to
- *   the actual capabilities of the created SRQ.
- *
- * srq_attr->max_wr and srq_attr->max_sge are read to determine the
- * requested size of the SRQ, and set to the actual values allocated
- * on return.  If ib_create_srq() succeeds, then max_wr and max_sge
- * will always be at least as large as the requested values.
- */
-struct ib_srq *ib_create_srq(struct ib_pd *pd,
-                            struct ib_srq_init_attr *srq_init_attr);
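/*
 * Illustrative only: filling ib_srq_init_attr per the semantics described
 * above.  The requested sizes are example values, handler/ctx come from the
 * caller, and max_wr/max_sge may be rounded up by the driver on return.
 */
static struct ib_srq *my_create_srq(struct ib_pd *pd,
	void (*handler)(struct ib_event *, void *), void *ctx)
{
	struct ib_srq_init_attr init;

	memset(&init, 0, sizeof init);
	init.event_handler  = handler;
	init.srq_context    = ctx;
	init.attr.max_wr    = 256;
	init.attr.max_sge   = 1;
	init.attr.srq_limit = 0;	/* no limit event armed at creation */
	return ib_create_srq(pd, &init);
}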
-
-/**
- * ib_modify_srq - Modifies the attributes for the specified SRQ.
- * @srq: The SRQ to modify.
- * @srq_attr: On input, specifies the SRQ attributes to modify.  On output,
- *   the current values of selected SRQ attributes are returned.
- * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
- *   are being modified.
- *
- * The mask may contain XIB_SRQ_MAX_WR to resize the SRQ and/or
- * XIB_SRQ_LIMIT to set the SRQ's limit and request notification when
- * the number of receives queued drops below the limit.
- */
-int ib_modify_srq(struct ib_srq *srq,
-                 struct ib_srq_attr *srq_attr,
-                 enum ib_srq_attr_mask srq_attr_mask);
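/*
 * Sketch of arming the SRQ limit event described above with XIB_SRQ_LIMIT;
 * low_watermark is a caller-chosen threshold.
 */
static int my_arm_srq_limit(struct ib_srq *srq, u32 low_watermark)
{
	struct ib_srq_attr attr;

	memset(&attr, 0, sizeof attr);
	attr.srq_limit = low_watermark;
	return ib_modify_srq(srq, &attr, XIB_SRQ_LIMIT);
}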
-
-/**
- * ib_query_srq - Returns the attribute list and current values for the
- *   specified SRQ.
- * @srq: The SRQ to query.
- * @srq_attr: The attributes of the specified SRQ.
- */
-int ib_query_srq(struct ib_srq *srq,
-                struct ib_srq_attr *srq_attr);
-
-/**
- * ib_destroy_srq - Destroys the specified SRQ.
- * @srq: The SRQ to destroy.
- */
-int ib_destroy_srq(struct ib_srq *srq);
-
-/**
- * ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
- * @srq: The SRQ to post the work request on.
- * @recv_wr: A list of work requests to post on the receive queue.
- * @bad_recv_wr: On an immediate failure, this parameter will reference
- *   the work request that failed to be posted on the SRQ.
- */
-static inline int ib_post_srq_recv(struct ib_srq *srq,
-                                  ib_recv_wr_t *recv_wr,
-                                  ib_recv_wr_t **bad_recv_wr)
-{
-       return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr);
-}
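/*
 * Sketch of posting one receive through the wrapper above.  The ib_recv_wr_t
 * and ib_local_ds_t member names (p_next, wr_id, num_ds, ds_array, vaddr,
 * length, lkey) are taken from the IBAL ib_types.h and are assumptions here;
 * the buffer described by vaddr/length/lkey must be registered by the caller.
 */
static int my_post_one_srq_recv(struct ib_srq *srq, u64 vaddr, u32 length, u32 lkey)
{
	ib_local_ds_t	ds;
	ib_recv_wr_t	wr, *bad_wr;

	ds.vaddr  = vaddr;
	ds.length = length;
	ds.lkey   = lkey;

	wr.p_next   = NULL;
	wr.wr_id    = vaddr;		/* any consumer-chosen cookie */
	wr.num_ds   = 1;
	wr.ds_array = &ds;

	return ib_post_srq_recv(srq, &wr, &bad_wr);
}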
-
-/**
- * ib_create_qp - Creates a QP associated with the specified protection
- *   domain.
- * @pd: The protection domain associated with the QP.
- * @qp_init_attr: A list of initial attributes required to create the
- *   QP.  If QP creation succeeds, then the attributes are updated to
- *   the actual capabilities of the created QP.
- */
-struct ib_qp *ib_create_qp(struct ib_pd *pd,
-                          struct ib_qp_init_attr *qp_init_attr);
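/*
 * Sketch of the attribute set-up ib_create_qp() expects for an ordinary RC
 * QP; the capability values are examples, the CQ handles and context come
 * from the caller, and no per-QP event handler is set here.
 */
static struct ib_qp *my_create_rc_qp(struct ib_pd *pd, struct ib_cq *send_cq,
	struct ib_cq *recv_cq, void *ctx)
{
	struct ib_qp_init_attr init;

	memset(&init, 0, sizeof init);
	init.qp_context       = ctx;
	init.send_cq          = send_cq;
	init.recv_cq          = recv_cq;
	init.cap.max_send_wr  = 64;
	init.cap.max_recv_wr  = 64;
	init.cap.max_send_sge = 1;
	init.cap.max_recv_sge = 1;
	init.sq_sig_type      = IB_SIGNAL_REQ_WR;
	init.qp_type          = IB_QPT_RC;
	return ib_create_qp(pd, &init);
}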
-
-/**
- * ib_modify_qp - Modifies the attributes for the specified QP and then
- *   transitions the QP to the given state.
- * @qp: The QP to modify.
- * @qp_attr: On input, specifies the QP attributes to modify.  On output,
- *   the current values of selected QP attributes are returned.
- * @qp_attr_mask: A bit-mask used to specify which attributes of the QP
- *   are being modified.
- */
-int ib_modify_qp(struct ib_qp *qp,
-                struct ib_qp_attr *qp_attr,
-                int qp_attr_mask);
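/*
 * Example only: the common flush sequence of moving a QP to the error state
 * needs just IB_QP_STATE in the attribute mask.
 */
static int my_move_qp_to_error(struct ib_qp *qp)
{
	struct ib_qp_attr attr;

	memset(&attr, 0, sizeof attr);
	attr.qp_state = XIB_QPS_ERR;
	return ib_modify_qp(qp, &attr, IB_QP_STATE);
}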
-
-/**
- * ib_query_qp - Returns the attribute list and current values for the
- *   specified QP.
- * @qp: The QP to query.
- * @qp_attr: The attributes of the specified QP.
- * @qp_attr_mask: A bit-mask used to select specific attributes to query.
- * @qp_init_attr: Additional attributes of the selected QP.
- *
- * The qp_attr_mask may be used to limit the query to gathering only the
- * selected attributes.
- */
-int ib_query_qp(struct ib_qp *qp,
-               struct ib_qp_attr *qp_attr,
-               int qp_attr_mask,
-               struct ib_qp_init_attr *qp_init_attr);
-
-/**
- * ib_modify_cq - Modifies moderation params of the CQ
- * @cq: The CQ to modify.
- * @cq_count: number of CQEs that will trigger an event
- * @cq_period: max period of time before triggering an event
- *
- * These parameters moderate how often the CQ generates completion events.
- */
-int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
-
-/**
- * ib_destroy_qp - Destroys the specified QP.
- * @qp: The QP to destroy.
- */
-int ib_destroy_qp(struct ib_qp *qp);
-
-/**
- * ib_post_send - Posts a list of work requests to the send queue of
- *   the specified QP.
- * @qp: The QP to post the work request on.
- * @send_wr: A list of work requests to post on the send queue.
- * @bad_send_wr: On an immediate failure, this parameter will reference
- *   the work request that failed to be posted on the QP.
- */
-static inline int ib_post_send(struct ib_qp *qp,
-                              ib_send_wr_t *send_wr,
-                              ib_send_wr_t **bad_send_wr)
-{
-       return qp->device->post_send(qp, send_wr, bad_send_wr);
-}
-
-/**
- * ib_post_recv - Posts a list of work requests to the receive queue of
- *   the specified QP.
- * @qp: The QP to post the work request on.
- * @recv_wr: A list of work requests to post on the receive queue.
- * @bad_recv_wr: On an immediate failure, this parameter will reference
- *   the work request that failed to be posted on the QP.
- */
-static inline int ib_post_recv(struct ib_qp *qp,
-                              ib_recv_wr_t *recv_wr,
-                              ib_recv_wr_t **bad_recv_wr)
-{
-       return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
-}
-
-/**
- * ib_create_cq - Creates a CQ on the specified device.
- * @device: The device on which to create the CQ.
- * @comp_handler: A user-specified callback that is invoked when a
- *   completion event occurs on the CQ.
- * @event_handler: A user-specified callback that is invoked when an
- *   asynchronous event not associated with a completion occurs on the CQ.
- * @cq_context: Context associated with the CQ returned to the user via
- *   the associated completion and event handlers.
- * @cqe: The minimum size of the CQ.
- * @comp_vector: Completion vector used to signal completion events.
- *     Must be >= 0 and < device->num_comp_vectors.
- *
- * Users can examine the cq structure to determine the actual CQ size.
- */
-struct ib_cq *ib_create_cq(struct ib_device *device,
-                          ib_comp_handler comp_handler,
-                          void (*event_handler)(struct ib_event *, void *),
-                          void *cq_context, int cqe, int comp_vector);
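/*
 * Sketch of creating a CQ with per-CQ callbacks, which is what this change
 * enables; the entry count and completion vector are example values and the
 * handlers/context are supplied by the consumer.
 */
static struct ib_cq *my_create_cq(struct ib_device *device, ib_comp_handler comp,
	void (*event)(struct ib_event *, void *), void *ctx)
{
	/* at least 256 entries, completion vector 0 */
	return ib_create_cq(device, comp, event, ctx, 256, 0);
}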
-
-/**
- * ib_resize_cq - Modifies the capacity of the CQ.
- * @cq: The CQ to resize.
- * @cqe: The minimum size of the CQ.
- *
- * Users can examine the cq structure to determine the actual CQ size.
- */
-int ib_resize_cq(struct ib_cq *cq, int cqe);
-
-/**
- * ib_destroy_cq - Destroys the specified CQ.
- * @cq: The CQ to destroy.
- */
-int ib_destroy_cq(struct ib_cq *cq);
-
-/**
- * ib_poll_cq - poll a CQ for completion(s)
- * @cq: the CQ being polled
- * @pp_free_wclist:
- *             On input, a list of work completion structures provided by
- *             the client.  These are used to report completed work requests through
- *             the pp_done_wclist.
- *
- *             On output, this contains the list of work completion structures for
- *             which no completion was found.
- * @pp_done_wclist: A list of work completions retrieved from the completion queue.
- *
- * Poll a CQ for (possibly multiple) completions.  If the return value
- * is < 0, an error occurred.  If the return value is >= 0, it is the
- * number of completions returned.  If the return value is non-negative
- * and less than the number of entries supplied in pp_free_wclist, then
- * the CQ was emptied.
- */
-static inline int ib_poll_cq(struct ib_cq *cq, ib_wc_t** const pp_free_wclist,
-                            ib_wc_t** const pp_done_wclist)
-{
-       return cq->device->poll_cq(cq, pp_free_wclist, pp_done_wclist);
-}
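/*
 * Sketch of draining a CQ with the wclist-based poll above.  The ib_wc_t
 * members used here (p_next, wr_id, status) are taken from the IBAL
 * ib_types.h and are assumptions in this context; the batch size of 8 and
 * the commented-out consumption step are placeholders.
 */
static void my_drain_cq(struct ib_cq *cq)
{
	ib_wc_t		wc[8], *p_free, *p_done, *p_wc;
	int		i, n;

	/* chain the caller-provided completion structures into a free list */
	for (i = 0; i < 7; i++)
		wc[i].p_next = &wc[i + 1];
	wc[7].p_next = NULL;
	p_free = wc;

	do {
		p_done = NULL;
		n = ib_poll_cq(cq, &p_free, &p_done);
		while (p_done != NULL) {
			p_wc = p_done;
			p_done = p_wc->p_next;
			/* consume p_wc->wr_id / p_wc->status here */
			p_wc->p_next = p_free;	/* recycle the structure */
			p_free = p_wc;
		}
	} while (n == 8);	/* a full batch means the CQ may not be empty yet */
}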
-
-/**
- * ib_peek_cq - Returns the number of unreaped completions currently
- *   on the specified CQ.
- * @cq: The CQ to peek.
- * @wc_cnt: A minimum number of unreaped completions to check for.
- *
- * If the number of unreaped completions is greater than or equal to wc_cnt,
- * this function returns wc_cnt; otherwise, it returns the actual number of
- * unreaped completions.
- */
-int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
-
-/**
- * ib_req_notify_cq - Request completion notification on a CQ.
- * @cq: The CQ to generate an event for.
- * @flags:
- *   Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
- *   to request an event on the next solicited event or next work
- *   completion of any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
- *   may also be |ed in to request a hint about missed events, as
- *   described below.
- *
- * Return Value:
- *    < 0 means an error occurred while requesting notification
- *   == 0 means notification was requested successfully, and if
- *        IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
- *        were missed and it is safe to wait for another event.  In
- *        this case it is guaranteed that any work completions added
- *        to the CQ since the last CQ poll will trigger a completion
- *        notification event.
- *    > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
- *        in.  It means that the consumer must poll the CQ again to
- *        make sure it is empty to avoid missing an event because of a
- *        race between requesting notification and an entry being
- *        added to the CQ.  This return value means it is possible
- *        (but not guaranteed) that a work completion has been added
- *        to the CQ since the last poll without triggering a
- *        completion notification event.
- */
-static inline int ib_req_notify_cq(struct ib_cq *cq,
-                                  enum ib_cq_notify_flags flags)
-{
-       return cq->device->req_notify_cq(cq, flags);
-}
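/*
 * Sketch of the rearm-then-repoll pattern the return values above call for,
 * reusing the my_drain_cq() sketch shown after ib_poll_cq(); a positive
 * return from ib_req_notify_cq() means a completion may have been missed.
 */
static void my_arm_cq(struct ib_cq *cq)
{
	do {
		my_drain_cq(cq);	/* drain before (re)arming */
	} while (ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) > 0);
}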
-
-/**
- * ib_req_ncomp_notif - Request completion notification when there are
- *   at least the specified number of unreaped completions on the CQ.
- * @cq: The CQ to generate an event for.
- * @wc_cnt: The number of unreaped completions that should be on the
- *   CQ before an event is generated.
- */
-static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
-{
-       return cq->device->req_ncomp_notif ?
-               cq->device->req_ncomp_notif(cq, wc_cnt) :
-               -ENOSYS;
-}
-
-/**
- * ib_get_dma_mr - Returns a memory region for system memory that is
- *   usable for DMA.
- * @pd: The protection domain associated with the memory region.
- * @mr_access_flags: Specifies the memory access rights.
- *
- * Note that the ib_dma_*() functions defined below must be used
- * to create/destroy addresses used with the Lkey or Rkey returned
- * by ib_get_dma_mr().
- */
-struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
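/*
 * Example only: the minimal DMA MR a kernel consumer typically takes; remote
 * access bits can be OR'ed into the mask when the region must be exposed.
 */
static struct ib_mr *my_get_local_dma_mr(struct ib_pd *pd)
{
	return ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
}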
-
-#if 0
-// TODO: do we need this?
-/**
- * ib_dma_mapping_error - check a DMA addr for error
- * @dev: The device for which the dma_addr was created
- * @dma_addr: The DMA address to check
- */
-static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       if (dev->dma_ops)
-               return dev->dma_ops->mapping_error(dev, dma_addr);
-       return dma_mapping_error(dma_addr);