[mlx4] Fix our error handling in the case of hardware errors.
authortzachid <tzachid@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Tue, 3 Mar 2009 08:43:28 +0000 (08:43 +0000)
committertzachid <tzachid@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Tue, 3 Mar 2009 08:43:28 +0000 (08:43 +0000)
git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@2001 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

13 files changed:
hw/mlx4/kernel/bus/drv/pdo.c
hw/mlx4/kernel/bus/inc/bus_intf.h
hw/mlx4/kernel/bus/inc/device.h
hw/mlx4/kernel/bus/inc/ib_verbs.h
hw/mlx4/kernel/bus/net/catas.c
hw/mlx4/kernel/bus/net/cmd.c
hw/mlx4/kernel/bus/net/cq.c
hw/mlx4/kernel/bus/net/intf.c
hw/mlx4/kernel/bus/net/mlx4.h
hw/mlx4/kernel/bus/net/qp.c
hw/mlx4/kernel/bus/net/srq.c
hw/mlx4/kernel/inc/l2w.h
hw/mlx4/kernel/inc/l2w_sync.h

index a14f042..c44160a 100644 (file)
@@ -268,6 +268,7 @@ Return Value:
        p_fdo->bus_ib_ifc.mlx4_interface.mlx4_unregister_ev_cb = mlx4_reset_cb_unregister;\r
        p_fdo->bus_ib_ifc.mlx4_interface.mlx4_reset_request = mlx4_reset_request;\r
        p_fdo->bus_ib_ifc.mlx4_interface.mlx4_reset_execute = mlx4_reset_execute;\r
+       p_fdo->bus_ib_ifc.mlx4_interface.mlx4_reset_ready = mlx4_reset_ready;\r
        \r
        //\r
        // Create a custom interface so that other drivers can\r
index a4dbe81..ef401d9 100644 (file)
@@ -117,6 +117,7 @@ typedef int (*MLX4_UNREGISTER_EVENT_HANDLER)(struct ib_event_handler *event_hand
 
 typedef int (*MLX4_RESET_REQUEST) (struct ib_event_handler *event_handler);
 typedef int (*MLX4_RESET_EXECUTE) (struct ib_event_handler *event_handler);
+typedef int (*MLX4_RESET_READY) (struct ib_event_handler *event_handler);
 
 struct mlx4_interface_ex {
        MLX4_PD_ALLOC       mlx4_pd_alloc;
@@ -169,6 +170,7 @@ struct mlx4_interface_ex {
        MLX4_UNREGISTER_EVENT_HANDLER mlx4_unregister_ev_cb;
        MLX4_RESET_REQUEST mlx4_reset_request;
        MLX4_RESET_EXECUTE mlx4_reset_execute;
+       MLX4_RESET_READY mlx4_reset_ready;
        
 };
 
index 5f60bb9..894b193 100644 (file)
@@ -40,8 +40,7 @@ enum {
        MLX4_FLAG_RESET_CLIENT  = 1 << 11,
        MLX4_FLAG_RESET_DRIVER  = 1 << 12,
        MLX4_FLAG_RESET_STARTED = 1 << 13,
-       MLX4_FLAG_CARD_IS_DEAD  = 1 << 14,
-       MLX4_FLAG_BUSY_WAIT             = 1 << 15
+       MLX4_FLAG_BUSY_WAIT             = 1 << 14
 };
 
 enum {
@@ -342,6 +341,7 @@ struct mlx4_dev {
        u32                     signature;
        struct pci_dev         *pdev;
        unsigned long           flags;
+       LONG                reset_pending;
        struct mlx4_caps        caps;
        struct radix_tree_root  qp_table_tree;
        u32                     rev_id;
index 94be539..656b0a3 100644 (file)
@@ -293,15 +293,19 @@ struct ib_event {
 };\r
 \r
 enum ib_event_handler_flags {\r
-       IB_IVH_RESET_CB         = (1 << 0),\r
-       IB_IVH_NOTIFIED         = (1 << 1),\r
-       IB_IVH_RESET_READY      = (1 << 2)\r
+       IB_IVH_RESET_CB                 = (1 << 0),     // this is a soft-reset handler\r
+       IB_IVH_NOTIFIED                 = (1 << 1),     // client has been notified about requested reset \r
+       IB_IVH_RESET_READY              = (1 << 2),     // client is ready for HW reset\r
+       IB_IVH_RESET_D_PENDING  = (1 << 3),     // device reset notification is pending \r
+       IB_IVH_RESET_C_PENDING  = (1 << 4),     // client reset notification is pending \r
+       IB_IVH_NOTIF_READY              = (1 << 5)      // client is ready to get reset request notification\r
 };\r
 \r
+typedef void (*ib_event_handler_t)(struct ib_event_handler *, struct ib_event *);\r
 \r
 struct ib_event_handler {\r
        struct ib_device *device;\r
-       void            (*handler)(struct ib_event_handler *, struct ib_event *);\r
+       ib_event_handler_t handler;\r
        struct list_head  list;\r
        void *            ctx;\r
        void *            rsrv_ptr;\r
index d7a0f30..9bf15b9 100644 (file)
@@ -39,10 +39,6 @@ enum {
 static DEFINE_SPINLOCK(catas_lock);
 static LIST_HEAD(catas_list);
 
-// TODO: put into Globals
-// "Reset device on internal errors if non-zero (default 1)")
-int g_internal_err_reset = 0;
-
 void mlx4_dispatch_reset_event(struct ib_device *ibdev, enum ib_event_type type)
 {
        unsigned long flags;
@@ -56,11 +52,25 @@ void mlx4_dispatch_reset_event(struct ib_device *ibdev, enum ib_event_type type)
 
        list_for_each_entry(handler, &ibdev->event_handler_list, list, struct ib_event_handler)
        {
-               // notify only those, that are not notified
+               // notify only soft reset handlers
                if ( handler->flags & IB_IVH_RESET_CB )
+                       // notify only those, that are not yet notified
                        if ( !(handler->flags & IB_IVH_NOTIFIED) ) {
-                               handler->flags |= IB_IVH_NOTIFIED;
-                               handler->handler(handler, &event);
+                               // notify only those that are ready to get the notification
+                               if ( handler->flags & IB_IVH_NOTIF_READY ) {
+                                       // ensure the handler is not notified again
+                                       handler->flags |= IB_IVH_NOTIFIED;
+                                       handler->flags &= ~(IB_IVH_NOTIF_READY | 
+                                               IB_IVH_RESET_D_PENDING | IB_IVH_RESET_C_PENDING);
+                                       handler->handler(handler, &event);
+                               }
+                               else {
+                                       // pend the notification
+                                       if (type == IB_EVENT_RESET_DRIVER) 
+                                               handler->flags |= IB_IVH_RESET_D_PENDING;
+                                       else 
+                                               handler->flags |= IB_IVH_RESET_C_PENDING;
+                               }
                        }
        }
 
@@ -107,36 +117,28 @@ static void dump_err_buf(struct mlx4_dev *dev)
                         i, swab32(readl(priv->catas_err.map + i)));
 }
 
-static void catas_reset()
-{
-       struct mlx4_priv *priv, *tmppriv;
-       struct mlx4_dev *dev;
-       struct list_head tlist;
-       int ret;
-
-       INIT_LIST_HEAD(&tlist);
-       spin_lock_irq(&catas_lock);
-       list_splice_init(&catas_list, &tlist);
-       spin_unlock_irq(&catas_lock);
-
-       list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list, struct mlx4_priv, struct mlx4_priv) {
-               ret = mlx4_restart_one(priv->dev.pdev);
-               dev = &priv->dev;
-               if (ret)
-                       mlx4_err(dev, "Reset failed (%d)\n", ret);
-               else
-                       mlx4_dbg(dev, "Reset succeeded\n");
-       }
-}
-
 static void
 catas_reset_wi(
        IN                              DEVICE_OBJECT*                          p_dev_obj,
-       IN                              void*                                           context )
+       IN                              struct mlx4_dev *                       dev )
 {
+       NTSTATUS status;
+       long do_reset;
        UNUSED_PARAM(p_dev_obj);
-       IoFreeWorkItem( context );
-       catas_reset();
+
+       do_reset = InterlockedCompareExchange(&dev->reset_pending, 1, 0);
+       if (do_reset == 0) {
+               status = mlx4_reset(dev);
+               if ( !NT_SUCCESS( status ) ) {
+                       mlx4_err(dev, "Failed to reset HCA, aborting.(status %#x)\n", status);
+               }
+               
+               dev->flags |= MLX4_FLAG_RESET_DRIVER;   // bar the device
+       }
+
+       dump_err_buf(dev);
+       mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
+       mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
 }
 
 /* polling on DISPATCH_LEVEL */
@@ -145,27 +147,9 @@ static void poll_catas(struct mlx4_dev *dev)
        struct mlx4_priv *priv = mlx4_priv(dev);
 
        if (readl(priv->catas_err.map)) {
-               dump_err_buf(dev);
-
-               // bar the device
-               dev->flags |= MLX4_FLAG_RESET_DRIVER;
-
-               // relay the event
-               mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
-
-               // notify the clients
-               mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
-
-               if (g_internal_err_reset) {
-                       PIO_WORKITEM catas_work = IoAllocateWorkItem( dev->pdev->p_self_do );
-
-                       spin_lock_dpc(&catas_lock);
-                       list_add(&priv->catas_err.list, &catas_list);
-                       spin_unlock_dpc(&catas_lock);
-
-                       if (!catas_work)
-                               IoQueueWorkItem( catas_work, catas_reset_wi, DelayedWorkQueue, catas_work );
-               }
+               
+               mlx4_warn(dev, "Detected catastrophic error on mdev %p\n", dev);
+               IoQueueWorkItem( priv->catas_err.catas_work, catas_reset_wi, DelayedWorkQueue, dev );
        } else {
                spin_lock_dpc(&catas_lock);
                if (!priv->catas_err.stop) {
@@ -190,12 +174,12 @@ static void  timer_dpc(
        poll_catas( dev );
 }
 
-void mlx4_start_catas_poll(struct mlx4_dev *dev)
+int mlx4_start_catas_poll(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        u64 addr;
+       int err;
 
-       INIT_LIST_HEAD(&priv->catas_err.list);
        priv->catas_err.map = NULL;
 
        addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
@@ -205,7 +189,15 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
        if (!priv->catas_err.map) {
                mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
                          addr);
-               return;
+               err = -ENOMEM;
+               goto err_map;
+       }
+       
+       priv->catas_err.catas_work = IoAllocateWorkItem( dev->pdev->p_self_do );
+       if (!priv->catas_err.catas_work) {
+               mlx4_warn(dev, "Failed to allocate work item from polling thread\n");
+               err = -EFAULT;
+               goto err_alloc;
        }
 
        priv->catas_err.stop = 0;
@@ -215,6 +207,13 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
        priv->catas_err.interval.QuadPart  = (-10)* (__int64)MLX4_CATAS_POLL_INTERVAL;
        KeSetTimerEx( &priv->catas_err.timer, priv->catas_err.interval, 
                0, &priv->catas_err.timer_dpc );
+       return 0;
+
+
+err_alloc:
+       iounmap(priv->catas_err.map, priv->fw.catas_size * 4);
+err_map:
+       return err;
 }
 
 void mlx4_stop_catas_poll(struct mlx4_dev *dev)
@@ -231,13 +230,11 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev)
 
        KeCancelTimer(&priv->catas_err.timer);
        KeFlushQueuedDpcs();
-
        if (priv->catas_err.map)
                iounmap(priv->catas_err.map, priv->fw.catas_size * 4);
 
-       spin_lock_irq(&catas_lock);
-       list_del(&priv->catas_err.list);
-       spin_unlock_irq(&catas_lock);
+       if (priv->catas_err.catas_work) 
+               IoFreeWorkItem( priv->catas_err.catas_work );
 }
 
 static int wait4reset(struct ib_event_handler *event_handler)
@@ -264,6 +261,24 @@ static int wait4reset(struct ib_event_handler *event_handler)
        return n_not_ready;
 }
 
+int mlx4_reset_ready( struct ib_event_handler *event_handler )
+{
+       unsigned long flags;
+       struct ib_device *ibdev = event_handler->device;
+
+       ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
+       
+       spin_lock_irqsave(&ibdev->event_handler_lock, &flags);
+       event_handler->flags |= IB_IVH_NOTIF_READY;
+       spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
+       if (event_handler->flags & IB_IVH_RESET_D_PENDING)
+               mlx4_dispatch_reset_event(ibdev, IB_EVENT_RESET_DRIVER);
+       else
+       if (event_handler->flags & IB_IVH_RESET_C_PENDING)
+               mlx4_dispatch_reset_event(ibdev, IB_EVENT_RESET_CLIENT);
+       return 0;
+}
+
 int mlx4_reset_execute( struct ib_event_handler *event_handler )
 {
        int err;
@@ -274,8 +289,9 @@ int mlx4_reset_execute( struct ib_event_handler *event_handler )
        struct pci_dev *pdev = ibdev->dma_device->pdev;
 
        // mark client as "ready for reset" and check whether we can do reset
-       if (wait4reset(event_handler))
+       if (wait4reset(event_handler)) {
                return 0;
+       }
 
        // fully bar the device
        ibdev->dma_device->flags |= MLX4_FLAG_RESET_STARTED;
@@ -330,16 +346,21 @@ int mlx4_reset_request( struct ib_event_handler *event_handler )
        struct ib_device *ibdev;
        struct mlx4_dev *dev;
 
-    ibdev = event_handler->device;
-    if (ibdev == NULL)
-        return -EFAULT;
+       unsigned long flags;
+
+       ibdev = event_handler->device;
+       if (ibdev == NULL)
+               return -EFAULT;
+
+       dev = ibdev->dma_device;
+       if (ibdev == NULL)
+               return -EFAULT;
+
+       spin_lock_irqsave(&ibdev->event_handler_lock, &flags);
 
-    dev = ibdev->dma_device;
-    if (ibdev == NULL)
-        return -EFAULT;
         
        // set device to RESET_PENDING mode
-       if (!mlx4_is_barred(dev)) {
+       if (!(dev->flags & (MLX4_FLAG_RESET_CLIENT | MLX4_FLAG_RESET_DRIVER))) {
                PIO_WORKITEM reset_work;
 
                // bar the device
@@ -348,12 +369,17 @@ int mlx4_reset_request( struct ib_event_handler *event_handler )
                // delay reset to a system thread
                // to allow for end of operations that are in progress
                reset_work = IoAllocateWorkItem( dev->pdev->p_self_do );
-               if (!reset_work)
+               if (!reset_work) {
+                       mlx4_err(dev, "mlx4_reset_request IoAllocateWorkItem failed, reset will not be propagated\n");
                        return -EFAULT;
+               }
                event_handler->rsrv_ptr = reset_work;
                IoQueueWorkItem( reset_work, card_reset_wi, DelayedWorkQueue, event_handler );
        }
 
+       spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
+
+
        return 0;
 }
 
index 87b66c2..928407d 100644 (file)
@@ -210,6 +210,7 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
        int err = 0;
        u64 end;
        u8 status;
+       long do_reset;
 
        down(&priv->cmd.poll_sem);
 
@@ -224,6 +225,19 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
 
        if (cmd_pending(dev)) {
                err = -ETIMEDOUT;
+
+       do_reset = InterlockedCompareExchange(&dev->reset_pending, 1, 0);
+       if (!do_reset) {
+               NTSTATUS status1;
+               status1 = mlx4_reset(dev);
+               if ( !NT_SUCCESS( status1 ) ) {
+                       mlx4_err(dev, "Failed to reset HCA, aborting.(status %#x)\n", status1);
+               }
+               
+               dev->flags |= MLX4_FLAG_RESET_DRIVER;   // bar the device
+       }
+       mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
+               
                goto out;
        }
 
@@ -273,15 +287,14 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
        struct mlx4_cmd_context *context;
        int err = 0;
        u64 out_prm = out_param ? *out_param : 0;
+       long do_reset;
 
        down(&cmd->event_sem);
-
-       if ( dev->flags & MLX4_FLAG_CARD_IS_DEAD ) {
+       if ( dev->flags & MLX4_FLAG_RESET_DRIVER ) {
                err = -EBUSY;
                mlx4_warn(dev, "mlx4_cmd_wait: Command %02x is skipped because the card is stuck \n", op);
                goto exit;
        }
-
        spin_lock(&cmd->context_lock);
        BUG_ON(cmd->free_head < 0);
        context = &cmd->context[cmd->free_head];
@@ -302,8 +315,6 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
                        mlx4_err(dev, "mlx4_cmd_wait: Command %02x completed with timeout after %d msecs \n",
                                  op, timeout);
 
-                       /* for debug purposes */
-                       ASSERT(FALSE);
                        /* for enabling busy-wait loop, add MLX4_FLAG_BUSY_WAIT (0x8000) to dev->flags */
                        while (dev) {
                                u32 wait_ms =2000; /* wait interval in msecs */
@@ -311,10 +322,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
                                        break;
                                cl_thread_suspend( wait_ms ); 
                        }
+                       do_reset = InterlockedCompareExchange(&dev->reset_pending, 1, 0);
+                       if (!do_reset) {
+                               NTSTATUS status = mlx4_reset(dev);
+                               if ( !NT_SUCCESS( status ) ) {
+                                       mlx4_err(dev, "Failed to reset HCA, aborting.(status %#x)\n", status);
+                               }
+                               
+                               dev->flags |= MLX4_FLAG_RESET_DRIVER;   // bar the device
+                       }
 
                        /* try to solve the problem */
-                       dev->flags |= MLX4_FLAG_RESET_DRIVER | MLX4_FLAG_CARD_IS_DEAD;
-                       mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
+                       if (dev->pdev->ib_dev) {
+                               mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
+                       }
                }
        }
 
@@ -331,7 +352,7 @@ out:
        cmd->free_head = (int)(context - cmd->context);
        spin_unlock(&cmd->context_lock);
 
-exit:
+exit:  
        up(&cmd->event_sem);
        return err;
 }
@@ -420,8 +441,8 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
                out_is_imm, in_modifier, (int)op_modifier);
 #endif
 
-       if ( mlx4_is_barred(dev) )
-               return -EFAULT;
+               if ( mlx4_is_barred(dev) )
+                       return -EFAULT;
        
        if (mlx4_priv(dev)->cmd.use_events)
                return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm,
index 3dab6b0..9bec57d 100644 (file)
@@ -258,8 +258,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
 
        if (atomic_dec_and_test(&cq->refcount))
                complete(&cq->free);
-       if (!mlx4_is_barred(dev))
-               wait_for_completion(&cq->free);
+       wait_for_completion(&cq->free);
 
        mlx4_table_put(dev, &cq_table->table, cq->cqn);
        mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
index 249745b..462999b 100644 (file)
@@ -119,20 +119,21 @@ void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type,
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_device_context *dev_ctx;
 
-       spin_lock_dpc(&priv->ctx_lock);
+       spin_lock(&priv->ctx_lock);
 
        list_for_each_entry(dev_ctx, &priv->ctx_list, list, struct mlx4_device_context)
                if (dev_ctx->intf->event)
                        dev_ctx->intf->event(dev, dev_ctx->context, type,
                                             subtype, port);
 
-       spin_unlock_dpc(&priv->ctx_lock);
+       spin_unlock(&priv->ctx_lock);
 }
 
 int mlx4_register_device(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_interface *intf;
+       int err = 0;
 
        mutex_lock(&intf_mutex);
 
@@ -142,9 +143,9 @@ int mlx4_register_device(struct mlx4_dev *dev)
 
        mutex_unlock(&intf_mutex);
        if (!mlx4_is_livefish(dev))
-               mlx4_start_catas_poll(dev);
+               err = mlx4_start_catas_poll(dev);
 
-       return 0;
+       return err;
 }
 
 void mlx4_unregister_device(struct mlx4_dev *dev)
index b0c1ec3..a0cfbf4 100644 (file)
@@ -271,12 +271,12 @@ struct mlx4_mcg_table {
 
 struct mlx4_catas_err {
        u32 __iomem            *map;
-       struct list_head        list;
        /* Windows */
        int                                     stop;
        KTIMER                          timer;
        KDPC                            timer_dpc;
        LARGE_INTEGER           interval;
+       PIO_WORKITEM            catas_work;
 };
 
 struct mlx4_mac_table {
@@ -375,7 +375,7 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
 
-void mlx4_start_catas_poll(struct mlx4_dev *dev);
+int mlx4_start_catas_poll(struct mlx4_dev *dev);
 void mlx4_stop_catas_poll(struct mlx4_dev *dev);
 int mlx4_restart_one(struct pci_dev *pdev);
 int mlx4_register_device(struct mlx4_dev *dev);
@@ -434,6 +434,7 @@ int mlx4_add_eq(struct mlx4_dev *dev, int nent,
 
 void mlx4_remove_eq(struct mlx4_dev *dev, u8 eq_num);
 
+int mlx4_reset_ready( struct ib_event_handler *event_handler );
 int mlx4_reset_execute( struct ib_event_handler *event_handler );
 
 int mlx4_reset_request( struct ib_event_handler *event_handler );
index 71ce754..9b5c52d 100644 (file)
@@ -265,8 +265,7 @@ void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
 
        if (atomic_dec_and_test(&qp->refcount))
                complete(&qp->free);
-       if (!mlx4_is_barred(dev))
-               wait_for_completion(&qp->free);
+       wait_for_completion(&qp->free);
 
        mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
        mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
index 9c45f10..1d2d100 100644 (file)
@@ -218,8 +218,7 @@ void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
 
        if (atomic_dec_and_test(&srq->refcount))
                complete(&srq->free);
-       if (!mlx4_is_barred(dev))
-               wait_for_completion(&srq->free);
+       wait_for_completion(&srq->free);
 
        mlx4_table_put(dev, &srq_table->table, srq->srqn);
        mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
index 59a5132..9f8b564 100644 (file)
@@ -334,7 +334,7 @@ static inline int mlx4_is_livefish(struct mlx4_dev *dev)
 
 static inline int mlx4_is_barred(struct mlx4_dev *dev)
 {
-       return dev->flags & (MLX4_FLAG_RESET_CLIENT | MLX4_FLAG_RESET_DRIVER);
+       return dev->flags &  MLX4_FLAG_RESET_DRIVER;
 }
 
 static inline int mlx4_is_in_reset(struct mlx4_dev *dev)
index 1c1c841..779af46 100644 (file)
@@ -103,6 +103,7 @@ static inline void init_completion( struct completion * compl )
 static inline int wait_for_completion_timeout( struct completion * compl, unsigned long timeout )
 {
        LARGE_INTEGER interval;
+       ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
        interval.QuadPart = (-10)* (__int64)timeout;
        return (int)KeWaitForSingleObject( &compl->event, Executive, KernelMode, FALSE,  &interval );
 }