[MLX4] Soft Reset: fixed bug in case when mlx4_reset, issued from cmd.c, fails. ...
author leonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Tue, 26 May 2009 18:06:01 +0000 (18:06 +0000)
committer leonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Tue, 26 May 2009 18:06:01 +0000 (18:06 +0000)
git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@2196 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

core/al/kernel/al_ioc_pnp.c
hw/mlx4/kernel/bus/net/catas.c
hw/mlx4/kernel/bus/net/cmd.c
hw/mlx4/kernel/bus/net/main.c

index ab8da1e..6ddf268 100644 (file)
@@ -1720,6 +1720,11 @@ __process_query(
                p_results->p_svc = p_svc;\r
                cl_fmap_init( &p_results->iou_map, __iou_cmp );\r
 \r
+               /* Reference the service till the end of sweep processing */\r
+               ref_al_obj( &p_results->p_svc->obj );\r
+               cl_dbg_out ("~%d:[IBBUS] %s() : p_results %p, p_svc %p, ref_cnt %d", \r
+                       KeGetCurrentProcessorNumber(), __FUNCTION__, p_results, p_svc, p_results->p_svc->obj.ref_cnt);\r
+\r
                /* Build the map of nodes by port GUID. */\r
                __process_nodes( p_svc, &port_map );\r
 \r
@@ -1749,6 +1754,10 @@ __process_query(
                break;\r
        default:\r
                CL_ASSERT( p_results );\r
+               /* Release the reference taken for the sweep. */\r
+               deref_al_obj( &p_results->p_svc->obj );\r
+               cl_dbg_out ("~%d:[IBBUS] %s() : p_results %p, p_svc %p, ref_cnt %d", \r
+                       KeGetCurrentProcessorNumber(), __FUNCTION__, p_results, p_svc, p_results->p_svc->obj.ref_cnt);\r
                cl_free( p_results );\r
                /* Fall through */\r
        case IB_INSUFFICIENT_MEMORY:\r
@@ -2034,8 +2043,6 @@ __query_ious(
                        if( !cl_atomic_dec( &p_results->p_svc->query_cnt ) &&\r
                                status == IB_SUCCESS )\r
                        {\r
-                               /* Reference the service till the end of processing in the thread */\r
-                               ref_al_obj( &p_results->p_svc->obj );\r
                                cl_async_proc_queue( gp_async_pnp_mgr,\r
                                        &p_results->async_item );\r
                        }\r
@@ -2231,11 +2238,8 @@ __ioc_pnp_send_cb(
         * If this is the last MAD, finish processing the IOU queries\r
         * in the PnP thread.\r
         */\r
-       if( !cl_atomic_dec( &p_results->p_svc->query_cnt ) ) {\r
-               /* Reference the service till the end of processing in the thread */\r
-               ref_al_obj( &p_results->p_svc->obj );\r
+       if( !cl_atomic_dec( &p_results->p_svc->query_cnt ) )\r
                cl_async_proc_queue( gp_async_pnp_mgr, &p_results->async_item );\r
-       }\r
 \r
        AL_EXIT( AL_DBG_PNP );\r
 }\r
@@ -2356,8 +2360,10 @@ __process_sweep(
 err:\r
                if( !cl_atomic_dec( &gp_ioc_pnp->query_cnt ) )\r
                        cl_async_proc_queue( gp_async_pnp_mgr, &gp_ioc_pnp->async_item );\r
-               /* Release the reference taken for the query. */\r
+               /* Release the reference taken for the sweep. */\r
                deref_al_obj( &p_results->p_svc->obj );\r
+               cl_dbg_out ("~%d:[IBBUS] %s() : p_results %p, p_svc %p, ref_cnt %d", \r
+                       KeGetCurrentProcessorNumber(), __FUNCTION__, p_results, p_svc, p_results->p_svc->obj.ref_cnt);\r
                cl_free( p_results );\r
        }\r
 \r
index 6c78ac6..2c287aa 100644 (file)
@@ -138,7 +138,8 @@ catas_reset_wi(
 
        dump_err_buf(dev);
        mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
-       mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
+       if (dev->pdev->ib_dev)
+               mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
 }
 
 /* polling on DISPATCH_LEVEL */
index 49ab46c..ff8f45e 100644 (file)
@@ -226,17 +226,19 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
        if (cmd_pending(dev)) {
                err = -ETIMEDOUT;
 
-       do_reset = InterlockedCompareExchange(&dev->reset_pending, 1, 0);
-       if (!do_reset) {
-               NTSTATUS status1;
-               status1 = mlx4_reset(dev);
-               if ( !NT_SUCCESS( status1 ) ) {
-                       mlx4_err(dev, "Failed to reset HCA, aborting.(status %#x)\n", status1);
+               do_reset = InterlockedCompareExchange(&dev->reset_pending, 1, 0);
+               if (!do_reset) {
+                       NTSTATUS status1;
+                       status1 = mlx4_reset(dev);
+                       if ( !NT_SUCCESS( status1 ) ) {
+                               mlx4_err(dev, "Failed to reset HCA, aborting.(status %#x)\n", status1);
+                       }
+                       
+                       dev->flags |= MLX4_FLAG_RESET_DRIVER;   // bar the device
                }
-               
-               dev->flags |= MLX4_FLAG_RESET_DRIVER;   // bar the device
-       }
-       mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
+
+               if (dev->pdev->ib_dev)
+                       mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
                
                goto out;
        }
@@ -333,9 +335,8 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
                        }
 
                        /* try to solve the problem */
-                       if (dev->pdev->ib_dev) {
+                       if (dev->pdev->ib_dev)
                                mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
-                       }
                }
                else {
                        err = -EFAULT;
index 67b76d3..508336d 100644 (file)
@@ -903,6 +903,7 @@ int mlx4_init_one(struct pci_dev *pdev, struct mlx4_dev_params *dev_params)
 
        /* we are going to recreate device anyway */
        pdev->dev = NULL;
+       pdev->ib_dev = NULL;
        
        /* find the type of device */
        id = mlx4_find_pci_dev(pdev->ven_id, pdev->dev_id);