[HW] fixed bugs in low resources flow.
author leonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Sun, 29 Mar 2009 15:35:10 +0000 (15:35 +0000)
committer leonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Sun, 29 Mar 2009 15:35:10 +0000 (15:35 +0000)
The bugs were found with the help of Verifier with error injection.

git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@2064 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

13 files changed:
hw/mlx4/kernel/bus/core/cache.c
hw/mlx4/kernel/bus/core/device.c
hw/mlx4/kernel/bus/drv/drv.c
hw/mlx4/kernel/bus/drv/drv.h
hw/mlx4/kernel/bus/drv/pci.c
hw/mlx4/kernel/bus/net/catas.c
hw/mlx4/kernel/bus/net/cmd.c
hw/mlx4/kernel/bus/net/intf.c
hw/mlx4/kernel/bus/net/main.c
hw/mlx4/kernel/hca/ca.c
hw/mthca/kernel/hca_pnp.c
hw/mthca/kernel/hca_verbs.c
hw/mthca/kernel/mt_cache.c

index b3ddc34..8608349 100644 (file)
@@ -366,48 +366,38 @@ static void ib_cache_setup_one(struct ib_device *device)
        int p;
        int port_num;
        
+       shutter_init( &device->cache.x.work_thread );
        rwlock_init(&device->cache.lock);
+       INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
+                             device, ib_cache_event, NULL, NULL, 0);
+       ib_register_event_handler(&device->cache.event_handler);
+
        port_num = end_port(device) - start_port(device) + 1;
-       
        if (port_num > 0 ) { 
                // if port_num ==0   ==> there are no IB ports
                device->cache.pkey_cache =
                        kmalloc(sizeof *device->cache.pkey_cache * port_num, GFP_KERNEL);
                device->cache.gid_cache =
                        kmalloc(sizeof *device->cache.gid_cache * port_num, GFP_KERNEL);
-
                device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
-                                                 port_num, GFP_KERNEL);
+                       port_num, GFP_KERNEL);
 
                if (!device->cache.pkey_cache || !device->cache.gid_cache ||
-                   !device->cache.lmc_cache) {
+                       !device->cache.lmc_cache) {
                        printk(KERN_WARNING "Couldn't allocate cache "
-                              "for %s\n", device->name);
+                               "for %s\n", device->name);
                        goto err;
                }
        }
 
-       shutter_init( &device->cache.x.work_thread );
-
        for (p = 0; p < port_num; ++p) {
                device->cache.pkey_cache[p] = NULL;
                device->cache.gid_cache [p] = NULL;
                ib_cache_update(device, (u8)(p + start_port(device)));
        }
 
-       INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
-                             device, ib_cache_event, NULL, NULL, 0);
-       if (ib_register_event_handler(&device->cache.event_handler))
-               goto err_cache;
-
        return;
 
-err_cache:
-       for (p = 0; p <= end_port(device) - start_port(device); ++p) {
-               kfree(device->cache.pkey_cache[p]);
-               kfree(device->cache.gid_cache[p]);
-       }
-
 err:
        kfree(device->cache.pkey_cache);
        kfree(device->cache.gid_cache);
@@ -422,6 +412,7 @@ static void ib_cache_cleanup_one(struct ib_device *device)
 {
        int p;
 
+       ASSERT(device->cache.event_handler.device);
        ib_unregister_event_handler(&device->cache.event_handler);
        // instead of Linux flush_scheduled_work(): wait for them to quit
        shutter_shut( &device->cache.x.work_thread );
index 8999713..481c5a2 100644 (file)
@@ -302,17 +302,23 @@ int ib_register_device(struct ib_device *device)
                goto out;
        }
 
-       list_add_tail(&device->core_list, &device_list);
-
-       device->reg_state = IB_DEV_REGISTERED;
-
        {
                struct ib_client *client;
 
-               list_for_each_entry(client, &client_list, list, struct ib_client)
-                       if (client->add && !add_client_context(device, client))
+               list_for_each_entry(client, &client_list, list, struct ib_client) {
+                       if ( add_client_context(device, client) ) {
+                               printk(KERN_WARNING "add_client_context failed for device %s\n",
+                                          device->name);
+                               ret = -EFAULT;
+                               goto out;
+                       }
+                       if (client->add)
                                client->add(device);
+               }
        }
+    
+       list_add_tail(&device->core_list, &device_list);
+       device->reg_state = IB_DEV_REGISTERED;
 
  out:
        mutex_unlock(&device_mutex);
@@ -381,17 +387,25 @@ EXPORT_SYMBOL(ib_unregister_device);
 int ib_register_client(struct ib_client *client)
 {
        struct ib_device *device;
+       int ret = 0;
 
        mutex_lock(&device_mutex);
 
-       list_add_tail(&client->list, &client_list);
-       list_for_each_entry(device, &device_list, core_list, struct ib_device)
-               if (client->add && !add_client_context(device, client))
+       list_for_each_entry(device, &device_list, core_list, struct ib_device) {
+               if ( add_client_context(device, client) ) {
+                       printk(KERN_WARNING "add_client_context failed for device %s\n",
+                                  device->name);
+                       ret = -EFAULT;
+                       goto out;
+               }
+               if (client->add)
                        client->add(device);
-
+       }
+    
+    list_add_tail(&client->list, &client_list);
+out:
        mutex_unlock(&device_mutex);
-
-       return 0;
+       return ret;
 }
 EXPORT_SYMBOL(ib_register_client);
 
index b64936c..c4ab8a2 100644 (file)
@@ -323,10 +323,12 @@ __start_card(
 \r
        p_fdo->bus_ib_ifc.pdev = &p_fdo->pci_dev;\r
        p_fdo->bus_ib_ifc.p_ibdev = p_fdo->pci_dev.ib_dev;\r
-       p_fdo->bus_ib_ifc.pmlx4_dev = to_mdev(p_fdo->pci_dev.ib_dev)->dev;\r
-       p_fdo->bus_ib_ifc.is_livefish = mlx4_is_livefish(p_fdo->pci_dev.dev);\r
-       if ( p_fdo->bus_ib_ifc.pmlx4_dev->flags & MLX4_FLAG_MSI_X )\r
-               p_fdo->bus_ib_ifc.n_msi_vectors = p_fdo->pci_dev.n_msi_vectors - 2;\r
+    p_fdo->bus_ib_ifc.is_livefish = mlx4_is_livefish(p_fdo->pci_dev.dev);    \r
+    if ( p_fdo->bus_ib_ifc.is_livefish == 0 ) {\r
+        p_fdo->bus_ib_ifc.pmlx4_dev = to_mdev(p_fdo->pci_dev.ib_dev)->dev;    \r
+           if ( p_fdo->bus_ib_ifc.pmlx4_dev->flags & MLX4_FLAG_MSI_X )\r
+                   p_fdo->bus_ib_ifc.n_msi_vectors = p_fdo->pci_dev.n_msi_vectors - 2;\r
+    }\r
 \r
        p_fdo->card_started = TRUE;\r
 \r
@@ -572,13 +574,13 @@ __get_resources(
                                        pdev->int_info = *desc;\r
                                if (desc->Flags & CM_RESOURCE_INTERRUPT_MESSAGE) {\r
                                        pdev->n_msi_vectors_alloc = (u8)(pdev->n_msi_vectors_alloc+desc_raw->u.MessageInterrupt.Raw.MessageCount);\r
-                                       MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,\r
+                                       MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV,\r
                                                ("EvtPrepareHardware: Desc %d: MsiInterrupt: Share %d, Flags %#x, Level %d, Vector %#x, Affinity %#x\n", \r
                                                i, desc->ShareDisposition, desc->Flags,\r
                                                desc->u.MessageInterrupt.Translated.Level, \r
                                                desc->u.MessageInterrupt.Translated.Vector, \r
                                                (u32)desc->u.MessageInterrupt.Translated.Affinity ));\r
-                                       MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,\r
+                                       MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV,\r
                                                ("EvtPrepareHardware: Desc %d: RawMsiInterrupt: Share %d, Flags %#x, MessageCount %#hx, Vector %#x, Affinity %#x\n", \r
                                                i, desc_raw->ShareDisposition, desc_raw->Flags,\r
                                                desc_raw->u.MessageInterrupt.Raw.MessageCount, \r
@@ -586,7 +588,7 @@ __get_resources(
                                                (u32)desc_raw->u.MessageInterrupt.Raw.Affinity ));\r
                                }\r
                                else { // line-based interrupt\r
-                                       MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,\r
+                                       MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV,\r
                                                ("EvtPrepareHardware: Desc %d: LineInterrupt: Share %d, Flags %#x, Level %d, Vector %#x, Affinity %#x\n", \r
                                                i, desc->ShareDisposition, desc->Flags,\r
                                                desc->u.Interrupt.Level, desc->u.Interrupt.Vector, \r
@@ -774,14 +776,14 @@ inline void InitBusIsr(
 }\r
 \r
 NTSTATUS\r
-EvtDeviceAdd(\r
+EvtDriverDeviceAdd(\r
        IN WDFDRIVER        Driver,\r
        IN PWDFDEVICE_INIT  DeviceInit\r
        )\r
 /*++\r
 Routine Description:\r
 \r
-       EvtDeviceAdd is called by the framework in response to AddDevice\r
+       EvtDriverDeviceAdd is called by the framework in response to AddDevice\r
        call from the PnP manager. We create and initialize a device object to\r
        represent a new instance of mxe bus.\r
 \r
@@ -1191,7 +1193,7 @@ Return Value:
        //\r
 \r
        WDF_DRIVER_CONFIG_INIT(\r
-               &config, EvtDeviceAdd );\r
+               &config, EvtDriverDeviceAdd );\r
        config.EvtDriverUnload = EvtDriverUnload;\r
 \r
        //\r
index 30dbd59..f2f32f6 100644 (file)
@@ -155,7 +155,7 @@ EvtDriverUnload(
        );\r
        \r
 NTSTATUS\r
-EvtDeviceAdd(\r
+EvtDriverDeviceAdd(\r
        IN WDFDRIVER        Driver,\r
        IN PWDFDEVICE_INIT  DeviceInit\r
        );\r
index bddf0f9..9c319ce 100644 (file)
@@ -541,7 +541,7 @@ pci_get_msi_info(
                        p_vector = ka;\r
                        /* print (allocated+2) vectors */\r
                        for (i=0; i<pdev->n_msi_vectors_alloc+2; i++) {\r
-                               MLX4_PRINT( TRACE_LEVEL_WARNING  ,MLX4_DBG_PNP  ,\r
+                               MLX4_PRINT( TRACE_LEVEL_VERBOSE  ,MLX4_DBG_PNP  ,\r
                                        ("MSI-X Vectors: Id %d, Masked %d, Addr %#I64x, Data %#x\n",\r
                                        i, MSIX_VECTOR_MASKED(p_vector[i].Flags),\r
                                        p_vector[i].Addr, p_vector[i].Data ));\r
@@ -587,7 +587,7 @@ pci_hca_reset(
 )\r
 {\r
        u32                                                     sem;\r
-       NTSTATUS                                        status = STATUS_SUCCESS;\r
+       NTSTATUS                                        status = STATUS_SUCCESS, status1;\r
        PBUS_INTERFACE_STANDARD         p_ifc = &pdev->bus_pci_ifc;\r
        PCI_COMMON_CONFIG*                      p_cfg = &pdev->pci_cfg_space;\r
        struct msix_saved_info          msix_info;\r
@@ -703,19 +703,19 @@ pci_hca_reset(
                }\r
        }\r
 \r
+       status = STATUS_SUCCESS;\r
+\r
+err:\r
        /* restore MSI-X info after reset */\r
-       status = __pci_restore_msix_info( pdev, &msix_info );\r
-       if (!NT_SUCCESS(status))\r
-               goto err;\r
+       status1 = __pci_restore_msix_info( pdev, &msix_info );\r
+       status = (!status) ? status1 : status;  /* return the only or the first error */\r
+       if( NT_SUCCESS( status ) ) {\r
+               MLX4_PRINT( TRACE_LEVEL_WARNING ,MLX4_DBG_PNP , ("HCA has been reset ! \n"));\r
+       }\r
 \r
-       /* check, whether MSI-X capabilities were restore */\r
+       /* check, whether MSI-X capabilities have been restored */\r
        pci_get_msi_info( pdev, p_cfg, &pdev->uplink_info );\r
 \r
-       MLX4_PRINT( TRACE_LEVEL_WARNING ,MLX4_DBG_PNP , ("HCA has been reset ! \n"));\r
-\r
-       status = STATUS_SUCCESS;\r
-\r
-err:\r
        if (pdev->msix_info.valid) \r
                pci_free_msix_info_resources(&pdev->msix_info);\r
        MLX4_EXIT( MLX4_DBG_PNP );\r
index 9d5f497..0fe6011 100644 (file)
@@ -370,6 +370,7 @@ int mlx4_reset_request( struct ib_event_handler *event_handler )
                // to allow for end of operations that are in progress
                reset_work = IoAllocateWorkItem( dev->pdev->p_self_do );
                if (!reset_work) {
+            spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
                        mlx4_err(dev, "mlx4_reset_request IoAllocateWorkItem failed, reset will not be propagated\n");
                        err = -EFAULT;
                        goto err_workitem;
index 928407d..49ab46c 100644 (file)
@@ -337,9 +337,15 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
                                mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
                        }
                }
+               else {
+                       err = -EFAULT;
+                       mlx4_err(dev, "mlx4_cmd_wait: Unexpected end of waiting for a comand \n");
+                       ASSERT(0);
+               }
        }
-
-       err = context->result;
+       else
+               err = context->result;
+       
        if (err)
                goto out;
 
index 462999b..1a1b604 100644 (file)
@@ -43,13 +43,13 @@ static LIST_HEAD(intf_list);
 static LIST_HEAD(dev_list);
 static DEFINE_MUTEX(intf_mutex);
 
-static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
+static int mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
 {
        struct mlx4_device_context *dev_ctx;
 
        dev_ctx = kmalloc(sizeof *dev_ctx, GFP_KERNEL);
        if (!dev_ctx)
-               return;
+               return -EFAULT;
 
        dev_ctx->intf    = intf;
        dev_ctx->context = intf->add(&priv->dev);
@@ -59,8 +59,11 @@ static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
                spin_lock_irq(&priv->ctx_lock);
                list_add_tail(&dev_ctx->list, &priv->ctx_list);
                spin_unlock_irq(&priv->ctx_lock);
-       } else
+       } else {
                kfree(dev_ctx);
+               return -EFAULT;
+       }
+       return 0;
 }
 
 static void mlx4_remove_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
@@ -82,19 +85,25 @@ static void mlx4_remove_device(struct mlx4_interface *intf, struct mlx4_priv *pr
 int mlx4_register_interface(struct mlx4_interface *intf)
 {
        struct mlx4_priv *priv;
+       int err = 0;
 
        if (!intf->add || !intf->remove)
                return -EINVAL;
 
        mutex_lock(&intf_mutex);
 
-       list_add_tail(&intf->list, &intf_list);
-       list_for_each_entry(priv, &dev_list, dev_list, struct mlx4_priv)
-               mlx4_add_device(intf, priv);
+       list_for_each_entry(priv, &dev_list, dev_list, struct mlx4_priv) {
+               if (mlx4_add_device(intf, priv)) {
+                       err = -EFAULT;
+                       goto end;
+               }
+       }
 
-       mutex_unlock(&intf_mutex);
+    list_add_tail(&intf->list, &intf_list);
 
-       return 0;
+end:
+       mutex_unlock(&intf_mutex);
+       return err;
 }
 EXPORT_SYMBOL_GPL(mlx4_register_interface);
 
@@ -137,12 +146,18 @@ int mlx4_register_device(struct mlx4_dev *dev)
 
        mutex_lock(&intf_mutex);
 
+       list_for_each_entry(intf, &intf_list, list, struct mlx4_interface) {
+               if (mlx4_add_device(intf, priv)) {
+                       err = -EFAULT;
+                       goto end;
+               }
+       }
+    
        list_add_tail(&priv->dev_list, &dev_list);
-       list_for_each_entry(intf, &intf_list, list, struct mlx4_interface)
-               mlx4_add_device(intf, priv);
-
+    
+end:
        mutex_unlock(&intf_mutex);
-       if (!mlx4_is_livefish(dev))
+       if (!err && !mlx4_is_livefish(dev))
                err = mlx4_start_catas_poll(dev);
 
        return err;
index 7742af7..2f63bcd 100644 (file)
@@ -956,9 +956,11 @@ run_as_livefish:
                                ("mlx4_register_device for livefish failed, return with error.\n"));
                        pdev->dev = NULL;
                        kfree(priv);
+               } 
+               else {
+               MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_LOW ,
+                       ("MLX4_BUS started in \"livefish\" mode !!!.\n"));
                }
-               MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_LOW ,
-                       ("MLX4_BUS started in \"livefish\" mode !!!.\n"));
                goto end;
        }
 
@@ -1064,8 +1066,8 @@ void mlx4_remove_one(struct pci_dev *pdev, int reset)
                mlx4_close_hca(dev);
                mlx4_cmd_cleanup(dev);
 
-               if (reset)
-                       mlx4_reset(dev);
+               if (reset && mlx4_reset(dev))
+                       mlx4_err(dev, "Failed to reset HCA\n");
                mlx4_dbg(dev, "MLX4_BUS: NET device (dev_id=%d) is REMOVED ! \n", (int)pdev->dev_id);
                pdev->dev = NULL;
 done:
index 18d5528..6a3352f 100644 (file)
@@ -73,11 +73,13 @@ mlnx_open_ca (
 \r
        HCA_PRINT(TRACE_LEVEL_INFORMATION  ,HCA_DBG_SHIM,\r
                ("context 0x%p\n", ca_context));\r
-       status = mlnx_set_cb(p_hca,\r
-               pfn_async_event_cb,\r
-               ca_context);\r
-       if (IB_SUCCESS != status) {\r
-               goto err_set_cb;\r
+       if (pfn_async_event_cb) {\r
+               status = mlnx_set_cb(p_hca,\r
+                       pfn_async_event_cb,\r
+                       ca_context);\r
+               if (IB_SUCCESS != status) {\r
+                       goto err_set_cb;\r
+               }\r
        }\r
 \r
        \r
index 3a9c0dd..054086a 100644 (file)
@@ -242,7 +242,7 @@ __alloc_hca_ifc(
                !!mthca_is_livefish(p_ext->hca.mdev),\r
                pIfc );\r
 \r
-       pIfc->p_hca_dev = p_ext->cl_ext.p_pdo;\r
+       pIfc->p_hca_obj = &p_ext->hca.hob;\r
        pIfc->vend_id = (uint32_t)p_ext->hcaConfig.VendorID;\r
        pIfc->dev_id = (uint16_t)p_ext->hcaConfig.DeviceID;\r
        pIfc->dev_revision = (uint16_t)p_ext->hca.hw_ver;\r
@@ -877,7 +877,6 @@ __query_ci_ifc(
        p_ifc->InterfaceHeader.InterfaceReference = __ref_ifc;\r
        p_ifc->InterfaceHeader.InterfaceDereference = __deref_ifc;\r
        p_ifc->Verbs = *p_hca_ifc;\r
-       p_ifc->p_hca_obj = &p_ext->hca.hob;\r
 \r
        /* take the reference before returning. */\r
        __ref_ifc( p_dev_obj );\r
index d8f3653..9644cef 100644 (file)
@@ -90,13 +90,14 @@ mlnx_open_ca (
 \r
        HCA_PRINT(TRACE_LEVEL_INFORMATION  ,HCA_DBG_SHIM,\r
                ("context 0x%p\n", ca_context));\r
-       status = mlnx_hobs_set_cb(&p_hca->hob,\r
-               pfn_async_event_cb,\r
-               ca_context);\r
-       if (IB_SUCCESS != status) {\r
-               goto err_set_cb;\r
+       if (pfn_async_event_cb) {\r
+               status = mlnx_hobs_set_cb(&p_hca->hob,\r
+                       pfn_async_event_cb,\r
+                       ca_context);\r
+               if (IB_SUCCESS != status) {\r
+                       goto err_set_cb;\r
+               }\r
        }\r
-\r
        \r
        //TODO: do we need something for kernel users ?\r
 \r
index 273e285..07e046e 100644 (file)
@@ -341,6 +341,9 @@ static void ib_cache_setup_one(struct ib_device *device)
        u8 p;
 
        rwlock_init(&device->cache.lock);
+       INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
+                             device, ib_cache_event);
+       ib_register_event_handler(&device->cache.event_handler);
 
        device->cache.pkey_cache =
                kmalloc(sizeof *device->cache.pkey_cache *
@@ -361,19 +364,8 @@ static void ib_cache_setup_one(struct ib_device *device)
                ib_cache_update(device, p + start_port(device));
        }
 
-       INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
-                             device, ib_cache_event);
-       if (ib_register_event_handler(&device->cache.event_handler))
-               goto err_cache;
-
        return;
 
-err_cache:
-       for (p = 0; p <= end_port(device) - start_port(device); ++p) {
-               kfree(device->cache.pkey_cache[p]);
-               kfree(device->cache.gid_cache[p]);
-       }
-
 err:
        kfree(device->cache.pkey_cache);
        kfree(device->cache.gid_cache);