Fix bug: When there is an error in the osm_vendor_send call,
authoreitan <eitan@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Wed, 12 Oct 2005 08:14:52 +0000 (08:14 +0000)
committereitan <eitan@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Wed, 12 Oct 2005 08:14:52 +0000 (08:14 +0000)
we need to decrement several MAD counters and call the dispatcher
if qp0_mads_outstanding has reached 0.

git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@115 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

ulp/opensm/user/include/opensm/osm_log.h
ulp/opensm/user/include/opensm/osm_vl15intf.h
ulp/opensm/user/opensm/osm_opensm.c
ulp/opensm/user/opensm/osm_sm_mad_ctrl.c
ulp/opensm/user/opensm/osm_vl15intf.c

index 3337794..55e1695 100644 (file)
@@ -243,14 +243,14 @@ osm_log_init(
   else
   {
     if (accum_log_file)
-    p_log->out_port = fopen(log_file,"a+");
+      p_log->out_port = fopen(log_file,"a+");
     else
       p_log->out_port = fopen(log_file,"w+");
     
     if (!p_log->out_port)
     {
       if (accum_log_file)
-      printf("Cannot open %s for appending. Permission denied\n", log_file);
+        printf("Cannot open %s for appending. Permission denied\n", log_file);
       else
         printf("Cannot open %s for writing. Permission denied\n", log_file);
 
index 2c07ed3..4257f49 100644 (file)
 #include <complib/cl_event.h>
 #include <complib/cl_thread.h>
 #include <complib/cl_qlist.h>
+#include <complib/cl_passivelock.h>
 #include <opensm/osm_stats.h>
 #include <opensm/osm_log.h>
 #include <opensm/osm_madw.h>
 #include <opensm/osm_mad_pool.h>
 #include <vendor/osm_vendor.h>
+#include <opensm/osm_subnet.h>
 
 #ifdef __cplusplus
 #  define BEGIN_C_DECLS extern "C" {
@@ -134,6 +136,9 @@ typedef struct _osm_vl15
        osm_vendor_t                                    *p_vend;
        osm_log_t                                               *p_log;
        osm_stats_t                                             *p_stats;
+       osm_subn_t                                              *p_subn;
+       cl_disp_reg_handle_t                    h_disp;
+       cl_plock_t                                              *p_lock;
 
 } osm_vl15_t;
 /*
@@ -173,6 +178,15 @@ typedef struct _osm_vl15
 *      p_stats
 *              Pointer to the OpenSM statistics block.
 *
+*  p_subn
+*     Pointer to the Subnet object for this subnet.
+*
+*  h_disp
+*    Handle returned from dispatcher registration.
+*
+*      p_lock
+*              Pointer to the serializing lock.
+*
 * SEE ALSO
 *      VL15 object
 *********/
@@ -262,7 +276,10 @@ osm_vl15_init(
        IN osm_vendor_t* const p_vend,
        IN osm_log_t* const p_log,
        IN osm_stats_t* const p_stats,
-       IN const int32_t max_wire_smps );
+       IN const int32_t max_wire_smps,
+       IN osm_subn_t* const p_subn,
+       IN cl_dispatcher_t* const p_disp,
+       IN cl_plock_t* const p_lock );
 /*
 * PARAMETERS
 *      p_vl15
@@ -280,6 +297,15 @@ osm_vl15_init(
 *      max_wire_smps
 *              [in] Maximum number of MADs allowed on the wire at one time.
 *
+*  p_subn
+*     [in] Pointer to the subnet object.
+*
+*  p_disp
+*     [in] Pointer to the dispatcher object.
+*
+*      p_lock
+*              [in] Pointer to the OpenSM serializing lock.
+*
 * RETURN VALUES
 *      IB_SUCCESS if the VL15 object was initialized successfully.
 *
index 925d227..7ccfc81 100644 (file)
@@ -254,7 +254,8 @@ osm_opensm_init(
 
    status = osm_vl15_init( &p_osm->vl15,
                            p_osm->p_vendor,
-                           &p_osm->log, &p_osm->stats, p_opt->max_wire_smps );
+                           &p_osm->log, &p_osm->stats, p_opt->max_wire_smps,
+                           &p_osm->subn, &p_osm->disp,  &p_osm->lock );
    if( status != IB_SUCCESS )
       goto Exit;
 
index f15836b..c01b808 100644 (file)
@@ -96,7 +96,17 @@ __osm_sm_mad_ctrl_retire_trans_mad(
 
   osm_mad_pool_put( p_ctrl->p_mad_pool, p_madw );
 
-  cl_atomic_dec( &p_ctrl->p_stats->qp0_mads_outstanding );
+  /* Test the counter's value (not its address, which is never NULL). */
+  if ( p_ctrl->p_stats->qp0_mads_outstanding == 0 )
+  {
+    osm_log( p_ctrl->p_log, OSM_LOG_ERROR,
+             "__osm_sm_mad_ctrl_retire_trans_mad: ERR 3120: "
+             "Trying to dec qp0_mads_outstanding=0. Problem with transaction mgr!\n");
+  }
+  else
+  {
+    cl_atomic_dec( &p_ctrl->p_stats->qp0_mads_outstanding );
+  }
 
   if( osm_log_is_active( p_ctrl->p_log, OSM_LOG_DEBUG ) )
   {
@@ -222,8 +232,19 @@ __osm_sm_mad_ctrl_update_wire_stats(
 
   OSM_LOG_ENTER( p_ctrl->p_log, __osm_sm_mad_ctrl_update_wire_stats );
 
-  mads_on_wire = cl_atomic_dec(
-    &p_ctrl->p_stats->qp0_mads_outstanding_on_wire );
+  /* Make sure we are not decrementing below zero */
+  if ( p_ctrl->p_stats->qp0_mads_outstanding_on_wire == 0 )
+  {
+    osm_log( p_ctrl->p_log, OSM_LOG_ERROR,
+             "__osm_sm_mad_ctrl_update_wire_stats: ERR 3105: "
+             "Trying to dec qp0_mads_outstanding_on_wire=0. "
+             "Problem with transaction mgr!\n");
+    mads_on_wire = 0; /* nothing was decremented; keep the local defined */
+  }
+  else
+  {
+    mads_on_wire = cl_atomic_dec( &p_ctrl->p_stats->qp0_mads_outstanding_on_wire );
+  }
 
   if( osm_log_is_active( p_ctrl->p_log, OSM_LOG_DEBUG ) )
   {
index 1b9aa20..ae38285 100644 (file)
@@ -154,6 +154,8 @@ __osm_vl15_poller(
 
       if( status != IB_SUCCESS )
       {
+        uint32_t outstanding;
+        cl_status_t cl_status;
         osm_log( p_vl->p_log, OSM_LOG_ERROR,
                  "__osm_vl15_poller: ERR 3E03: "
                  "MAD send failed (%s).\n",
@@ -161,9 +163,71 @@ __osm_vl15_poller(
 
         /*
           The MAD was never successfully sent, so
-          Fix-up the pre-incremented count values.
+          fix up the pre-incremented count values.
         */
+        /* Decrement qp0_mads_sent and qp0_mads_outstanding_on_wire,
+           which were incremented in the code above. */
         mads_sent = cl_atomic_dec( &p_vl->p_stats->qp0_mads_sent );
+        if( p_madw->resp_expected == TRUE ) 
+          cl_atomic_dec( &p_vl->p_stats->qp0_mads_outstanding_on_wire ); 
+
+        /*
+           The following code is similar to the code in
+           __osm_sm_mad_ctrl_retire_trans_mad. We need to decrement the
+           qp0_mads_outstanding counter and, if it reached 0, call
+           cl_disp_post with OSM_SIGNAL_NO_PENDING_TRANSACTIONS (in order
+           to wake up the state mgr).
+        */
+        cl_atomic_dec( &p_vl->p_stats->qp0_mads_outstanding );
+        
+        osm_log( p_vl->p_log, OSM_LOG_DEBUG,
+                 "__osm_vl15_poller: "
+                 "%u QP0 MADs outstanding.\n",
+                 p_vl->p_stats->qp0_mads_outstanding );
+        
+        /*
+          Acquire the lock non-exclusively.
+          Other modules that send MADs grab this lock exclusively.
+          These modules that are in the process of sending MADs
+          will hold the lock until they finish posting all the MADs
+          they plan to send.  While the other module is sending MADs
+          the outstanding count may temporarily go to zero.
+          Thus, by grabbing the lock ourselves, we get an accurate
+          view of whether or not the number of outstanding MADs is
+          really zero.
+        */
+        CL_PLOCK_ACQUIRE( p_vl->p_lock );
+        outstanding = p_vl->p_stats->qp0_mads_outstanding;
+        CL_PLOCK_RELEASE( p_vl->p_lock );
+
+        if( outstanding == 0 )
+        {
+          /*
+            The wire is clean.
+            Signal the state manager.
+          */
+          if( osm_log_is_active( p_vl->p_log, OSM_LOG_DEBUG ) )
+          {
+            osm_log( p_vl->p_log, OSM_LOG_DEBUG,
+                     "__osm_vl15_poller: "
+                     "Posting Dispatcher message %s.\n",
+                     osm_get_disp_msg_str( OSM_MSG_NO_SMPS_OUTSTANDING ) );
+          }
+          
+          cl_status = cl_disp_post( p_vl->h_disp,
+                                    OSM_MSG_NO_SMPS_OUTSTANDING,
+                                    (void *)OSM_SIGNAL_NO_PENDING_TRANSACTIONS,
+                                    NULL,
+                                    NULL );
+          
+          if( cl_status != CL_SUCCESS )
+          {
+            osm_log( p_vl->p_log, OSM_LOG_ERROR,
+                     "__osm_vl15_poller: ERR 3E06: "
+                     "Dispatcher post message failed (%s).\n",
+                     CL_STATUS_MSG( cl_status ) );
+          }
+        }
       }
       else
       {
@@ -171,8 +235,8 @@ __osm_vl15_poller(
         {
           osm_log( p_vl->p_log, OSM_LOG_DEBUG,
                    "__osm_vl15_poller: "
-                   "%u on wire, %u outstanding, %u unicasts sent, "
-                   "%u sent total.\n",
+                   "%u QP0 MADs on wire, %u outstanding, %u unicasts sent, "
+                   "%u total sent.\n",
                    p_vl->p_stats->qp0_mads_outstanding_on_wire,
                    p_vl->p_stats->qp0_mads_outstanding,
                    p_vl->p_stats->qp0_unicasts_sent,
@@ -229,6 +293,7 @@ osm_vl15_construct(
   cl_qlist_init( &p_vl->rfifo );
   cl_qlist_init( &p_vl->ufifo );
   cl_thread_construct( &p_vl->poller );
+  p_vl->h_disp = CL_DISP_INVALID_HANDLE;
 }
 
 /**********************************************************************
@@ -278,6 +343,8 @@ osm_vl15_destroy(
   p_vl->state = OSM_VL15_STATE_INIT;
   cl_spinlock_destroy( &p_vl->lock );
 
+  cl_disp_unregister( p_vl->h_disp );
+
   OSM_LOG_EXIT( p_vl->p_log );
 }
 
@@ -289,7 +356,11 @@ osm_vl15_init(
   IN osm_vendor_t* const p_vend,
   IN osm_log_t* const p_log,
   IN osm_stats_t* const p_stats,
-  IN const int32_t max_wire_smps )
+  IN const int32_t max_wire_smps,
+  IN osm_subn_t* const p_subn,
+  IN cl_dispatcher_t* const p_disp,
+  IN cl_plock_t* const p_lock
+ )
 {
   ib_api_status_t status = IB_SUCCESS;
   OSM_LOG_ENTER( p_log, osm_vl15_init );
@@ -298,6 +369,8 @@ osm_vl15_init(
   p_vl->p_log = p_log;
   p_vl->p_stats = p_stats;
   p_vl->max_wire_smps = max_wire_smps;
+  p_vl->p_subn = p_subn;
+  p_vl->p_lock = p_lock;
 
   status = cl_event_init( &p_vl->signal, FALSE );
   if( status != IB_SUCCESS )
@@ -318,6 +391,21 @@ osm_vl15_init(
   if( status != IB_SUCCESS )
     goto Exit;
 
+  p_vl->h_disp = cl_disp_register(
+    p_disp,
+    CL_DISP_MSGID_NONE,
+    NULL,
+    NULL );
+
+  if( p_vl->h_disp == CL_DISP_INVALID_HANDLE )
+  {
+    osm_log( p_log, OSM_LOG_ERROR,
+             "osm_vl15_init: ERR 3E01: "
+             "Dispatcher registration failed.\n" );
+    status = IB_INSUFFICIENT_RESOURCES;
+    goto Exit;
+  }
+
  Exit:
   OSM_LOG_EXIT( p_log );
   return( status );
@@ -396,7 +484,7 @@ osm_vl15_post(
   {
     osm_log( p_vl->p_log, OSM_LOG_DEBUG,
              "osm_vl15_post: "
-             "%u MADs on wire, %u MADs outstanding.\n",
+             "%u QP0 MADs on wire, %u QP0 MADs outstanding.\n",
              p_vl->p_stats->qp0_mads_outstanding_on_wire,
              p_vl->p_stats->qp0_mads_outstanding );
   }