[DAPL2] DAPL2 debug cleanup, along with patches to IBAL provider to correctly handle...
author stansmith <stansmith@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Fri, 18 Jul 2008 23:33:50 +0000 (23:33 +0000)
committer stansmith <stansmith@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Fri, 18 Jul 2008 23:33:50 +0000 (23:33 +0000)
Fixed the async QP & CQ error handlers, which had not been updated to the latest typedefs (ib_async_qp_handler_t & ib_async_cq_handler_t), so the arguments passed by the callers did not match the handler definitions. This resulted in a bad EP pointer dereference during async QP error handling.
Added debug value DAPL_DBG_EVD_DEQUEUE to throttle debug output when watching EVD dequeue operations.

git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@1418 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

ulp/dapl2/dapl/common/dapl_ep_util.c
ulp/dapl2/dapl/common/dapl_evd_dequeue.c
ulp/dapl2/dapl/common/dapl_evd_qp_async_error_callb.c
ulp/dapl2/dapl/common/dapl_evd_util.c
ulp/dapl2/dapl/common/dapl_ia_util.c
ulp/dapl2/dapl/ibal/dapl_ibal_cm.c
ulp/dapl2/dapl/ibal/dapl_ibal_cq.c
ulp/dapl2/dapl/ibal/dapl_ibal_qp.c
ulp/dapl2/dapl/ibal/dapl_ibal_util.h
ulp/dapl2/dapl/include/dapl_debug.h

index cc8f890..bea9b92 100644 (file)
@@ -66,7 +66,6 @@ extern void dapli_ep_default_attrs (
 \r
 char *dapl_get_ep_state_str(DAT_EP_STATE state)\r
 {\r
-#ifdef DAPL_DBG\r
     static char *state_str[DAT_EP_STATE_CONNECTED_MULTI_PATH+1] = {\r
         "DAT_EP_STATE_UNCONNECTED",                  /* quiescent state */\r
         "DAT_EP_STATE_UNCONFIGURED_UNCONNECTED",\r
@@ -86,11 +85,6 @@ char *dapl_get_ep_state_str(DAT_EP_STATE state)
      };\r
      return (state > DAT_EP_STATE_CONNECTED_MULTI_PATH ?\r
                "BAD EP STATE" : state_str[state]);\r
-#else\r
-    static char buf[12];\r
-    sprintf(buf,"%d",state);\r
-    return buf;\r
-#endif\r
 }\r
 \r
 \r
index 6f5274b..95fa5fa 100644 (file)
@@ -71,7 +71,7 @@ DAT_RETURN DAT_API dapl_evd_dequeue (
     DAT_EVENT  *local_event;\r
     DAT_RETURN dat_status;\r
 \r
-    dapl_dbg_log (DAPL_DBG_TYPE_API,\r
+    dapl_dbg_log (DAPL_DBG_EVD_DEQUEUE,\r
                  "dapl_evd_dequeue (%p, %p)\n",\r
                  evd_handle, \r
                  event);\r
index eebff2f..82f1da6 100644 (file)
@@ -99,29 +99,35 @@ dapl_evd_qp_async_error_callback (
     DAT_RETURN         dat_status;\r
 \r
 #ifdef _VENDOR_IBAL_\r
-    dapl_dbg_log (DAPL_DBG_TYPE_ERR,"%s() IB err %s\n",\r
-                       __FUNCTION__, ib_get_async_event_str(cause_ptr->code));\r
+    dapl_dbg_log (DAPL_DBG_TYPE_ERR,"%s() IB err %s ctx %p\n", __FUNCTION__,\r
+                       ib_get_async_event_str(cause_ptr->code), context);\r
 #else\r
     dapl_dbg_log (DAPL_DBG_TYPE_ERR,"%s() IB async QP err - ctx=%p\n",\r
                        __FUNCTION__, context);\r
 #endif\r
 \r
-    ep_ptr    = (DAPL_EP *) context;\r
+    ep_ptr = (DAPL_EP *) context;\r
     if ( !ep_ptr ) {\r
         dapl_dbg_log (DAPL_DBG_TYPE_ERR,"%s() NULL context?\n",__FUNCTION__);\r
         return;\r
     }\r
 \r
+    if (DAPL_BAD_HANDLE (ep_ptr, DAPL_MAGIC_EP )) {\r
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,\r
+                       "%s() BAD EP Handle %p\n", __FUNCTION__, ep_ptr);\r
+        return;\r
+    } \r
+\r
     ia_ptr    = ep_ptr->header.owner_ia;\r
     async_evd = (DAPL_EVD *) ia_ptr->async_error_evd;\r
 \r
     dapl_dbg_log (\r
        DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION,\r
-       "--> %s: ep %p qp %p (%x) state %d\n", __FUNCTION__,\r
+       "--> %s: ep %p qp %p (%x) state %s\n", __FUNCTION__,\r
        ep_ptr, \r
        ep_ptr->qp_handle, \r
        ep_ptr->qpn,\r
-       ep_ptr->param.ep_state);\r
+       dapl_get_ep_state_str( ep_ptr->param.ep_state) );\r
 \r
     /*\r
      * Transition to ERROR if we are connected; other states need to\r
@@ -147,7 +153,7 @@ dapl_evd_qp_async_error_callback (
                                              async_evd->header.owner_ia);\r
     }\r
     dapl_dbg_log (DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION,\r
-                 "%s() returns\n",__FUNCTION__);\r
+                 "%s() returns %x\n",__FUNCTION__,dat_status);\r
 }\r
 \r
 /*\r
index ea72ee8..28b10a8 100644 (file)
@@ -959,6 +959,10 @@ dapls_evd_post_async_error_event (
     IN DAT_IA_HANDLE                   ia_handle)\r
 {\r
     DAT_EVENT          *event_ptr;\r
+\r
+    dapl_dbg_log (DAPL_DBG_TYPE_ERR, "%s: event_num %d\n",     \r
+                                       __FUNCTION__,event_number);\r
+\r
     event_ptr = dapli_evd_get_and_init_event (evd_ptr, event_number);\r
     /*\r
      * Note event lock may be held on successful return\r
@@ -968,6 +972,9 @@ dapls_evd_post_async_error_event (
 \r
     if (event_ptr == NULL)\r
     {\r
+       dapl_dbg_log (DAPL_DBG_TYPE_ERR,\r
+                       "%s: Err: unable to get evd by event num\n",\r
+                        __FUNCTION__,event_number);\r
        return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES, DAT_RESOURCE_MEMORY);\r
     }\r
 \r
@@ -975,6 +982,9 @@ dapls_evd_post_async_error_event (
 \r
     dapli_evd_post_event (evd_ptr, event_ptr);\r
 \r
+    dapl_dbg_log (DAPL_DBG_TYPE_CALLBACK, "%s: posted evd %p num 0x%x(%d)\n",\r
+               __FUNCTION__, evd_ptr, event_number, event_number);\r
+\r
     return DAT_SUCCESS;\r
 }\r
 \r
index 7551a23..debf1a2 100644 (file)
@@ -1163,7 +1163,7 @@ dapls_ia_setup_callbacks (
        ia_ptr,\r
        DAPL_ASYNC_CQ_ERROR,\r
        NULL,\r
-       (ib_async_handler_t)dapl_evd_cq_async_error_callback,\r
+       (ib_async_cq_handler_t)dapl_evd_cq_async_error_callback,\r
        async_evd_ptr);\r
 \r
     if (dat_status != DAT_SUCCESS)\r
@@ -1178,7 +1178,7 @@ dapls_ia_setup_callbacks (
        ia_ptr,\r
        DAPL_ASYNC_QP_ERROR,\r
        NULL,\r
-       (ib_async_handler_t)dapl_evd_qp_async_error_callback,\r
+       (ib_async_qp_handler_t)dapl_evd_qp_async_error_callback,\r
        ia_ptr);\r
     if (dat_status != DAT_SUCCESS)\r
     {\r
index cfe2abe..fb7ccc5 100644 (file)
@@ -98,6 +98,36 @@ dapli_ib_cm_event_str(ib_cm_events_t e)
 }\r
 \r
 \r
+#if defined(DAPL_DBG)\r
+\r
+void dapli_print_private_data( char *prefix, const uint8_t *pd, int len )\r
+{\r
+    int i;\r
+            \r
+    if ( !pd || len <= 0 )\r
+       return;\r
+\r
+    dapl_log ( DAPL_DBG_TYPE_CM, "--> %s: private_data:\n    ",prefix);\r
+\r
+    if (len > IB_MAX_REP_PDATA_SIZE)\r
+    {\r
+       dapl_log ( DAPL_DBG_TYPE_ERR,\r
+               "    Private data size(%d) > Max(%d), ignored.\n    ",\r
+                                       len,DAPL_MAX_PRIVATE_DATA_SIZE);\r
+       len = IB_MAX_REP_PDATA_SIZE;\r
+    }\r
+\r
+    for ( i = 0 ; i < len; i++ )\r
+    {\r
+       dapl_log ( DAPL_DBG_TYPE_CM, "%2x ", pd[i]);\r
+       if ( ((i+1) % 20) == 0 ) \r
+           dapl_log ( DAPL_DBG_TYPE_CM, "\n    ");\r
+    }\r
+   dapl_log ( DAPL_DBG_TYPE_CM, "\n");\r
+}\r
+#endif\r
+\r
+\r
 static void \r
 dapli_ib_cm_apr_cb (\r
         IN    ib_cm_apr_rec_t          *p_cm_apr_rec )\r
@@ -345,23 +375,10 @@ dapli_ib_cm_rep_cb (
 \r
     prd_ptr = (DAPL_PRIVATE * __ptr64) p_cm_rep_rec->p_rep_pdata;\r
 \r
-#ifdef DAPL_DBG\r
-#if 0\r
-    {\r
-        int i;\r
-            \r
-        dapl_dbg_log ( DAPL_DBG_TYPE_EP, "--> DiCRpcb: private_data: ");\r
-\r
-        for ( i = 0 ; i < IB_MAX_REP_PDATA_SIZE ; i++ )\r
-        {\r
-            dapl_dbg_log ( DAPL_DBG_TYPE_EP, \r
-                                "0x%x ", prd_ptr->private_data[i]);\r
-                                 \r
-        }\r
-        dapl_dbg_log ( DAPL_DBG_TYPE_EP, "\n");\r
-                            \r
-    }\r
-#endif\r
+#if defined(DAPL_DBG) && 0\r
+    dapli_print_private_data( "DiCRpcb",\r
+                             prd_ptr->private_data,\r
+                             IB_MAX_REP_PDATA_SIZE);\r
 #endif\r
 \r
     dapl_evd_connection_callback ( \r
@@ -739,8 +756,8 @@ dapls_ib_cm_remote_addr (
  *        ep_handle,\r
  *        remote_ia_address,\r
  *        remote_conn_qual,\r
- *          prd_size                size of private data and structure\r
- *          prd_prt                pointer to private data structure\r
+ *        prd_size               size of private data and structure\r
+ *        prd_prt                pointer to private data structure\r
  *\r
  * Output:\r
  *         none\r
@@ -756,8 +773,8 @@ dapls_ib_connect (
         IN        DAT_EP_HANDLE                ep_handle,\r
         IN        DAT_IA_ADDRESS_PTR           remote_ia_address,\r
         IN        DAT_CONN_QUAL                remote_conn_qual,\r
-        IN        DAT_COUNT                    prd_size,\r
-        IN        DAPL_PRIVATE                 *prd_ptr )\r
+        IN        DAT_COUNT                    private_data_size,\r
+        IN        DAT_PVOID                    private_data )\r
 {\r
     DAPL_EP                      *ep_ptr;\r
     DAPL_IA                      *ia_ptr;\r
@@ -922,8 +939,9 @@ dapls_ib_connect (
     cm_req.p_alt_path       = NULL;\r
     cm_req.h_qp             = ep_ptr->qp_handle;\r
     cm_req.qp_type          = IB_QPT_RELIABLE_CONN;\r
-    cm_req.p_req_pdata      = (uint8_t *) prd_ptr;\r
-    cm_req.req_length       = (uint8_t)prd_size;\r
+    cm_req.p_req_pdata      = (uint8_t *) private_data;\r
+    cm_req.req_length       = (uint8_t)\r
+                               min(private_data_size,IB_MAX_REQ_PDATA_SIZE);\r
     /* cm retry to send this request messages, IB max of 4 bits */\r
     cm_req.max_cm_retries   = 15; /* timer outside of call, s/be infinite */\r
     /* qp retry to send any wr */\r
@@ -1312,9 +1330,23 @@ dapls_ib_reject_connection ( IN  dp_ib_cm_handle_t   ib_cm_handle,
     cm_rej.rej_status   = IB_REJ_USER_DEFINED;\r
     cm_rej.p_ari        = (ib_ari_t *)&rej_table[reject_reason]; \r
     cm_rej.ari_length   = (uint8_t)strlen (rej_table[reject_reason]);\r
+\r
+    if (private_data_size > \r
+       dapls_ib_private_data_size(NULL,DAPL_PDATA_CONN_REJ,NULL))\r
+    {\r
+        dapl_dbg_log ( DAPL_DBG_TYPE_ERR,\r
+                       "--> DsRjC: private_data size(%d) > Max(%d)\n", \r
+                       private_data_size, IB_MAX_REJ_PDATA_SIZE );\r
+       return DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);\r
+    }\r
+\r
     cm_rej.p_rej_pdata  = private_data;\r
     cm_rej.rej_length   = private_data_size;\r
 \r
+#if defined(DAPL_DBG) && 0\r
+    dapli_print_private_data("DsRjC",private_data,private_data_size);\r
+#endif\r
+\r
     ib_status = ib_cm_rej ( *ib_cm_handle, &cm_rej);\r
 \r
     if (ib_status != IB_SUCCESS)\r
@@ -1363,8 +1395,8 @@ dapli_query_qp( ib_qp_handle_t qp_handle, ib_qp_attr_t  *qpa )
  * Input:\r
  *        cr_handle\r
  *        ep_handle\r
- *        private_data_size - ignored as DAT layer sets 0\r
- *        private_data      - ignored as DAT layer sets NULL\r
+ *        private_data_size\r
+ *        private_data\r
  *\r
  * Output:\r
  *         none\r
@@ -1379,8 +1411,8 @@ DAT_RETURN
 dapls_ib_accept_connection (\r
         IN        DAT_CR_HANDLE            cr_handle,\r
         IN        DAT_EP_HANDLE            ep_handle,\r
-        IN        DAT_COUNT                p_size,\r
-        IN        DAPL_PRIVATE             *prd_ptr )\r
+        IN        DAT_COUNT                private_data_size,\r
+        IN const  DAT_PVOID                private_data )\r
 {\r
     DAPL_CR                *cr_ptr;\r
     DAPL_EP                *ep_ptr;\r
@@ -1467,24 +1499,21 @@ dapls_ib_accept_connection (
 \r
     cm_rep.h_qp           = ep_ptr->qp_handle;\r
     cm_rep.qp_type        = IB_QPT_RELIABLE_CONN;\r
-    cm_rep.p_rep_pdata    = (uint8_t *) cr_ptr->private_data;\r
-    cm_rep.rep_length     = IB_REQ_PDATA_SIZE;\r
 \r
-#if defined(DAPL_DBG) && 0\r
-    {\r
-        int i;\r
-            \r
-        dapl_dbg_log ( DAPL_DBG_TYPE_EP, "--> DsAC: private_data: ");\r
+    if (private_data_size > IB_MAX_REP_PDATA_SIZE) {\r
+       dapl_dbg_log (DAPL_DBG_TYPE_ERR,\r
+                       "--> DsIBAC: private_data_size(%d) > Max(%d)\n",\r
+                       private_data_size, IB_MAX_REP_PDATA_SIZE);\r
+       return DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);\r
+       \r
+    } \r
+    cm_rep.p_rep_pdata    = (const uint8_t *)private_data;\r
+    cm_rep.rep_length     = private_data_size;\r
 \r
-        for ( i = 0 ; i < IB_MAX_REP_PDATA_SIZE ; i++ )\r
-        {\r
-            dapl_dbg_log ( DAPL_DBG_TYPE_EP, \r
-                                "0x%x ", prd_ptr->private_data[i]);\r
-                                 \r
-        }\r
-        dapl_dbg_log ( DAPL_DBG_TYPE_EP, "\n");\r
-                            \r
-    }\r
+#if defined(DAPL_DBG) && 0\r
+    dapli_print_private_data( "DsIBAC",\r
+                             (const uint8_t*)private_data,\r
+                             private_data_size );\r
 #endif\r
 \r
     cm_rep.pfn_cm_rej_cb = dapli_ib_cm_rej_cb;\r
index 575f4ef..45f2289 100644 (file)
@@ -102,7 +102,9 @@ dapli_ibal_cq_async_error_callback ( IN  ib_async_event_rec_t  *p_err_rec )
 \r
     /* maps to dapl_evd_cq_async_error_callback(), context is EVD */\r
     evd_cb->pfn_async_cq_err_cb( (ib_hca_handle_t)p_ca, \r
-                               (ib_error_record_t*)&p_err_rec->code, evd_ptr);\r
+                                evd_ptr->ib_cq_handle,\r
+                                (ib_error_record_t*)&p_err_rec->code,\r
+                                evd_ptr );\r
 \r
 }\r
 \r
index bd8f538..0cd9827 100644 (file)
@@ -101,7 +101,7 @@ dapli_ib_qp_async_error_cb( IN  ib_async_event_rec_t* p_err_rec )
        if ((evd_cb == NULL) || (evd_cb->pfn_async_qp_err_cb == NULL))\r
        {\r
                dapl_dbg_log (DAPL_DBG_TYPE_ERR,\r
-                       "--> DiQpAEC: no ERROR cb on %p found \n", p_ca);\r
+                       "--> DiQpAEC: no ERROR cb on p_ca %p found\n", p_ca);\r
                return;\r
        }\r
 \r
@@ -111,11 +111,12 @@ dapli_ib_qp_async_error_cb( IN  ib_async_event_rec_t* p_err_rec )
 \r
        /* force disconnect, QP error state, to insure DTO's get flushed */\r
        dapls_ib_disconnect ( ep_ptr, DAT_CLOSE_ABRUPT_FLAG );\r
-       \r
+\r
        /* maps to dapl_evd_qp_async_error_callback(), context is EP */\r
-       evd_cb->pfn_async_qp_err_cb(    (ib_hca_handle_t)p_ca, \r
-                                       (ib_error_record_t*)&p_err_rec->code,\r
-                                       ep_ptr);\r
+       evd_cb->pfn_async_qp_err_cb( (ib_hca_handle_t)p_ca, \r
+                                    ep_ptr->qp_handle,\r
+                                    (ib_error_record_t*)&p_err_rec->code,\r
+                                    ep_ptr );\r
 }\r
 \r
 /*\r
index beab1ff..6d3bec4 100644 (file)
@@ -82,6 +82,18 @@ typedef void (*ib_async_handler_t)(
     IN    ib_error_record_t  *err_code,\r
     IN    void               *context);\r
 \r
+typedef void (*ib_async_qp_handler_t)(\r
+    IN    ib_hca_handle_t    ib_hca_handle,\r
+    IN    ib_qp_handle_t     ib_qp_handle,\r
+    IN    ib_error_record_t  *err_code,\r
+    IN    void               *context);\r
+\r
+typedef void (*ib_async_cq_handler_t)(\r
+    IN    ib_hca_handle_t    ib_hca_handle,\r
+    IN    ib_cq_handle_t     ib_cq_handle,\r
+    IN    ib_error_record_t  *err_code,\r
+    IN    void               *context);\r
+\r
 typedef ib_net64_t   ib_guid_t;\r
 typedef ib_net16_t   ib_lid_t;\r
 typedef boolean_t    ib_bool_t;\r
@@ -210,11 +222,11 @@ typedef struct _dapl_ibal_port
 \r
 typedef struct _dapl_ibal_evd_cb\r
 {\r
-    cl_list_item_t     next;        // peer CA list\r
-    ib_async_handler_t pfn_async_err_cb;\r
-    ib_async_handler_t pfn_async_qp_err_cb;\r
-    ib_async_handler_t pfn_async_cq_err_cb;\r
-    void               *context;\r
+    cl_list_item_t         next;        // peer CA list\r
+    ib_async_handler_t     pfn_async_err_cb;\r
+    ib_async_qp_handler_t  pfn_async_qp_err_cb;\r
+    ib_async_cq_handler_t  pfn_async_cq_err_cb;\r
+    void                   *context;\r
 } dapl_ibal_evd_cb_t;\r
 \r
 /*\r
index c7ade44..023ead9 100644 (file)
@@ -65,7 +65,8 @@ typedef enum
     DAPL_DBG_TYPE_RTN          = 0x0200,\r
     DAPL_DBG_TYPE_EXCEPTION    = 0x0400,\r
     DAPL_DBG_TYPE_SRQ          = 0x0800,\r
-    DAPL_DBG_TYPE_CNTR         = 0x1000\r
+    DAPL_DBG_TYPE_CNTR         = 0x1000,\r
+       DAPL_DBG_EVD_DEQUEUE    = 0x2000\r
 \r
 } DAPL_DBG_TYPE;\r
 \r