Backported some 2.6.32 IB bug fixes to the 2.6.30 kernel.
[mirror/scst/.git] / srpt / patches / kernel-2.6.30-6b2eef8fd78ff909c3396b8671d57c42559cc51d-ib-mad-fix-possible-lock-lock-timer-deadlock.patch
1 commit 6b2eef8fd78ff909c3396b8671d57c42559cc51d
2 Author: Roland Dreier <rolandd@cisco.com>
3 Date:   Mon Sep 7 08:27:50 2009 -0700
4
5     IB/mad: Fix possible lock-lock-timer deadlock
6     
7     Lockdep reported a possible deadlock with cm_id_priv->lock,
8     mad_agent_priv->lock and mad_agent_priv->timed_work.timer; this
9     happens because the mad module does
10     
11         cancel_delayed_work(&mad_agent_priv->timed_work);
12     
13     while holding mad_agent_priv->lock.  cancel_delayed_work() internally
14     does del_timer_sync(&mad_agent_priv->timed_work.timer).
15     
16     This can turn into a deadlock because mad_agent_priv->lock is taken
17     inside cm_id_priv->lock, so we can get the following set of contexts
18     that deadlock each other:
19     
20      A: holding cm_id_priv->lock, waiting for mad_agent_priv->lock
21      B: holding mad_agent_priv->lock, waiting for del_timer_sync()
22      C: interrupt arriving while mad_agent_priv->timed_work.timer is
23         running, whose handler takes cm_id_priv->lock
24     
25     Fix this by using the new __cancel_delayed_work() interface (which
26     internally does del_timer() instead of del_timer_sync()) in all the
27     places where we are holding a lock.
28     
29     Addresses: http://bugzilla.kernel.org/show_bug.cgi?id=13757
30     Reported-by: Bart Van Assche <bart.vanassche@gmail.com>
31     Signed-off-by: Roland Dreier <rolandd@cisco.com>
32
33 diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
34 index de922a0..bc30c00 100644
35 --- a/drivers/infiniband/core/mad.c
36 +++ b/drivers/infiniband/core/mad.c
37 @@ -1974,7 +1974,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
38         unsigned long delay;
39  
40         if (list_empty(&mad_agent_priv->wait_list)) {
41 -               cancel_delayed_work(&mad_agent_priv->timed_work);
42 +               __cancel_delayed_work(&mad_agent_priv->timed_work);
43         } else {
44                 mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
45                                          struct ib_mad_send_wr_private,
46 @@ -1983,7 +1983,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
47                 if (time_after(mad_agent_priv->timeout,
48                                mad_send_wr->timeout)) {
49                         mad_agent_priv->timeout = mad_send_wr->timeout;
50 -                       cancel_delayed_work(&mad_agent_priv->timed_work);
51 +                       __cancel_delayed_work(&mad_agent_priv->timed_work);
52                         delay = mad_send_wr->timeout - jiffies;
53                         if ((long)delay <= 0)
54                                 delay = 1;
55 @@ -2023,7 +2023,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
56  
57         /* Reschedule a work item if we have a shorter timeout */
58         if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
59 -               cancel_delayed_work(&mad_agent_priv->timed_work);
60 +               __cancel_delayed_work(&mad_agent_priv->timed_work);
61                 queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
62                                    &mad_agent_priv->timed_work, delay);
63         }