Backported some 2.6.32 IB bug fixes to the 2.6.30 kernel.
author bvassche <bvassche@d57e44dd-8a1f-0410-8b47-8ef2f437770f>
Sun, 10 Jan 2010 13:14:28 +0000 (13:14 +0000)
committer bvassche <bvassche@d57e44dd-8a1f-0410-8b47-8ef2f437770f>
Sun, 10 Jan 2010 13:14:28 +0000 (13:14 +0000)
git-svn-id: https://scst.svn.sourceforge.net/svnroot/scst/trunk@1442 d57e44dd-8a1f-0410-8b47-8ef2f437770f

srpt/patches/kernel-2.6.30-4e49627b9bc29a14b393c480e8c979e3bc922ef7-introduce-__cancel_delayed_work.patch [new file with mode: 0644]
srpt/patches/kernel-2.6.30-6b2eef8fd78ff909c3396b8671d57c42559cc51d-ib-mad-fix-possible-lock-lock-timer-deadlock.patch [new file with mode: 0644]
srpt/patches/kernel-2.6.30-721d67cdca5b7642b380ca0584de8dceecf6102f-drop-priv-lock-before-calling-ipoib_send.patch [new file with mode: 0644]

diff --git a/srpt/patches/kernel-2.6.30-4e49627b9bc29a14b393c480e8c979e3bc922ef7-introduce-__cancel_delayed_work.patch b/srpt/patches/kernel-2.6.30-4e49627b9bc29a14b393c480e8c979e3bc922ef7-introduce-__cancel_delayed_work.patch
new file mode 100644 (file)
index 0000000..62914fd
--- /dev/null
@@ -0,0 +1,60 @@
+This patch was introduced in kernel 2.6.31 with the following description:
+
+commit 4e49627b9bc29a14b393c480e8c979e3bc922ef7
+Author: Oleg Nesterov <oleg@redhat.com>
+Date:   Sat Sep 5 11:17:06 2009 -0700
+
+    workqueues: introduce __cancel_delayed_work()
+
+    cancel_delayed_work() has to use del_timer_sync() to guarantee the timer
+    function is not running after return.  But most users doesn't actually
+    need this, and del_timer_sync() has problems: it is not useable from
+    interrupt, and it depends on every lock which could be taken from irq.
+
+    Introduce __cancel_delayed_work() which calls del_timer() instead.
+
+    The immediate reason for this patch is
+    http://bugzilla.kernel.org/show_bug.cgi?id=13757
+    but hopefully this helper makes sense anyway.
+
+    As for 13757 bug, actually we need requeue_delayed_work(), but its
+    semantics are not yet clear.
+
+    Merge this patch early to resolves cross-tree interdependencies between
+    input and infiniband.
+
+    Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+    Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+    Cc: Roland Dreier <rdreier@cisco.com>
+    Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
+    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+
+---
+
+diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
+index 13e1adf..6273fa9 100644
+--- a/include/linux/workqueue.h
++++ b/include/linux/workqueue.h
+@@ -240,6 +240,21 @@ static inline int cancel_delayed_work(struct delayed_work *work)
+       return ret;
+ }
++/*
++ * Like above, but uses del_timer() instead of del_timer_sync(). This means,
++ * if it returns 0 the timer function may be running and the queueing is in
++ * progress.
++ */
++static inline int __cancel_delayed_work(struct delayed_work *work)
++{
++      int ret;
++
++      ret = del_timer(&work->timer);
++      if (ret)
++              work_clear_pending(&work->work);
++      return ret;
++}
++
+ extern int cancel_delayed_work_sync(struct delayed_work *work);
+ /* Obsolete. use cancel_delayed_work_sync() */
diff --git a/srpt/patches/kernel-2.6.30-6b2eef8fd78ff909c3396b8671d57c42559cc51d-ib-mad-fix-possible-lock-lock-timer-deadlock.patch b/srpt/patches/kernel-2.6.30-6b2eef8fd78ff909c3396b8671d57c42559cc51d-ib-mad-fix-possible-lock-lock-timer-deadlock.patch
new file mode 100644 (file)
index 0000000..fa04216
--- /dev/null
@@ -0,0 +1,63 @@
+commit 6b2eef8fd78ff909c3396b8671d57c42559cc51d
+Author: Roland Dreier <rolandd@cisco.com>
+Date:   Mon Sep 7 08:27:50 2009 -0700
+
+    IB/mad: Fix possible lock-lock-timer deadlock
+    
+    Lockdep reported a possible deadlock with cm_id_priv->lock,
+    mad_agent_priv->lock and mad_agent_priv->timed_work.timer; this
+    happens because the mad module does
+    
+       cancel_delayed_work(&mad_agent_priv->timed_work);
+    
+    while holding mad_agent_priv->lock.  cancel_delayed_work() internally
+    does del_timer_sync(&mad_agent_priv->timed_work.timer).
+    
+    This can turn into a deadlock because mad_agent_priv->lock is taken
+    inside cm_id_priv->lock, so we can get the following set of contexts
+    that deadlock each other:
+    
+     A: holding cm_id_priv->lock, waiting for mad_agent_priv->lock
+     B: holding mad_agent_priv->lock, waiting for del_timer_sync()
+     C: interrupt during mad_agent_priv->timed_work.timer that takes
+        cm_id_priv->lock
+    
+    Fix this by using the new __cancel_delayed_work() interface (which
+    internally does del_timer() instead of del_timer_sync()) in all the
+    places where we are holding a lock.
+    
+    Addresses: http://bugzilla.kernel.org/show_bug.cgi?id=13757
+    Reported-by: Bart Van Assche <bart.vanassche@gmail.com>
+    Signed-off-by: Roland Dreier <rolandd@cisco.com>
+
+diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
+index de922a0..bc30c00 100644
+--- a/drivers/infiniband/core/mad.c
++++ b/drivers/infiniband/core/mad.c
+@@ -1974,7 +1974,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
+       unsigned long delay;
+       if (list_empty(&mad_agent_priv->wait_list)) {
+-              cancel_delayed_work(&mad_agent_priv->timed_work);
++              __cancel_delayed_work(&mad_agent_priv->timed_work);
+       } else {
+               mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
+                                        struct ib_mad_send_wr_private,
+@@ -1983,7 +1983,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
+               if (time_after(mad_agent_priv->timeout,
+                              mad_send_wr->timeout)) {
+                       mad_agent_priv->timeout = mad_send_wr->timeout;
+-                      cancel_delayed_work(&mad_agent_priv->timed_work);
++                      __cancel_delayed_work(&mad_agent_priv->timed_work);
+                       delay = mad_send_wr->timeout - jiffies;
+                       if ((long)delay <= 0)
+                               delay = 1;
+@@ -2023,7 +2023,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
+       /* Reschedule a work item if we have a shorter timeout */
+       if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
+-              cancel_delayed_work(&mad_agent_priv->timed_work);
++              __cancel_delayed_work(&mad_agent_priv->timed_work);
+               queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+                                  &mad_agent_priv->timed_work, delay);
+       }
diff --git a/srpt/patches/kernel-2.6.30-721d67cdca5b7642b380ca0584de8dceecf6102f-drop-priv-lock-before-calling-ipoib_send.patch b/srpt/patches/kernel-2.6.30-721d67cdca5b7642b380ca0584de8dceecf6102f-drop-priv-lock-before-calling-ipoib_send.patch
new file mode 100644 (file)
index 0000000..244bfff
--- /dev/null
@@ -0,0 +1,63 @@
+This is a backported version of a 2.6.32 patch with the following description:
+commit 721d67cdca5b7642b380ca0584de8dceecf6102f
+Author: Roland Dreier <rolandd@cisco.com>
+Date:   Sat Sep 5 20:23:40 2009 -0700
+
+    IPoIB: Drop priv->lock before calling ipoib_send()
+
+    IPoIB currently must use irqsave locking for priv->lock, since it is
+    taken from interrupt context in one path.  However, ipoib_send() does
+    skb_orphan(), and the network stack locking is not IRQ-safe.
+    Therefore we need to make sure we don't hold priv->lock when calling
+    ipoib_send() to avoid lockdep warnings (the code was almost certainly
+    safe in practice, since the only code path that takes priv->lock from
+    interrupt context would never call into the network stack).
+
+    Addresses: http://bugzilla.kernel.org/show_bug.cgi?id=13757
+    Reported-by: Bart Van Assche <bart.vanassche@gmail.com>
+    Signed-off-by: Roland Dreier <rolandd@cisco.com>
+
+---
+
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+index e319d91..2bf5116 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+@@ -604,8 +604,11 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
+                                          skb_queue_len(&neigh->queue));
+                               goto err_drop;
+                       }
+-              } else
++              } else {
++                      spin_unlock_irqrestore(&priv->lock, flags);
+                       ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
++                      return;
++              }
+       } else {
+               neigh->ah  = NULL;
+@@ -688,7 +691,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
+               ipoib_dbg(priv, "Send unicast ARP to %04x\n",
+                         be16_to_cpu(path->pathrec.dlid));
++              spin_unlock_irqrestore(&priv->lock, flags);
+               ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
++              return;
+       } else if ((path->query || !path_rec_start(dev, path)) &&
+                  skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+               /* put pseudoheader back on for next time */
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+index a0e9753..a0825fe 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+@@ -720,7 +720,9 @@ out:
+                       }
+               }
++              spin_unlock_irqrestore(&priv->lock, flags);
+               ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
++              return;
+       }
+ unlock: