Update ppp TX restart patch to avoid race condition

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
SVN-Revision: 31312
author: Jo-Philipp Wich <jow@openwrt.org> 2012-04-16 15:04:44 +0000
committer: Jo-Philipp Wich <jow@openwrt.org> 2012-04-16 15:04:44 +0000
commit: 2036c23faa0f6ed2784769d3f4a787dd76a0c520 (patch)
tree: 6e0e5f234d8893d2e7c6ed943fe26d597653f46f /target
parent: f638b544f3b2ffe568f581f0225760ef6cc31d92 (diff)
download: upstream-2036c23faa0f6ed2784769d3f4a787dd76a0c520.tar.gz upstream-2036c23faa0f6ed2784769d3f4a787dd76a0c520.tar.bz2 upstream-2036c23faa0f6ed2784769d3f4a787dd76a0c520.zip
2 files changed, 118 insertions, 114 deletions
diff --git a/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch b/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch
index 84eb4e41ba..165801cac6 100644
--- a/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch
+++ b/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch
@@ -1,75 +1,77 @@
-For every transmitted packet, ppp_start_xmit() will stop the netdev
-queue and then, if appropriate, restart it. This causes the TX softirq
-to run, entirely gratuitously.
+commit 9a5d2bd99e0dfe9a31b3c160073ac445ba3d773f
+Author: David Woodhouse <dwmw2@infradead.org>
+Date:   Sun Apr 8 10:01:44 2012 +0000
 
-This is "only" a waste of CPU time in the normal case, but it's actively
-harmful when the PPP device is a TEQL slave — the wakeup will cause the
-offending device to receive the next TX packet from the TEQL queue, when
-it *should* have gone to the next slave in the list. We end up seeing
-large bursts of packets on just *one* slave device, rather than using
-the full available bandwidth over all slaves.
+    ppp: Fix race condition with queue start/stop
+    
+    Commit e675f0cc9a872fd152edc0c77acfed19bf28b81e ("ppp: Don't stop and
+    restart queue on every TX packet") introduced a race condition which
+    could leave the net queue stopped even when the channel is no longer
+    busy. By calling netif_stop_queue() from ppp_start_xmit(), based on the
+    return value from ppp_xmit_process() but *after* all the locks have been
+    dropped, we could potentially do so *after* the channel has actually
+    finished transmitting and attempted to re-wake the queue.
+    
+    Fix this by moving the netif_stop_queue() into ppp_xmit_process() under
+    the xmit lock. I hadn't done this previously, because it gets called
+    from other places than ppp_start_xmit(). But I now think it's the better
+    option. The net queue *should* be stopped if the channel becomes
+    congested due to writes from pppd, anyway.
+    
+    Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+    Signed-off-by: David S. Miller <davem@davemloft.net>
 
-This patch fixes the problem by *not* unconditionally stopping the queue
-in ppp_start_xmit(). It adds a return value from ppp_xmit_process()
-which indicates whether the queue should be stopped or not.
+commit e675f0cc9a872fd152edc0c77acfed19bf28b81e
+Author: David Woodhouse <dwmw2@infradead.org>
+Date:   Mon Mar 26 00:03:42 2012 +0000
 
-It *doesn't* remove the call to netif_wake_queue() from
-ppp_xmit_process(), because other code paths (especially from
-ppp_output_wakeup()) need it there and it's messy to push it out to the
-other callers to do it based on the return value. So we leave it in
-place — it's a no-op in the case where the queue wasn't stopped, so it's
-harmless in the TX path.
+    ppp: Don't stop and restart queue on every TX packet
+    
+    For every transmitted packet, ppp_start_xmit() will stop the netdev
+    queue and then, if appropriate, restart it. This causes the TX softirq
+    to run, entirely gratuitously.
+    
+    This is "only" a waste of CPU time in the normal case, but it's actively
+    harmful when the PPP device is a TEQL slave — the wakeup will cause the
+    offending device to receive the next TX packet from the TEQL queue, when
+    it *should* have gone to the next slave in the list. We end up seeing
+    large bursts of packets on just *one* slave device, rather than using
+    the full available bandwidth over all slaves.
+    
+    This patch fixes the problem by *not* unconditionally stopping the queue
+    in ppp_start_xmit(). It adds a return value from ppp_xmit_process()
+    which indicates whether the queue should be stopped or not.
+    
+    It *doesn't* remove the call to netif_wake_queue() from
+    ppp_xmit_process(), because other code paths (especially from
+    ppp_output_wakeup()) need it there and it's messy to push it out to the
+    other callers to do it based on the return value. So we leave it in
+    place — it's a no-op in the case where the queue wasn't stopped, so it's
+    harmless in the TX path.
+    
+    Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+    Signed-off-by: David S. Miller <davem@davemloft.net>
 
-Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
 
+
+diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
+index 159da29..21d7151 100644
 --- a/drivers/net/ppp/ppp_generic.c
 +++ b/drivers/net/ppp/ppp_generic.c
-@@ -235,7 +235,7 @@ struct ppp_net {
- /* Prototypes. */
- static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
- 			struct file *file, unsigned int cmd, unsigned long arg);
--static void ppp_xmit_process(struct ppp *ppp);
-+static int ppp_xmit_process(struct ppp *ppp);
- static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
- static void ppp_push(struct ppp *ppp);
- static void ppp_channel_push(struct channel *pch);
-@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, stru
+@@ -968,7 +968,6 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
  	proto = npindex_to_proto[npi];
  	put_unaligned_be16(proto, pp);
  
 -	netif_stop_queue(dev);
  	skb_queue_tail(&ppp->file.xq, skb);
--	ppp_xmit_process(ppp);
-+	if (!ppp_xmit_process(ppp))
-+		netif_stop_queue(dev);
+ 	ppp_xmit_process(ppp);
  	return NETDEV_TX_OK;
- 
-  outf:
-@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device
-  * Called to do any work queued up on the transmit side
-  * that can now be done.
-  */
--static void
-+static int
- ppp_xmit_process(struct ppp *ppp)
- {
- 	struct sk_buff *skb;
-+	int ret = 0;
- 
- 	ppp_xmit_lock(ppp);
- 	if (!ppp->closing) {
-@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp)
- 			ppp_send_frame(ppp, skb);
- 		/* If there's no work left to do, tell the core net
+@@ -1063,6 +1062,8 @@ ppp_xmit_process(struct ppp *ppp)
  		   code that we can accept some more. */
--		if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
-+		if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) {
+ 		if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
  			netif_wake_queue(ppp->dev);
-+			ret = 1;
-+		}
++		else
++			netif_stop_queue(ppp->dev);
  	}
  	ppp_xmit_unlock(ppp);
-+	return ret;
  }
- 
- static inline struct sk_buff *
diff --git a/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch b/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch
index 84eb4e41ba..165801cac6 100644
--- a/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch
+++ b/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch
@@ -1,75 +1,77 @@
-For every transmitted packet, ppp_start_xmit() will stop the netdev
-queue and then, if appropriate, restart it. This causes the TX softirq
-to run, entirely gratuitously.
+commit 9a5d2bd99e0dfe9a31b3c160073ac445ba3d773f
+Author: David Woodhouse <dwmw2@infradead.org>
+Date:   Sun Apr 8 10:01:44 2012 +0000
 
-This is "only" a waste of CPU time in the normal case, but it's actively
-harmful when the PPP device is a TEQL slave — the wakeup will cause the
-offending device to receive the next TX packet from the TEQL queue, when
-it *should* have gone to the next slave in the list. We end up seeing
-large bursts of packets on just *one* slave device, rather than using
-the full available bandwidth over all slaves.
+    ppp: Fix race condition with queue start/stop
+    
+    Commit e675f0cc9a872fd152edc0c77acfed19bf28b81e ("ppp: Don't stop and
+    restart queue on every TX packet") introduced a race condition which
+    could leave the net queue stopped even when the channel is no longer
+    busy. By calling netif_stop_queue() from ppp_start_xmit(), based on the
+    return value from ppp_xmit_process() but *after* all the locks have been
+    dropped, we could potentially do so *after* the channel has actually
+    finished transmitting and attempted to re-wake the queue.
+    
+    Fix this by moving the netif_stop_queue() into ppp_xmit_process() under
+    the xmit lock. I hadn't done this previously, because it gets called
+    from other places than ppp_start_xmit(). But I now think it's the better
+    option. The net queue *should* be stopped if the channel becomes
+    congested due to writes from pppd, anyway.
+    
+    Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+    Signed-off-by: David S. Miller <davem@davemloft.net>
 
-This patch fixes the problem by *not* unconditionally stopping the queue
-in ppp_start_xmit(). It adds a return value from ppp_xmit_process()
-which indicates whether the queue should be stopped or not.
+commit e675f0cc9a872fd152edc0c77acfed19bf28b81e
+Author: David Woodhouse <dwmw2@infradead.org>
+Date:   Mon Mar 26 00:03:42 2012 +0000
 
-It *doesn't* remove the call to netif_wake_queue() from
-ppp_xmit_process(), because other code paths (especially from
-ppp_output_wakeup()) need it there and it's messy to push it out to the
-other callers to do it based on the return value. So we leave it in
-place — it's a no-op in the case where the queue wasn't stopped, so it's
-harmless in the TX path.
+    ppp: Don't stop and restart queue on every TX packet
+    
+    For every transmitted packet, ppp_start_xmit() will stop the netdev
+    queue and then, if appropriate, restart it. This causes the TX softirq
+    to run, entirely gratuitously.
+    
+    This is "only" a waste of CPU time in the normal case, but it's actively
+    harmful when the PPP device is a TEQL slave — the wakeup will cause the
+    offending device to receive the next TX packet from the TEQL queue, when
+    it *should* have gone to the next slave in the list. We end up seeing
+    large bursts of packets on just *one* slave device, rather than using
+    the full available bandwidth over all slaves.
+    
+    This patch fixes the problem by *not* unconditionally stopping the queue
+    in ppp_start_xmit(). It adds a return value from ppp_xmit_process()
+    which indicates whether the queue should be stopped or not.
+    
+    It *doesn't* remove the call to netif_wake_queue() from
+    ppp_xmit_process(), because other code paths (especially from
+    ppp_output_wakeup()) need it there and it's messy to push it out to the
+    other callers to do it based on the return value. So we leave it in
+    place — it's a no-op in the case where the queue wasn't stopped, so it's
+    harmless in the TX path.
+    
+    Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+    Signed-off-by: David S. Miller <davem@davemloft.net>
 
-Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
 
+
+diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
+index 159da29..21d7151 100644
 --- a/drivers/net/ppp/ppp_generic.c
 +++ b/drivers/net/ppp/ppp_generic.c
-@@ -235,7 +235,7 @@ struct ppp_net {
- /* Prototypes. */
- static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
- 			struct file *file, unsigned int cmd, unsigned long arg);
--static void ppp_xmit_process(struct ppp *ppp);
-+static int ppp_xmit_process(struct ppp *ppp);
- static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
- static void ppp_push(struct ppp *ppp);
- static void ppp_channel_push(struct channel *pch);
-@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, stru
+@@ -968,7 +968,6 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
  	proto = npindex_to_proto[npi];
  	put_unaligned_be16(proto, pp);
  
 -	netif_stop_queue(dev);
  	skb_queue_tail(&ppp->file.xq, skb);
--	ppp_xmit_process(ppp);
-+	if (!ppp_xmit_process(ppp))
-+		netif_stop_queue(dev);
+ 	ppp_xmit_process(ppp);
  	return NETDEV_TX_OK;
- 
-  outf:
-@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device
-  * Called to do any work queued up on the transmit side
-  * that can now be done.
-  */
--static void
-+static int
- ppp_xmit_process(struct ppp *ppp)
- {
- 	struct sk_buff *skb;
-+	int ret = 0;
- 
- 	ppp_xmit_lock(ppp);
- 	if (!ppp->closing) {
-@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp)
- 			ppp_send_frame(ppp, skb);
- 		/* If there's no work left to do, tell the core net
+@@ -1063,6 +1062,8 @@ ppp_xmit_process(struct ppp *ppp)
  		   code that we can accept some more. */
--		if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
-+		if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) {
+ 		if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
  			netif_wake_queue(ppp->dev);
-+			ret = 1;
-+		}
++		else
++			netif_stop_queue(ppp->dev);
  	}
  	ppp_xmit_unlock(ppp);
-+	return ret;
  }
- 
- static inline struct sk_buff *
author	Jo-Philipp Wich <jow@openwrt.org>	2012-04-16 15:04:44 +0000
committer	Jo-Philipp Wich <jow@openwrt.org>	2012-04-16 15:04:44 +0000
commit	2036c23faa0f6ed2784769d3f4a787dd76a0c520 (patch)
tree	6e0e5f234d8893d2e7c6ed943fe26d597653f46f /target
parent	f638b544f3b2ffe568f581f0225760ef6cc31d92 (diff)
download	upstream-2036c23faa0f6ed2784769d3f4a787dd76a0c520.tar.gz upstream-2036c23faa0f6ed2784769d3f4a787dd76a0c520.tar.bz2 upstream-2036c23faa0f6ed2784769d3f4a787dd76a0c520.zip