1
0
mirror of git://projects.qi-hardware.com/openwrt-xburst.git synced 2025-01-11 23:40:15 +02:00

Update ppp TX restart patch to avoid race condition

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

git-svn-id: svn://svn.openwrt.org/openwrt/trunk@31312 3c298f89-4303-0410-b956-a3cf2f4a3e73
This commit is contained in:
jow 2012-04-16 15:04:44 +00:00
parent 378de966db
commit b497447c4e
2 changed files with 118 additions and 114 deletions

View File

@ -1,75 +1,77 @@
For every transmitted packet, ppp_start_xmit() will stop the netdev commit 9a5d2bd99e0dfe9a31b3c160073ac445ba3d773f
queue and then, if appropriate, restart it. This causes the TX softirq Author: David Woodhouse <dwmw2@infradead.org>
to run, entirely gratuitously. Date: Sun Apr 8 10:01:44 2012 +0000
This is "only" a waste of CPU time in the normal case, but it's actively ppp: Fix race condition with queue start/stop
harmful when the PPP device is a TEQL slave — the wakeup will cause the
offending device to receive the next TX packet from the TEQL queue, when Commit e675f0cc9a872fd152edc0c77acfed19bf28b81e ("ppp: Don't stop and
it *should* have gone to the next slave in the list. We end up seeing restart queue on every TX packet") introduced a race condition which
large bursts of packets on just *one* slave device, rather than using could leave the net queue stopped even when the channel is no longer
the full available bandwidth over all slaves. busy. By calling netif_stop_queue() from ppp_start_xmit(), based on the
return value from ppp_xmit_process() but *after* all the locks have been
dropped, we could potentially do so *after* the channel has actually
finished transmitting and attempted to re-wake the queue.
Fix this by moving the netif_stop_queue() into ppp_xmit_process() under
the xmit lock. I hadn't done this previously, because it gets called
from other places than ppp_start_xmit(). But I now think it's the better
option. The net queue *should* be stopped if the channel becomes
congested due to writes from pppd, anyway.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This patch fixes the problem by *not* unconditionally stopping the queue commit e675f0cc9a872fd152edc0c77acfed19bf28b81e
in ppp_start_xmit(). It adds a return value from ppp_xmit_process() Author: David Woodhouse <dwmw2@infradead.org>
which indicates whether the queue should be stopped or not. Date: Mon Mar 26 00:03:42 2012 +0000
It *doesn't* remove the call to netif_wake_queue() from ppp: Don't stop and restart queue on every TX packet
ppp_xmit_process(), because other code paths (especially from
ppp_output_wakeup()) need it there and it's messy to push it out to the For every transmitted packet, ppp_start_xmit() will stop the netdev
other callers to do it based on the return value. So we leave it in queue and then, if appropriate, restart it. This causes the TX softirq
place — it's a no-op in the case where the queue wasn't stopped, so it's to run, entirely gratuitously.
harmless in the TX path.
This is "only" a waste of CPU time in the normal case, but it's actively
harmful when the PPP device is a TEQL slave — the wakeup will cause the
offending device to receive the next TX packet from the TEQL queue, when
it *should* have gone to the next slave in the list. We end up seeing
large bursts of packets on just *one* slave device, rather than using
the full available bandwidth over all slaves.
This patch fixes the problem by *not* unconditionally stopping the queue
in ppp_start_xmit(). It adds a return value from ppp_xmit_process()
which indicates whether the queue should be stopped or not.
It *doesn't* remove the call to netif_wake_queue() from
ppp_xmit_process(), because other code paths (especially from
ppp_output_wakeup()) need it there and it's messy to push it out to the
other callers to do it based on the return value. So we leave it in
place — it's a no-op in the case where the queue wasn't stopped, so it's
harmless in the TX path.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 159da29..21d7151 100644
--- a/drivers/net/ppp/ppp_generic.c --- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c
@@ -235,7 +235,7 @@ struct ppp_net { @@ -968,7 +968,6 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* Prototypes. */
static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
struct file *file, unsigned int cmd, unsigned long arg);
-static void ppp_xmit_process(struct ppp *ppp);
+static int ppp_xmit_process(struct ppp *ppp);
static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
static void ppp_push(struct ppp *ppp);
static void ppp_channel_push(struct channel *pch);
@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, stru
proto = npindex_to_proto[npi]; proto = npindex_to_proto[npi];
put_unaligned_be16(proto, pp); put_unaligned_be16(proto, pp);
- netif_stop_queue(dev); - netif_stop_queue(dev);
skb_queue_tail(&ppp->file.xq, skb); skb_queue_tail(&ppp->file.xq, skb);
- ppp_xmit_process(ppp); ppp_xmit_process(ppp);
+ if (!ppp_xmit_process(ppp))
+ netif_stop_queue(dev);
return NETDEV_TX_OK; return NETDEV_TX_OK;
@@ -1063,6 +1062,8 @@ ppp_xmit_process(struct ppp *ppp)
outf:
@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device
* Called to do any work queued up on the transmit side
* that can now be done.
*/
-static void
+static int
ppp_xmit_process(struct ppp *ppp)
{
struct sk_buff *skb;
+ int ret = 0;
ppp_xmit_lock(ppp);
if (!ppp->closing) {
@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp)
ppp_send_frame(ppp, skb);
/* If there's no work left to do, tell the core net
code that we can accept some more. */ code that we can accept some more. */
- if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
+ if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) {
netif_wake_queue(ppp->dev); netif_wake_queue(ppp->dev);
+ ret = 1; + else
+ } + netif_stop_queue(ppp->dev);
} }
ppp_xmit_unlock(ppp); ppp_xmit_unlock(ppp);
+ return ret;
} }
static inline struct sk_buff *

View File

@ -1,75 +1,77 @@
For every transmitted packet, ppp_start_xmit() will stop the netdev commit 9a5d2bd99e0dfe9a31b3c160073ac445ba3d773f
queue and then, if appropriate, restart it. This causes the TX softirq Author: David Woodhouse <dwmw2@infradead.org>
to run, entirely gratuitously. Date: Sun Apr 8 10:01:44 2012 +0000
This is "only" a waste of CPU time in the normal case, but it's actively ppp: Fix race condition with queue start/stop
harmful when the PPP device is a TEQL slave — the wakeup will cause the
offending device to receive the next TX packet from the TEQL queue, when Commit e675f0cc9a872fd152edc0c77acfed19bf28b81e ("ppp: Don't stop and
it *should* have gone to the next slave in the list. We end up seeing restart queue on every TX packet") introduced a race condition which
large bursts of packets on just *one* slave device, rather than using could leave the net queue stopped even when the channel is no longer
the full available bandwidth over all slaves. busy. By calling netif_stop_queue() from ppp_start_xmit(), based on the
return value from ppp_xmit_process() but *after* all the locks have been
dropped, we could potentially do so *after* the channel has actually
finished transmitting and attempted to re-wake the queue.
Fix this by moving the netif_stop_queue() into ppp_xmit_process() under
the xmit lock. I hadn't done this previously, because it gets called
from other places than ppp_start_xmit(). But I now think it's the better
option. The net queue *should* be stopped if the channel becomes
congested due to writes from pppd, anyway.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This patch fixes the problem by *not* unconditionally stopping the queue commit e675f0cc9a872fd152edc0c77acfed19bf28b81e
in ppp_start_xmit(). It adds a return value from ppp_xmit_process() Author: David Woodhouse <dwmw2@infradead.org>
which indicates whether the queue should be stopped or not. Date: Mon Mar 26 00:03:42 2012 +0000
It *doesn't* remove the call to netif_wake_queue() from ppp: Don't stop and restart queue on every TX packet
ppp_xmit_process(), because other code paths (especially from
ppp_output_wakeup()) need it there and it's messy to push it out to the For every transmitted packet, ppp_start_xmit() will stop the netdev
other callers to do it based on the return value. So we leave it in queue and then, if appropriate, restart it. This causes the TX softirq
place — it's a no-op in the case where the queue wasn't stopped, so it's to run, entirely gratuitously.
harmless in the TX path.
This is "only" a waste of CPU time in the normal case, but it's actively
harmful when the PPP device is a TEQL slave — the wakeup will cause the
offending device to receive the next TX packet from the TEQL queue, when
it *should* have gone to the next slave in the list. We end up seeing
large bursts of packets on just *one* slave device, rather than using
the full available bandwidth over all slaves.
This patch fixes the problem by *not* unconditionally stopping the queue
in ppp_start_xmit(). It adds a return value from ppp_xmit_process()
which indicates whether the queue should be stopped or not.
It *doesn't* remove the call to netif_wake_queue() from
ppp_xmit_process(), because other code paths (especially from
ppp_output_wakeup()) need it there and it's messy to push it out to the
other callers to do it based on the return value. So we leave it in
place — it's a no-op in the case where the queue wasn't stopped, so it's
harmless in the TX path.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 159da29..21d7151 100644
--- a/drivers/net/ppp/ppp_generic.c --- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c
@@ -235,7 +235,7 @@ struct ppp_net { @@ -968,7 +968,6 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* Prototypes. */
static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
struct file *file, unsigned int cmd, unsigned long arg);
-static void ppp_xmit_process(struct ppp *ppp);
+static int ppp_xmit_process(struct ppp *ppp);
static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
static void ppp_push(struct ppp *ppp);
static void ppp_channel_push(struct channel *pch);
@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, stru
proto = npindex_to_proto[npi]; proto = npindex_to_proto[npi];
put_unaligned_be16(proto, pp); put_unaligned_be16(proto, pp);
- netif_stop_queue(dev); - netif_stop_queue(dev);
skb_queue_tail(&ppp->file.xq, skb); skb_queue_tail(&ppp->file.xq, skb);
- ppp_xmit_process(ppp); ppp_xmit_process(ppp);
+ if (!ppp_xmit_process(ppp))
+ netif_stop_queue(dev);
return NETDEV_TX_OK; return NETDEV_TX_OK;
@@ -1063,6 +1062,8 @@ ppp_xmit_process(struct ppp *ppp)
outf:
@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device
* Called to do any work queued up on the transmit side
* that can now be done.
*/
-static void
+static int
ppp_xmit_process(struct ppp *ppp)
{
struct sk_buff *skb;
+ int ret = 0;
ppp_xmit_lock(ppp);
if (!ppp->closing) {
@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp)
ppp_send_frame(ppp, skb);
/* If there's no work left to do, tell the core net
code that we can accept some more. */ code that we can accept some more. */
- if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
+ if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) {
netif_wake_queue(ppp->dev); netif_wake_queue(ppp->dev);
+ ret = 1; + else
+ } + netif_stop_queue(ppp->dev);
} }
ppp_xmit_unlock(ppp); ppp_xmit_unlock(ppp);
+ return ret;
} }
static inline struct sk_buff *