From f8cf19c19528a468cc0b9846c0328a94cccdc605 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 May 2012 09:30:50 +0000 Subject: [PATCH] fq_codel: Fair Queue Codel AQM commit 4b549a2ef4bef9965d97cbd992ba67930cd3e0fe upstream. Fair Queue Codel packet scheduler Principles : - Packets are classified (internal classifier or external) on flows. - This is a Stochastic model (as we use a hash, several flows might be hashed on same slot) - Each flow has a CoDel managed queue. - Flows are linked onto two (Round Robin) lists, so that new flows have priority on old ones. - For a given flow, packets are not reordered (CoDel uses a FIFO) - head drops only. - ECN capability is on by default. - Very low memory footprint (64 bytes per flow) tc qdisc ... fq_codel [ limit PACKETS ] [ flows number ] [ target TIME ] [ interval TIME ] [ noecn ] [ quantum BYTES ] defaults : 1024 flows, 10240 packets limit, quantum : device MTU target : 5ms (CoDel default) interval : 100ms (CoDel default) Impressive results on load : class htb 1:1 root leaf 10: prio 0 quantum 1514 rate 200000Kbit ceil 200000Kbit burst 1475b/8 mpu 0b overhead 0b cburst 1475b/8 mpu 0b overhead 0b level 0 Sent 43304920109 bytes 33063109 pkt (dropped 0, overlimits 0 requeues 0) rate 201691Kbit 28595pps backlog 0b 312p requeues 0 lended: 33063109 borrowed: 0 giants: 0 tokens: -912 ctokens: -912 class fq_codel 10:1735 parent 10: (dropped 1292, overlimits 0 requeues 0) backlog 15140b 10p requeues 0 deficit 1514 count 1 lastcount 1 ldelay 7.1ms class fq_codel 10:4524 parent 10: (dropped 1291, overlimits 0 requeues 0) backlog 16654b 11p requeues 0 deficit 1514 count 1 lastcount 1 ldelay 7.1ms class fq_codel 10:4e74 parent 10: (dropped 1290, overlimits 0 requeues 0) backlog 6056b 4p requeues 0 deficit 1514 count 1 lastcount 1 ldelay 6.4ms dropping drop_next 92.0ms class fq_codel 10:628a parent 10: (dropped 1289, overlimits 0 requeues 0) backlog 7570b 5p requeues 0 deficit 1514 count 1 lastcount 1 ldelay 5.4ms dropping 
drop_next 90.9ms class fq_codel 10:a4b3 parent 10: (dropped 302, overlimits 0 requeues 0) backlog 16654b 11p requeues 0 deficit 1514 count 1 lastcount 1 ldelay 7.1ms class fq_codel 10:c3c2 parent 10: (dropped 1284, overlimits 0 requeues 0) backlog 13626b 9p requeues 0 deficit 1514 count 1 lastcount 1 ldelay 5.9ms class fq_codel 10:d331 parent 10: (dropped 299, overlimits 0 requeues 0) backlog 15140b 10p requeues 0 deficit 1514 count 1 lastcount 1 ldelay 7.0ms class fq_codel 10:d526 parent 10: (dropped 12160, overlimits 0 requeues 0) backlog 35870b 211p requeues 0 deficit 1508 count 12160 lastcount 1 ldelay 15.3ms dropping drop_next 247us class fq_codel 10:e2c6 parent 10: (dropped 1288, overlimits 0 requeues 0) backlog 15140b 10p requeues 0 deficit 1514 count 1 lastcount 1 ldelay 7.1ms class fq_codel 10:eab5 parent 10: (dropped 1285, overlimits 0 requeues 0) backlog 16654b 11p requeues 0 deficit 1514 count 1 lastcount 1 ldelay 5.9ms class fq_codel 10:f220 parent 10: (dropped 1289, overlimits 0 requeues 0) backlog 15140b 10p requeues 0 deficit 1514 count 1 lastcount 1 ldelay 7.1ms qdisc htb 1: root refcnt 6 r2q 10 default 1 direct_packets_stat 0 ver 3.17 Sent 43331086547 bytes 33092812 pkt (dropped 0, overlimits 66063544 requeues 71) rate 201697Kbit 28602pps backlog 0b 260p requeues 71 qdisc fq_codel 10: parent 1:1 limit 10240p flows 65536 target 5.0ms interval 100.0ms ecn Sent 43331086547 bytes 33092812 pkt (dropped 949359, overlimits 0 requeues 0) rate 201697Kbit 28602pps backlog 189352b 260p requeues 0 maxpacket 1514 drop_overlimit 0 new_flow_count 5582 ecn_mark 125593 new_flows_len 0 old_flows_len 11 PING 172.30.42.18 (172.30.42.18) 56(84) bytes of data. 
64 bytes from 172.30.42.18: icmp_req=1 ttl=64 time=0.227 ms 64 bytes from 172.30.42.18: icmp_req=2 ttl=64 time=0.165 ms 64 bytes from 172.30.42.18: icmp_req=3 ttl=64 time=0.166 ms 64 bytes from 172.30.42.18: icmp_req=4 ttl=64 time=0.151 ms 64 bytes from 172.30.42.18: icmp_req=5 ttl=64 time=0.164 ms 64 bytes from 172.30.42.18: icmp_req=6 ttl=64 time=0.172 ms 64 bytes from 172.30.42.18: icmp_req=7 ttl=64 time=0.175 ms 64 bytes from 172.30.42.18: icmp_req=8 ttl=64 time=0.183 ms 64 bytes from 172.30.42.18: icmp_req=9 ttl=64 time=0.158 ms 64 bytes from 172.30.42.18: icmp_req=10 ttl=64 time=0.200 ms 10 packets transmitted, 10 received, 0% packet loss, time 8999ms rtt min/avg/max/mdev = 0.151/0.176/0.227/0.022 ms Much better than SFQ because of priority given to new flows, and fast path dirtying less cache lines. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 54 ++++ net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/sch_fq_codel.c | 624 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 690 insertions(+) create mode 100644 net/sched/sch_fq_codel.c --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -659,4 +659,58 @@ struct tc_codel_xstats { __u32 dropping; /* are we in dropping state ? 
*/ }; +/* FQ_CODEL */ + +enum { + TCA_FQ_CODEL_UNSPEC, + TCA_FQ_CODEL_TARGET, + TCA_FQ_CODEL_LIMIT, + TCA_FQ_CODEL_INTERVAL, + TCA_FQ_CODEL_ECN, + TCA_FQ_CODEL_FLOWS, + TCA_FQ_CODEL_QUANTUM, + __TCA_FQ_CODEL_MAX +}; + +#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1) + +enum { + TCA_FQ_CODEL_XSTATS_QDISC, + TCA_FQ_CODEL_XSTATS_CLASS, +}; + +struct tc_fq_codel_qd_stats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 drop_overlimit; /* number of time max qdisc + * packet limit was hit + */ + __u32 ecn_mark; /* number of packets we ECN marked + * instead of being dropped + */ + __u32 new_flow_count; /* number of time packets + * created a 'new flow' + */ + __u32 new_flows_len; /* count of flows in new list */ + __u32 old_flows_len; /* count of flows in old list */ +}; + +struct tc_fq_codel_cl_stats { + __s32 deficit; + __u32 ldelay; /* in-queue delay seen by most recently + * dequeued packet + */ + __u32 count; + __u32 lastcount; + __u32 dropping; + __s32 drop_next; +}; + +struct tc_fq_codel_xstats { + __u32 type; + union { + struct tc_fq_codel_qd_stats qdisc_stats; + struct tc_fq_codel_cl_stats class_stats; + }; +}; + #endif --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -261,6 +261,17 @@ config NET_SCH_CODEL If unsure, say N. +config NET_SCH_FQ_CODEL + tristate "Fair Queue Controlled Delay AQM (FQ_CODEL)" + help + Say Y here if you want to use the FQ Controlled Delay (FQ_CODEL) + packet scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_fq_codel. + + If unsure, say N. 
+ config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqpr obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o +obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o --- /dev/null +++ b/net/sched/sch_fq_codel.c @@ -0,0 +1,624 @@ +/* + * Fair Queue CoDel discipline + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Copyright (C) 2012 Eric Dumazet + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Fair Queue CoDel. + * + * Principles : + * Packets are classified (internal classifier or external) on flows. + * This is a Stochastic model (as we use a hash, several flows + * might be hashed on same slot) + * Each flow has a CoDel managed queue. + * Flows are linked onto two (Round Robin) lists, + * so that new flows have priority on old ones. + * + * For a given flow, packets are not reordered (CoDel uses a FIFO) + * head drops only. + * ECN capability is on by default. + * Low memory footprint (64 bytes per flow) + */ + +struct fq_codel_flow { + struct sk_buff *head; + struct sk_buff *tail; + struct list_head flowchain; + int deficit; + u32 dropped; /* number of drops (or ECN marks) on this flow */ + struct codel_vars cvars; +}; /* please try to keep this structure <= 64 bytes */ + +s
--- a/arch/mips/bcm63xx/boards/board_bcm963xx.c
+++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c
@@ -2128,6 +2128,17 @@ static struct ssb_sprom bcm63xx_sprom =
 	.boardflags_lo		= 0x2848,
 	.boardflags_hi		= 0x0000,
 };
+
+int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
+{
+	/* Only a bus of type SSB_BUSTYPE_PCI gets a copy of bcm63xx_sprom. */
+	if (bus->bustype != SSB_BUSTYPE_PCI) {
+		printk(KERN_ERR PFX "unable to fill SPROM for given bustype.\n");
+		return -EINVAL;
+	}
+	memcpy(out, &bcm63xx_sprom, sizeof(*out));
+	return 0;
+}
 #endif
 
 /*
@@ -2328,8 +2339,9 @@ void __init board_prom_init(void)
 	if (!board_get_mac_address(bcm63xx_sprom.il0mac)) {
 		memcpy(bcm63xx_sprom.et0mac, bcm63xx_sprom.il0mac, ETH_ALEN);
 		memcpy(bcm63xx_sprom.et1mac, bcm63xx_sprom.il0mac, ETH_ALEN);
-		if (ssb_arch_set_fallback_sprom(&bcm63xx_sprom) < 0)
-			printk(KERN_ERR "failed to register fallback SPROM\n");
+		if (ssb_arch_register_fallback_sprom(
+				&bcm63xx_get_fallback_sprom) < 0)
+			printk(KERN_ERR PFX "failed to register fallback SPROM\n");
 	}
 #endif
 }
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -662,7 +662,6 @@ static int sprom_extract(struct ssb_bus
 static int ssb_pci_sprom_get(struct ssb_bus *bus,
 			     struct ssb_sprom *sprom)
 {
-	const struct ssb_sprom *fallback;
 	int err;
 	u16 *buf;
 
@@ -707,10 +706,14 @@ static int ssb_pci_sprom_get(struct ssb_
 		if (err) {
 			/* All CRC attempts failed.
 			 * Maybe there is no SPROM on the device?
-			 * If we have a fallback, use that. */
-			fallback = ssb_get_fallback_sprom();
-			if (fallback) {
-				memcpy(sprom, fallback, sizeof(*sprom));
+			 * Now we ask the arch code if there is some sprom
+			 * available for this device in some other storage */
+			err = ssb_fill_sprom_with_fallback(bus, sprom);
+			if (err) {
+				ssb_printk(KERN_WARNING PFX "WARNING: Using"
+					   " fallback SPROM failed (err %d)\n",
+					   err);
+			} else {
 				err = 0;
 				goto out_free;
 			}
--- a/drivers/ssb/sprom.c
+++ b/drivers/ssb/sprom.c
@@ -17,7 +17,7 @@
 #include <linux/slab.h>
 
 
-static const struct ssb_sprom *fallback_sprom;
+static int(*get_fallback_sprom)(struct ssb_bus *dev, struct ssb_sprom *out);
 
 
 static int sprom2hex(const u16 *sprom, char *buf, size_t buf_len,
@@ -145,13 +145,14 @@ out:
 }
 
 /**
- * ssb_arch_set_fallback_sprom - Set a fallback SPROM for use if no SPROM is found.
+ * ssb_arch_register_fallback_sprom - Registers a method providing a fallback
+ * SPROM if no SPROM is found.
  *
- * @sprom: The SPROM data structure to register.
+ * @sprom_callback: The callback function.
  *
- * With this function the architecture implementation may register a fallback
- * SPROM data structure. The fallback is only used for PCI based SSB devices,
- * where no valid SPROM can be found in the shadow registers.
+ * With this function the architecture implementation may register a callback
+ * handler which fills the SPROM data structure. The fallback is only used for
+ * PCI based SSB devices, where no valid SPROM can be found in the shadow registers.
  *
  * This function is useful for weird architectures that have a half-assed SSB device
  * hardwired to their PCI bus.
@@ -163,18 +164,21 @@ out:
  *
  * This function is available for architecture code, only. So it is not exported.
  */
-int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom)
+int ssb_arch_register_fallback_sprom(int (*sprom_callback)(struct ssb_bus *bus, struct ssb_sprom *out))
 {
-	if (fallback_sprom)
+	if (get_fallback_sprom)
 		return -EEXIST;
-	fallback_sprom = sprom;
+	get_fallback_sprom = sprom_callback;
 
 	return 0;
 }
 
-const struct ssb_sprom *ssb_get_fallback_sprom(void)
+int ssb_fill_sprom_with_fallback(struct ssb_bus *bus, struct ssb_sprom *out)
 {
-	return fallback_sprom;
+	if (!get_fallback_sprom)
+		return -ENOENT;
+
+	return get_fallback_sprom(bus, out);
 }
 
 /* http://bcm-v4.sipsolutions.net/802.11/IsSpromAvailable */
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -171,7 +171,7 @@ ssize_t ssb_attr_sprom_store(struct ssb_
 			     const char *buf, size_t count,
 			     int (*sprom_check_crc)(const u16 *sprom, size_t size),
 			     int (*sprom_write)(struct ssb_bus *bus, const u16 *sprom));
-extern const struct ssb_sprom *ssb_get_fallback_sprom(void);
+extern int ssb_fill_sprom_with_fallback(struct ssb_bus *bus, struct ssb_sprom *out);
 
 
 /* core.c */
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -404,7 +404,9 @@ extern bool ssb_is_sprom_available(struc
 
 /* Set a fallback SPROM.
  * See kdoc at the function definition for complete documentation. */
-extern int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom);
+extern int ssb_arch_register_fallback_sprom(
+		int (*sprom_callback)(struct ssb_bus *bus,
+		struct ssb_sprom *out));
 
 /* Suspend a SSB bus.
  * Call this from the parent bus suspend routine. */
S; + xstats.class_stats.deficit = flow->deficit; + xstats.class_stats.ldelay = + codel_time_to_us(flow->cvars.ldelay); + xstats.class_stats.count = flow->cvars.count; + xstats.class_stats.lastcount = flow->cvars.lastcount; + xstats.class_stats.dropping = flow->cvars.dropping; + if (flow->cvars.dropping) { + codel_tdiff_t delta = flow->cvars.drop_next - + codel_get_time(); + + xstats.class_stats.drop_next = (delta >= 0) ? + codel_time_to_us(delta) : + -codel_time_to_us(-delta); + } + while (skb) { + qs.qlen++; + skb = skb->next; + } + qs.backlog = q->backlogs[idx]; + qs.drops = flow->dropped; + } + if (gnet_stats_copy_queue(d, &qs) < 0) + return -1; + if (idx < q->flows_cnt) + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); + return 0; +} + +static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct fq_codel_sched_data *q = qdisc_priv(sch); + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < q->flows_cnt; i++) { + if (list_empty(&q->flows[i].flowchain) || + arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, i + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static const struct Qdisc_class_ops fq_codel_class_ops = { + .leaf = fq_codel_leaf, + .get = fq_codel_get, + .put = fq_codel_put, + .tcf_chain = fq_codel_find_tcf, + .bind_tcf = fq_codel_bind, + .unbind_tcf = fq_codel_put, + .dump = fq_codel_dump_class, + .dump_stats = fq_codel_dump_class_stats, + .walk = fq_codel_walk, +}; + +static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { + .cl_ops = &fq_codel_class_ops, + .id = "fq_codel", + .priv_size = sizeof(struct fq_codel_sched_data), + .enqueue = fq_codel_enqueue, + .dequeue = fq_codel_dequeue, + .peek = qdisc_peek_dequeued, + .drop = fq_codel_drop, + .init = fq_codel_init, + .reset = fq_codel_reset, + .destroy = fq_codel_destroy, + .change = fq_codel_change, + .dump = fq_codel_dump, + .dump_stats = fq_codel_dump_stats, + .owner = THIS_MODULE, +}; + +static int 
__init fq_codel_module_init(void) +{ + return register_qdisc(&fq_codel_qdisc_ops); +} + +static void __exit fq_codel_module_exit(void) +{ + unregister_qdisc(&fq_codel_qdisc_ops); +} + +module_init(fq_codel_module_init) +module_exit(fq_codel_module_exit) +MODULE_AUTHOR("Eric Dumazet"); +MODULE_LICENSE("GPL");