From 88f8c8d7eb0fa4a45126f8a5631979cd91453c41 Mon Sep 17 00:00:00 2001 From: Kevin Darbyshire-Bryant Date: Wed, 12 Oct 2016 10:40:05 +0100 Subject: [PATCH] iproute2: support latest cake & restore DSCP washing Support new packet overhead passing paradigm in cake qdisc, also restore DSCP wash/nowash keywords. Signed-off-by: Kevin Darbyshire-Bryant --- .../iproute2/patches/950-add-cake-to-tc.patch | 130 ++++++++++-------- 1 file changed, 75 insertions(+), 55 deletions(-) diff --git a/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch b/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch index ffb3e63e13..d0fc66daaa 100644 --- a/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch +++ b/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch @@ -1,6 +1,8 @@ +diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h +index 8d2530d..028b83c 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h -@@ -850,4 +850,57 @@ struct tc_pie_xstats { +@@ -850,4 +850,59 @@ struct tc_pie_xstats { __u32 maxq; /* maximum queue size */ __u32 ecn_mark; /* packets marked with ecn*/ }; @@ -18,6 +20,8 @@ + TCA_CAKE_AUTORATE, + TCA_CAKE_MEMORY, + TCA_CAKE_NAT, ++ TCA_CAKE_ETHERNET, ++ TCA_CAKE_WASH, + __TCA_CAKE_MAX +}; +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) @@ -58,6 +62,8 @@ +}; + #endif +diff --git a/tc/Makefile b/tc/Makefile +index 56acbaa..d421b8e 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -63,6 +63,7 @@ TCMODULES += q_codel.o @@ -68,9 +74,12 @@ TCMODULES += q_hhf.o TCMODULES += e_bpf.o +diff --git a/tc/q_cake.c b/tc/q_cake.c +new file mode 100644 +index 0000000..14e5aab --- /dev/null +++ b/tc/q_cake.c -@@ -0,0 +1,643 @@ +@@ -0,0 +1,654 @@ +/* + * Common Applications Kept Enhanced -- CAKE + * @@ -129,6 +138,7 @@ + " [ besteffort | precedence | diffserv8 | diffserv4* ]\n" + " [ flowblind | srchost | dsthost | hosts | flows* | dual-srchost | dual-dsthost | triple-isolate ] [ nat | nonat* ]\n" + " [ ptm | atm | noatm* ] [ overhead N | conservative | raw* ]\n" ++ " [ wash | nowash* ]\n" + " [ memlimit LIMIT ]\n" + " (* marks defaults)\n"); +} @@ -144,6 +154,8 @@ + unsigned memlimit = 0; + int overhead = 0; + bool overhead_set = false; ++ bool overhead_override = false; ++ int wash = -1; + int flowmode = -1; + int nat = -1; + int atm = -1; @@ -213,6 +225,11 @@ + } else if (strcmp(*argv, "diffserv-llt") == 0) { + diffserv = 5; + ++ } else if (strcmp(*argv, "nowash") == 0) { ++ wash = 0; ++ } else if (strcmp(*argv, "wash") == 0) { ++ wash = 1; ++ + } else if (strcmp(*argv, "flowblind") == 0) { + flowmode = 0; + } else if (strcmp(*argv, "srchost") == 0) { @@ -246,6 +263,7 @@ + atm = 0; + overhead = 0; + overhead_set = true; ++ overhead_override = true; + } else if (strcmp(*argv, "conservative") == 0) { + /* + * Deliberately over-estimate overhead: @@ -256,28 +274,6 @@ + overhead = 48; + overhead_set = true; + -+ /* -+ * DOCSIS overhead figures courtesy of Greg White @ CableLabs. -+ * The "-ip" versions include the Ethernet frame header, in case -+ * you are shaping an IP interface instead of an Ethernet one. -+ */ -+ } else if (strcmp(*argv, "docsis-downstream-ip") == 0) { -+ atm = 0; -+ overhead += 35; -+ overhead_set = true; -+ } else if (strcmp(*argv, "docsis-downstream") == 0) { -+ atm = 0; -+ overhead += 35 - 14; -+ overhead_set = true; -+ } else if (strcmp(*argv, "docsis-upstream-ip") == 0) { -+ atm = 0; -+ overhead += 28; -+ overhead_set = true; -+ } else if (strcmp(*argv, "docsis-upstream") == 0) { -+ atm = 0; -+ overhead += 28 - 14; -+ overhead_set = true; -+ + /* Various ADSL framing schemes, all over ATM cells */ + } else if (strcmp(*argv, "ipoa-vcmux") == 0) { + atm = 1; @@ -325,38 +321,38 @@ + + } else if (strcmp(*argv, "via-ethernet") == 0) { + /* -+ * The above overheads are relative to an IP packet, -+ * but Linux includes Ethernet framing overhead already -+ * if we are shaping an Ethernet interface rather than -+ * an IP interface. ++ * We used to use this flag to manually compensate for ++ * Linux including the Ethernet header on Ethernet-type ++ * interfaces, but not on IP-type interfaces. ++ * ++ * It is no longer needed, because Cake now adjusts for ++ * that automatically, and is thus ignored. ++ * ++ * It would be deleted entirely, but it appears in the ++ * stats output when the automatic compensation is active. + */ -+ overhead -= 14; -+ overhead_set = true; + -+ /* Additional Ethernet-related overheads used by some ISPs */ -+ } else if (strcmp(*argv, "ether-phy") == 0) { -+ /* ethernet pre-amble & interframe gap 20 bytes -+ * Linux will have already accounted for MACs & frame type 14 bytes -+ * you probably want to add an FCS as well*/ -+ overhead += 20; -+ overhead_set = true; -+ } else if (strcmp(*argv, "ether-all") == 0) { ++ } else if (strcmp(*argv, "ethernet") == 0) { + /* ethernet pre-amble & interframe gap & FCS -+ * Linux will have already accounted for MACs & frame type 14 bytes -+ * you may need to add vlan tag*/ -+ overhead += 24; ++ * you may need to add vlan tag */ ++ overhead += 38; + overhead_set = true; + -+ } else if (strcmp(*argv, "ether-fcs") == 0) { -+ /* Frame Check Sequence */ -+ /* we ignore the minimum frame size, because IP packets usually meet it */ -+ overhead += 4; -+ overhead_set = true; ++ /* Additional Ethernet-related overhead used by some ISPs */ + } else if (strcmp(*argv, "ether-vlan") == 0) { + /* 802.1q VLAN tag - may be repeated */ + overhead += 4; + overhead_set = true; + ++ /* ++ * DOCSIS cable shapers account for Ethernet frame with FCS, ++ * but not interframe gap nor preamble. ++ */ ++ } else if (strcmp(*argv, "docsis") == 0) { ++ atm = 0; ++ overhead += 18; ++ overhead_set = true; ++ + } else if (strcmp(*argv, "overhead") == 0) { + char* p = NULL; + NEXT_ARG(); @@ -397,6 +393,10 @@ + addattr_l(n, 1024, TCA_CAKE_FLOW_MODE, &flowmode, sizeof(flowmode)); + if (overhead_set) + addattr_l(n, 1024, TCA_CAKE_OVERHEAD, &overhead, sizeof(overhead)); ++ if (overhead_override) { ++ unsigned zero = 0; ++ addattr_l(n, 1024, TCA_CAKE_ETHERNET, &zero, sizeof(zero)); ++ } + if (interval) + addattr_l(n, 1024, TCA_CAKE_RTT, &interval, sizeof(interval)); + if (target) @@ -407,6 +407,8 @@ + addattr_l(n, 1024, TCA_CAKE_MEMORY, &memlimit, sizeof(memlimit)); + if (nat != -1) + addattr_l(n, 1024, TCA_CAKE_NAT, &nat, sizeof(nat)); ++ if (wash != -1) ++ addattr_l(n, 1024, TCA_CAKE_WASH, &wash, sizeof(wash)); + + tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail; + return 0; @@ -422,9 +424,11 @@ + unsigned interval = 0; + unsigned memlimit = 0; + int overhead = 0; ++ int ethernet = 0; + int atm = 0; + int nat = 0; + int autorate = 0; ++ int wash = 0; + SPRINT_BUF(b1); + SPRINT_BUF(b2); + @@ -511,6 +515,10 @@ + if(nat) + fprintf(f, "nat "); + } ++ if (tb[TCA_CAKE_WASH] && ++ RTA_PAYLOAD(tb[TCA_CAKE_WASH]) >= sizeof(__u32)) { ++ wash = rta_getattr_u32(tb[TCA_CAKE_WASH]); ++ } + if (tb[TCA_CAKE_ATM] && + RTA_PAYLOAD(tb[TCA_CAKE_ATM]) >= sizeof(__u32)) { + atm = rta_getattr_u32(tb[TCA_CAKE_ATM]); @@ -519,26 +527,38 @@ + RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__u32)) { + overhead = rta_getattr_u32(tb[TCA_CAKE_OVERHEAD]); + } ++ if (tb[TCA_CAKE_ETHERNET] && ++ RTA_PAYLOAD(tb[TCA_CAKE_ETHERNET]) >= sizeof(__u32)) { ++ ethernet = rta_getattr_u32(tb[TCA_CAKE_ETHERNET]); ++ } + if (tb[TCA_CAKE_RTT] && + RTA_PAYLOAD(tb[TCA_CAKE_RTT]) >= sizeof(__u32)) { + interval = rta_getattr_u32(tb[TCA_CAKE_RTT]); + } + ++ if (wash) ++ fprintf(f,"wash "); ++ + if (interval) + fprintf(f, "rtt %s ", sprint_time(interval, b2)); + -+ if (atm == 1) -+ fprintf(f, "atm "); -+ else if (atm == 2) -+ fprintf(f, "ptm "); -+ else if (overhead) -+ fprintf(f, "noatm "); ++ if (!atm && overhead == ethernet) { ++ fprintf(f, "raw "); ++ } else { ++ if (atm == 1) ++ fprintf(f, "atm "); ++ else if (atm == 2) ++ fprintf(f, "ptm "); ++ else ++ fprintf(f, "noatm "); + -+ if (overhead || atm) + fprintf(f, "overhead %d ", overhead); + -+ if (!atm && !overhead) -+ fprintf(f, "raw "); ++ // This is actually the *amount* of automatic compensation, but we only report ++ // its presence as a boolean for now. ++ if (ethernet) ++ fprintf(f, "via-ethernet "); ++ } + + if (memlimit) + fprintf(f, "memlimit %s", sprint_size(memlimit, b1));