aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--package/network/utils/iproute2/patches/950-add-cake-to-tc.patch1391
1 files changed, 1019 insertions, 372 deletions
diff --git a/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch b/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch
index 882db8af19..c2a9bdef1a 100644
--- a/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch
+++ b/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch
@@ -1,14 +1,12 @@
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
-@@ -850,4 +850,63 @@ struct tc_pie_xstats {
- __u32 maxq; /* maximum queue size */
- __u32 ecn_mark; /* packets marked with ecn*/
+@@ -852,2 +852,116 @@ enum {
};
-+
+/* CAKE */
+enum {
+ TCA_CAKE_UNSPEC,
-+ TCA_CAKE_BASE_RATE,
++ TCA_CAKE_PAD,
++ TCA_CAKE_BASE_RATE64,
+ TCA_CAKE_DIFFSERV_MODE,
+ TCA_CAKE_ATM,
+ TCA_CAKE_FLOW_MODE,
@@ -18,100 +16,761 @@
+ TCA_CAKE_AUTORATE,
+ TCA_CAKE_MEMORY,
+ TCA_CAKE_NAT,
-+ TCA_CAKE_ETHERNET,
++ TCA_CAKE_RAW, // was _ETHERNET
+ TCA_CAKE_WASH,
+ TCA_CAKE_MPU,
+ TCA_CAKE_INGRESS,
+ TCA_CAKE_ACK_FILTER,
++ TCA_CAKE_SPLIT_GSO,
+ __TCA_CAKE_MAX
+};
+#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1)
+
-+struct tc_cake_traffic_stats {
-+ __u32 packets;
-+ __u32 link_ms;
-+ __u64 bytes;
++enum {
++ __TCA_CAKE_STATS_INVALID,
++ TCA_CAKE_STATS_PAD,
++ TCA_CAKE_STATS_CAPACITY_ESTIMATE64,
++ TCA_CAKE_STATS_MEMORY_LIMIT,
++ TCA_CAKE_STATS_MEMORY_USED,
++ TCA_CAKE_STATS_AVG_NETOFF,
++ TCA_CAKE_STATS_MIN_NETLEN,
++ TCA_CAKE_STATS_MAX_NETLEN,
++ TCA_CAKE_STATS_MIN_ADJLEN,
++ TCA_CAKE_STATS_MAX_ADJLEN,
++ TCA_CAKE_STATS_TIN_STATS,
++ TCA_CAKE_STATS_DEFICIT,
++ TCA_CAKE_STATS_COBALT_COUNT,
++ TCA_CAKE_STATS_DROPPING,
++ TCA_CAKE_STATS_DROP_NEXT_US,
++ TCA_CAKE_STATS_P_DROP,
++ TCA_CAKE_STATS_BLUE_TIMER_US,
++ __TCA_CAKE_STATS_MAX
+};
++#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
+
++enum {
++ __TCA_CAKE_TIN_STATS_INVALID,
++ TCA_CAKE_TIN_STATS_PAD,
++ TCA_CAKE_TIN_STATS_SENT_PACKETS,
++ TCA_CAKE_TIN_STATS_SENT_BYTES64,
++ TCA_CAKE_TIN_STATS_DROPPED_PACKETS,
++ TCA_CAKE_TIN_STATS_DROPPED_BYTES64,
++ TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS,
++ TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64,
++ TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS,
++ TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64,
++ TCA_CAKE_TIN_STATS_BACKLOG_PACKETS,
++ TCA_CAKE_TIN_STATS_BACKLOG_BYTES,
++ TCA_CAKE_TIN_STATS_THRESHOLD_RATE64,
++ TCA_CAKE_TIN_STATS_TARGET_US,
++ TCA_CAKE_TIN_STATS_INTERVAL_US,
++ TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS,
++ TCA_CAKE_TIN_STATS_WAY_MISSES,
++ TCA_CAKE_TIN_STATS_WAY_COLLISIONS,
++ TCA_CAKE_TIN_STATS_PEAK_DELAY_US,
++ TCA_CAKE_TIN_STATS_AVG_DELAY_US,
++ TCA_CAKE_TIN_STATS_BASE_DELAY_US,
++ TCA_CAKE_TIN_STATS_SPARSE_FLOWS,
++ TCA_CAKE_TIN_STATS_BULK_FLOWS,
++ TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS,
++ TCA_CAKE_TIN_STATS_MAX_SKBLEN,
++ TCA_CAKE_TIN_STATS_FLOW_QUANTUM,
++ __TCA_CAKE_TIN_STATS_MAX
++};
++#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1)
+#define TC_CAKE_MAX_TINS (8)
-+struct tc_cake_xstats {
-+ __u16 version; /* == 5, increments when struct extended */
-+ __u8 max_tins; /* == TC_CAKE_MAX_TINS */
-+ __u8 tin_cnt; /* <= TC_CAKE_MAX_TINS */
-+
-+ __u32 threshold_rate [TC_CAKE_MAX_TINS];
-+ __u32 target_us [TC_CAKE_MAX_TINS];
-+ struct tc_cake_traffic_stats sent [TC_CAKE_MAX_TINS];
-+ struct tc_cake_traffic_stats dropped [TC_CAKE_MAX_TINS];
-+ struct tc_cake_traffic_stats ecn_marked[TC_CAKE_MAX_TINS];
-+ struct tc_cake_traffic_stats backlog [TC_CAKE_MAX_TINS];
-+ __u32 interval_us [TC_CAKE_MAX_TINS];
-+ __u32 way_indirect_hits[TC_CAKE_MAX_TINS];
-+ __u32 way_misses [TC_CAKE_MAX_TINS];
-+ __u32 way_collisions [TC_CAKE_MAX_TINS];
-+ __u32 peak_delay_us [TC_CAKE_MAX_TINS]; /* ~= bulk flow delay */
-+ __u32 avge_delay_us [TC_CAKE_MAX_TINS];
-+ __u32 base_delay_us [TC_CAKE_MAX_TINS]; /* ~= sparse flows delay */
-+ __u16 sparse_flows [TC_CAKE_MAX_TINS];
-+ __u16 bulk_flows [TC_CAKE_MAX_TINS];
-+ __u16 unresponse_flows [TC_CAKE_MAX_TINS]; /* v4 - was u32 last_len */
-+ __u16 spare [TC_CAKE_MAX_TINS]; /* v4 - split last_len */
-+ __u32 max_skblen [TC_CAKE_MAX_TINS];
-+ __u32 capacity_estimate; /* version 2 */
-+ __u32 memory_limit; /* version 3 */
-+ __u32 memory_used; /* version 3 */
-+ struct tc_cake_traffic_stats ack_drops [TC_CAKE_MAX_TINS]; /* v5 */
++
++enum {
++ CAKE_FLOW_NONE = 0,
++ CAKE_FLOW_SRC_IP,
++ CAKE_FLOW_DST_IP,
++ CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */
++ CAKE_FLOW_FLOWS,
++ CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */
++ CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */
++ CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */
++ CAKE_FLOW_MAX,
+};
+
++enum {
++ CAKE_DIFFSERV_DIFFSERV3 = 0,
++ CAKE_DIFFSERV_DIFFSERV4,
++ CAKE_DIFFSERV_DIFFSERV8,
++ CAKE_DIFFSERV_BESTEFFORT,
++ CAKE_DIFFSERV_PRECEDENCE,
++ CAKE_DIFFSERV_MAX
++};
++
++enum {
++ CAKE_ACK_NONE = 0,
++ CAKE_ACK_FILTER,
++ CAKE_ACK_AGGRESSIVE,
++ CAKE_ACK_MAX
++};
++
++enum {
++ CAKE_ATM_NONE = 0,
++ CAKE_ATM_ATM,
++ CAKE_ATM_PTM,
++ CAKE_ATM_MAX
++};
++
++
#endif
+--- /dev/null
++++ b/man/man8/tc-cake.8
+@@ -0,0 +1,632 @@
++.TH CAKE 8 "23 November 2017" "iproute2" "Linux"
++.SH NAME
++CAKE \- Common Applications Kept Enhanced (CAKE)
++.SH SYNOPSIS
++.B tc qdisc ... cake
++.br
++[
++.BR bandwidth
++RATE |
++.BR unlimited*
++|
++.BR autorate_ingress
++]
++.br
++[
++.BR rtt
++TIME |
++.BR datacentre
++|
++.BR lan
++|
++.BR metro
++|
++.BR regional
++|
++.BR internet*
++|
++.BR oceanic
++|
++.BR satellite
++|
++.BR interplanetary
++]
++.br
++[
++.BR besteffort
++|
++.BR diffserv8
++|
++.BR diffserv4
++|
++.BR diffserv3*
++]
++.br
++[
++.BR flowblind
++|
++.BR srchost
++|
++.BR dsthost
++|
++.BR hosts
++|
++.BR flows
++|
++.BR dual-srchost
++|
++.BR dual-dsthost
++|
++.BR triple-isolate*
++]
++.br
++[
++.BR nat
++|
++.BR nonat*
++]
++.br
++[
++.BR wash
++|
++.BR nowash*
++]
++.br
++[
++.BR ack-filter
++|
++.BR ack-filter-aggressive
++|
++.BR no-ack-filter*
++]
++.br
++[
++.BR memlimit
++LIMIT ]
++.br
++[
++.BR ptm
++|
++.BR atm
++|
++.BR noatm*
++]
++.br
++[
++.BR overhead
++N |
++.BR conservative
++|
++.BR raw*
++]
++.br
++[
++.BR mpu
++N ]
++.br
++[
++.BR ingress
++|
++.BR egress*
++]
++.br
++(* marks defaults)
++
++
++.SH DESCRIPTION
++CAKE (Common Applications Kept Enhanced) is a shaping-capable queue discipline
++which uses both AQM and FQ. It combines COBALT, which is an AQM algorithm
++combining Codel and BLUE, a shaper which operates in deficit mode, and a variant
++of DRR++ for flow isolation. 8-way set-associative hashing is used to virtually
++eliminate hash collisions. Priority queuing is available through a simplified
++diffserv implementation. Overhead compensation for various encapsulation
++schemes is tightly integrated.
++
++All settings are optional; the default settings are chosen to be sensible in
++most common deployments. Most people will only need to set the
++.B bandwidth
++parameter to get useful results, but reading the
++.B Overhead Compensation
++and
++.B Round Trip Time
++sections is strongly encouraged.
++
++.SH SHAPER PARAMETERS
++CAKE uses a deficit-mode shaper, which does not exhibit the initial burst
++typical of token-bucket shapers. It will automatically burst precisely as much
++as required to maintain the configured throughput. As such, it is very
++straightforward to configure.
++.PP
++.B unlimited
++(default)
++.br
++ No limit on the bandwidth.
++.PP
++.B bandwidth
++RATE
++.br
++ Set the shaper bandwidth. See
++.BR tc (8)
++or examples below for details of the RATE value.
++.PP
++.B autorate_ingress
++.br
++ Automatic capacity estimation based on traffic arriving at this qdisc.
++This is most likely to be useful with cellular links, which tend to change
++quality randomly. A
++.B bandwidth
++parameter can be used in conjunction to specify an initial estimate. The shaper
++will periodically be set to a bandwidth slightly below the estimated rate. This
++estimator cannot estimate the bandwidth of links downstream of itself.
++
++.SH OVERHEAD COMPENSATION PARAMETERS
++The size of each packet on the wire may differ from that seen by Linux. The
++following parameters allow CAKE to compensate for this difference by internally
++considering each packet to be bigger than Linux informs it. To assist users who
++are not expert network engineers, keywords have been provided to represent a
++number of common link technologies.
++
++.SS Manual Overhead Specification
++.B overhead
++BYTES
++.br
++ Adds BYTES to the size of each packet. BYTES may be negative; values
++between -64 and 256 (inclusive) are accepted.
++.PP
++.B mpu
++BYTES
++.br
++ Rounds each packet (including overhead) up to a minimum length
++BYTES. BYTES may not be negative; values between 0 and 256 (inclusive)
++are accepted.
++.PP
++.B atm
++.br
++ Compensates for ATM cell framing, which is normally found on ADSL links.
++This is performed after the
++.B overhead
++parameter above. ATM uses fixed 53-byte cells, each of which can carry 48 bytes
++payload.
++.PP
++.B ptm
++.br
++ Compensates for PTM encoding, which is normally found on VDSL2 links and
++uses a 64b/65b encoding scheme. It is even more efficient to simply
++derate the specified shaper bandwidth by a factor of 64/65 or 0.984. See
++ITU G.992.3 Annex N and IEEE 802.3 Section 61.3 for details.
++.PP
++.B noatm
++.br
++ Disables ATM and PTM compensation.
++
++.SS Failsafe Overhead Keywords
++These two keywords are provided for quick-and-dirty setup. Use them if you
++can't be bothered to read the rest of this section.
++.PP
++.B raw
++(default)
++.br
++ Turns off all overhead compensation in CAKE. The packet size reported
++by Linux will be used directly.
++.PP
++ Other overhead keywords may be added after "raw". The effect of this is
++to make the overhead compensation operate relative to the reported packet size,
++not the underlying IP packet size.
++.PP
++.B conservative
++.br
++ Compensates for more overhead than is likely to occur on any
++widely-deployed link technology.
++.br
++ Equivalent to
++.B overhead 48 atm.
++
++.SS ADSL Overhead Keywords
++Most ADSL modems have a way to check which framing scheme is in use. Often this
++is also specified in the settings document provided by the ISP. The keywords in
++this section are intended to correspond with these sources of information. All
++of them implicitly set the
++.B atm
++flag.
++.PP
++.B pppoa-vcmux
++.br
++ Equivalent to
++.B overhead 10 atm
++.PP
++.B pppoa-llc
++.br
++ Equivalent to
++.B overhead 14 atm
++.PP
++.B pppoe-vcmux
++.br
++ Equivalent to
++.B overhead 32 atm
++.PP
++.B pppoe-llcsnap
++.br
++ Equivalent to
++.B overhead 40 atm
++.PP
++.B bridged-vcmux
++.br
++ Equivalent to
++.B overhead 24 atm
++.PP
++.B bridged-llcsnap
++.br
++ Equivalent to
++.B overhead 32 atm
++.PP
++.B ipoa-vcmux
++.br
++ Equivalent to
++.B overhead 8 atm
++.PP
++.B ipoa-llcsnap
++.br
++ Equivalent to
++.B overhead 16 atm
++.PP
++See also the Ethernet Correction Factors section below.
++
++.SS VDSL2 Overhead Keywords
++ATM was dropped from VDSL2 in favour of PTM, which is a much more
++straightforward framing scheme. Some ISPs retained PPPoE for compatibility with
++their existing back-end systems.
++.PP
++.B pppoe-ptm
++.br
++ Equivalent to
++.B overhead 30 ptm
++
++.br
++ PPPoE: 2B PPP + 6B PPPoE +
++.br
++ ETHERNET: 6B dest MAC + 6B src MAC + 2B ethertype + 4B Frame Check Sequence +
++.br
++ PTM: 1B Start of Frame (S) + 1B End of Frame (Ck) + 2B TC-CRC (PTM-FCS)
++.br
++.PP
++.B bridged-ptm
++.br
++ Equivalent to
++.B overhead 22 ptm
++.br
++ ETHERNET: 6B dest MAC + 6B src MAC + 2B ethertype + 4B Frame Check Sequence +
++.br
++ PTM: 1B Start of Frame (S) + 1B End of Frame (Ck) + 2B TC-CRC (PTM-FCS)
++.br
++.PP
++See also the Ethernet Correction Factors section below.
++
++.SS DOCSIS Cable Overhead Keyword
++DOCSIS is the universal standard for providing Internet service over cable-TV
++infrastructure.
++
++In this case, the actual on-wire overhead is less important than the packet size
++the head-end equipment uses for shaping and metering. This is specified to be
++an Ethernet frame including the CRC (aka FCS).
++.PP
++.B docsis
++.br
++ Equivalent to
++.B overhead 18 mpu 64 noatm
++
++.SS Ethernet Overhead Keywords
++.PP
++.B ethernet
++.br
++ Accounts for Ethernet's preamble, inter-frame gap, and Frame Check
++Sequence. Use this keyword when the bottleneck being shaped for is an
++actual Ethernet cable.
++.br
++ Equivalent to
++.B overhead 38 mpu 84 noatm
++.PP
++.B ether-vlan
++.br
++ Adds 4 bytes to the overhead compensation, accounting for an IEEE 802.1Q
++VLAN header appended to the Ethernet frame header. NB: Some ISPs use one or
++even two of these within PPPoE; this keyword may be repeated as necessary to
++express this.
++
++.SH ROUND TRIP TIME PARAMETERS
++Active Queue Management (AQM) consists of embedding congestion signals in the
++packet flow, which receivers use to instruct senders to slow down when the queue
++is persistently occupied. CAKE uses ECN signalling when available, and packet
++drops otherwise, according to a combination of the Codel and BLUE AQM algorithms
++called COBALT.
++
++Very short latencies require a very rapid AQM response to adequately control
++latency. However, such a rapid response tends to impair throughput when the
++actual RTT is relatively long. CAKE allows specifying the RTT it assumes for
++tuning various parameters. Actual RTTs within an order of magnitude of this
++will generally work well for both throughput and latency management.
++
++At the 'lan' setting and below, the time constants are similar in magnitude to
++the jitter in the Linux kernel itself, so congestion might be signalled
++prematurely. The flows will then become sparse and total throughput reduced,
++leaving little or no back-pressure for the fairness logic to work against. Use
++the "metro" setting for local LANs unless you have a custom kernel.
++.PP
++.B rtt
++TIME
++.br
++ Manually specify an RTT.
++.PP
++.B datacentre
++.br
++ For extremely high-performance 10GigE+ networks only. Equivalent to
++.B rtt 100us.
++.PP
++.B lan
++.br
++ For pure Ethernet (not Wi-Fi) networks, at home or in the office. Don't
++use this when shaping for an Internet access link. Equivalent to
++.B rtt 1ms.
++.PP
++.B metro
++.br
++ For traffic mostly within a single city. Equivalent to
++.B rtt 10ms.
++.PP
++.B regional
++.br
++ For traffic mostly within a European-sized country. Equivalent to
++.B rtt 30ms.
++.PP
++.B internet
++(default)
++.br
++ This is suitable for most Internet traffic. Equivalent to
++.B rtt 100ms.
++.PP
++.B oceanic
++.br
++ For Internet traffic with generally above-average latency, such as that
++suffered by Australasian residents. Equivalent to
++.B rtt 300ms.
++.PP
++.B satellite
++.br
++ For traffic via geostationary satellites. Equivalent to
++.B rtt 1000ms.
++.PP
++.B interplanetary
++.br
++ So named because Jupiter is about 1 light-hour from Earth. Use this to
++(almost) completely disable AQM actions. Equivalent to
++.B rtt 1000s.
++
++.SH FLOW ISOLATION PARAMETERS
++With flow isolation enabled, CAKE places packets from different flows into
++different queues, each of which carries its own AQM state. Packets from each
++queue are then delivered fairly, according to a DRR++ algorithm which minimises
++latency for "sparse" flows. CAKE uses a set-associative hashing algorithm to
++minimise flow collisions.
++
++These keywords specify whether fairness based on source address, destination
++address, individual flows, or any combination of those is desired.
++.PP
++.B flowblind
++.br
++ Disables flow isolation; all traffic passes through a single queue for
++each tin.
++.PP
++.B srchost
++.br
++ Flows are defined only by source address. Could be useful on the egress
++path of an ISP backhaul.
++.PP
++.B dsthost
++.br
++ Flows are defined only by destination address. Could be useful on the
++ingress path of an ISP backhaul.
++.PP
++.B hosts
++.br
++ Flows are defined by source-destination host pairs. This is host
++isolation, rather than flow isolation.
++.PP
++.B flows
++.br
++ Flows are defined by the entire 5-tuple of source address, destination
++address, transport protocol, source port and destination port. This is the type
++of flow isolation performed by SFQ and fq_codel.
++.PP
++.B dual-srchost
++.br
++ Flows are defined by the 5-tuple, and fairness is applied first over
++source addresses, then over individual flows. Good for use on egress traffic
++from a LAN to the internet, where it'll prevent any one LAN host from
++monopolising the uplink, regardless of the number of flows they use.
++.PP
++.B dual-dsthost
++.br
++ Flows are defined by the 5-tuple, and fairness is applied first over
++destination addresses, then over individual flows. Good for use on ingress
++traffic to a LAN from the internet, where it'll prevent any one LAN host from
++monopolising the downlink, regardless of the number of flows they use.
++.PP
++.B triple-isolate
++(default)
++.br
++ Flows are defined by the 5-tuple, and fairness is applied over source
++*and* destination addresses intelligently (i.e. not merely by host-pairs), and
++also over individual flows. Use this if you're not certain whether to use
++dual-srchost or dual-dsthost; it'll do both jobs at once, preventing any one
++host on *either* side of the link from monopolising it with a large number of
++flows.
++.PP
++.B nat
++.br
++ Instructs Cake to perform a NAT lookup before applying flow-isolation
++rules, to determine the true addresses and port numbers of the packet, to
++improve fairness between hosts "inside" the NAT. This has no practical effect
++in "flowblind" or "flows" modes, or if NAT is performed on a different host.
++.PP
++.B nonat
++(default)
++.br
++ Cake will not perform a NAT lookup. Flow isolation will be performed
++using the addresses and port numbers directly visible to the interface Cake is
++attached to.
++
++.SH PRIORITY QUEUE PARAMETERS
++CAKE can divide traffic into "tins" based on the Diffserv field. Each tin has
++its own independent set of flow-isolation queues, and is serviced based on a WRR
++algorithm. To avoid perverse Diffserv marking incentives, tin weights have a
++"priority sharing" value when bandwidth used by that tin is below a threshold,
++and a lower "bandwidth sharing" value when above. Bandwidth is compared against
++the threshold using the same algorithm as the deficit-mode shaper.
++
++Detailed customisation of tin parameters is not provided. The following presets
++perform all necessary tuning, relative to the current shaper bandwidth and RTT
++settings.
++.PP
++.B besteffort
++.br
++ Disables priority queuing by placing all traffic in one tin.
++.PP
++.B precedence
++.br
++ Enables legacy interpretation of TOS "Precedence" field. Use of this
++preset on the modern Internet is firmly discouraged.
++.PP
++.B diffserv4
++.br
++ Provides a general-purpose Diffserv implementation with four tins:
++.br
++ Bulk (CS1), 6.25% threshold, generally low priority.
++.br
++ Best Effort (general), 100% threshold.
++.br
++ Video (AF4x, AF3x, CS3, AF2x, CS2, TOS4, TOS1), 50% threshold.
++.br
++ Voice (CS7, CS6, EF, VA, CS5, CS4), 25% threshold.
++.PP
++.B diffserv3
++(default)
++.br
++ Provides a simple, general-purpose Diffserv implementation with three tins:
++.br
++ Bulk (CS1), 6.25% threshold, generally low priority.
++.br
++ Best Effort (general), 100% threshold.
++.br
++ Voice (CS7, CS6, EF, VA, TOS4), 25% threshold, reduced Codel interval.
++
++.SH OTHER PARAMETERS
++.B memlimit
++LIMIT
++.br
++ Limit the memory consumed by Cake to LIMIT bytes. Note that this does
++not translate directly to queue size (so do not size this based on bandwidth
++delay product considerations, but rather on worst case acceptable memory
++consumption), as there is some overhead in the data structures containing the
++packets, especially for small packets.
++
++ By default, the limit is calculated based on the bandwidth and RTT
++settings.
++
++.PP
++.B wash
++
++.br
++ Traffic entering your diffserv domain is frequently mis-marked in
++transit from the perspective of your network, and traffic exiting yours may be
++mis-marked from the perspective of the transiting provider.
++
++Apply the wash option to clear all extra diffserv bits (but not the ECN
++bits), after priority queuing has taken place.
++
++If you are shaping inbound, and cannot trust the diffserv markings (as is the
++case for Comcast Cable, among others), it is best to use a single queue
++"besteffort" mode with wash.
++
++.SH EXAMPLES
++# tc qdisc delete root dev eth0
++.br
++# tc qdisc add root dev eth0 cake bandwidth 100Mbit ethernet
++.br
++# tc -s qdisc show dev eth0
++.br
++qdisc cake 1: dev eth0 root refcnt 2 bandwidth 100Mbit diffserv3 triple-isolate rtt 100.0ms noatm overhead 38 mpu 84
++ Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
++ backlog 0b 0p requeues 0
++ memory used: 0b of 5000000b
++ capacity estimate: 100Mbit
++ min/max network layer size: 65535 / 0
++ min/max overhead-adjusted size: 65535 / 0
++ average network hdr offset: 0
++
++ Bulk Best Effort Voice
++ thresh 6250Kbit 100Mbit 25Mbit
++ target 5.0ms 5.0ms 5.0ms
++ interval 100.0ms 100.0ms 100.0ms
++ pk_delay 0us 0us 0us
++ av_delay 0us 0us 0us
++ sp_delay 0us 0us 0us
++ pkts 0 0 0
++ bytes 0 0 0
++ way_inds 0 0 0
++ way_miss 0 0 0
++ way_cols 0 0 0
++ drops 0 0 0
++ marks 0 0 0
++ ack_drop 0 0 0
++ sp_flows 0 0 0
++ bk_flows 0 0 0
++ un_flows 0 0 0
++ max_len 0 0 0
++ quantum 300 1514 762
++
++After some use:
++.br
++# tc -s qdisc show dev eth0
++
++qdisc cake 1: root refcnt 2 bandwidth 100Mbit diffserv3 triple-isolate rtt 100.0ms noatm overhead 38 mpu 84
++ Sent 44709231 bytes 31931 pkt (dropped 45, overlimits 93782 requeues 0)
++ backlog 33308b 22p requeues 0
++ memory used: 292352b of 5000000b
++ capacity estimate: 100Mbit
++ min/max network layer size: 28 / 1500
++ min/max overhead-adjusted size: 84 / 1538
++ average network hdr offset: 14
++
++ Bulk Best Effort Voice
++ thresh 6250Kbit 100Mbit 25Mbit
++ target 5.0ms 5.0ms 5.0ms
++ interval 100.0ms 100.0ms 100.0ms
++ pk_delay 8.7ms 6.9ms 5.0ms
++ av_delay 4.9ms 5.3ms 3.8ms
++ sp_delay 727us 1.4ms 511us
++ pkts 2590 21271 8137
++ bytes 3081804 30302659 11426206
++ way_inds 0 46 0
++ way_miss 3 17 4
++ way_cols 0 0 0
++ drops 20 15 10
++ marks 0 0 0
++ ack_drop 0 0 0
++ sp_flows 2 4 1
++ bk_flows 1 2 1
++ un_flows 0 0 0
++ max_len 1514 1514 1514
++ quantum 300 1514 762
++
++.SH SEE ALSO
++.BR tc (8),
++.BR tc-codel (8),
++.BR tc-fq_codel (8),
++.BR tc-red (8)
++
++.SH AUTHORS
++Cake's principal author is Jonathan Morton, with contributions from
++Tony Ambardar, Kevin Darbyshire-Bryant, Toke Høiland-Jørgensen,
++Sebastian Moeller, Ryan Mounce, Dean Scarff, Nils Andreas Svee, and Dave Täht.
++
++This manual page was written by Loganaden Velvindron. Please report corrections
++to the Linux Networking mailing list <netdev@vger.kernel.org>.
--- a/tc/Makefile
+++ b/tc/Makefile
-@@ -63,6 +63,7 @@ TCMODULES += q_codel.o
+@@ -64,6 +64,7 @@ TCMODULES += em_meta.o
+ TCMODULES += q_mqprio.o
+ TCMODULES += q_codel.o
TCMODULES += q_fq_codel.o
++TCMODULES += q_cake.o
TCMODULES += q_fq.o
TCMODULES += q_pie.o
-+TCMODULES += q_cake.o
TCMODULES += q_hhf.o
- TCMODULES += e_bpf.o
-
--- /dev/null
+++ b/tc/q_cake.c
-@@ -0,0 +1,771 @@
+@@ -0,0 +1,730 @@
++/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Common Applications Kept Enhanced -- CAKE
+ *
-+ * Copyright (C) 2014-2015 Jonathan Morton <chromatix99@gmail.com>
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions, and the following disclaimer,
-+ * without modification.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. The names of the authors may not be used to endorse or promote products
-+ * derived from this software without specific prior written permission.
-+ *
-+ * Alternatively, provided that this notice is retained in full, this
-+ * software may be distributed under the terms of the GNU General
-+ * Public License ("GPL") version 2, in which case the provisions of the
-+ * GPL apply INSTEAD OF those given above.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-+ * DAMAGE.
-+ *
++ * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com>
++ * Copyright (C) 2017-2018 Toke Høiland-Jørgensen <toke@toke.dk>
+ */
+
+#include <stddef.h>
@@ -128,19 +787,46 @@
+#include "utils.h"
+#include "tc_util.h"
+
++struct cake_preset {
++ char *name;
++ unsigned int target;
++ unsigned int interval;
++};
++
++static struct cake_preset presets[] = {
++ {"datacentre", 5, 100},
++ {"lan", 50, 1000},
++ {"metro", 500, 10000},
++ {"regional", 1500, 30000},
++ {"internet", 5000, 100000},
++ {"oceanic", 15000, 300000},
++ {"satellite", 50000, 1000000},
++ {"interplanetary", 50000000, 1000000000},
++};
++
++
++static struct cake_preset *find_preset(char *argv)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(presets); i++)
++ if (!strcmp(argv, presets[i].name))
++ return &presets[i];
++ return NULL;
++}
++
+static void explain(void)
+{
+ fprintf(stderr,
+"Usage: ... cake [ bandwidth RATE | unlimited* | autorate_ingress ]\n"
+" [ rtt TIME | datacentre | lan | metro | regional |\n"
+" internet* | oceanic | satellite | interplanetary ]\n"
-+" [ besteffort | diffserv8 | diffserv4 | diffserv-llt |\n"
-+" diffserv3* ]\n"
++" [ besteffort | diffserv8 | diffserv4 | diffserv3* ]\n"
+" [ flowblind | srchost | dsthost | hosts | flows |\n"
+" dual-srchost | dual-dsthost | triple-isolate* ]\n"
+" [ nat | nonat* ]\n"
-+" [ wash | nowash * ]\n"
-+" [ ack-filter | ack-filter-aggressive | no-ack-filter * ]\n"
++" [ wash | nowash* ]\n"
++" [ ack-filter | ack-filter-aggressive | no-ack-filter* ]\n"
+" [ memlimit LIMIT ]\n"
+" [ ptm | atm | noatm* ] [ overhead N | conservative | raw* ]\n"
+" [ mpu N ] [ ingress | egress* ]\n"
@@ -148,10 +834,10 @@
+}
+
+static int cake_parse_opt(struct qdisc_util *qu, int argc, char **argv,
-+ struct nlmsghdr *n)
++ struct nlmsghdr *n, const char *dev)
+{
+ int unlimited = 0;
-+ unsigned bandwidth = 0;
++ __u64 bandwidth = 0;
+ unsigned interval = 0;
+ unsigned target = 0;
+ unsigned diffserv = 0;
@@ -168,11 +854,12 @@
+ int ingress = -1;
+ int ack_filter = -1;
+ struct rtattr *tail;
++ struct cake_preset *preset, *preset_set = NULL;
+
+ while (argc > 0) {
+ if (strcmp(*argv, "bandwidth") == 0) {
+ NEXT_ARG();
-+ if (get_rate(&bandwidth, *argv)) {
++ if (get_rate64(&bandwidth, *argv)) {
+ fprintf(stderr, "Illegal \"bandwidth\"\n");
+ return -1;
+ }
@@ -194,45 +881,25 @@
+ target = interval / 20;
+ if(!target)
+ target = 1;
-+ } else if (strcmp(*argv, "datacentre") == 0) {
-+ interval = 100;
-+ target = 5;
-+ } else if (strcmp(*argv, "lan") == 0) {
-+ interval = 1000;
-+ target = 50;
-+ } else if (strcmp(*argv, "metro") == 0) {
-+ interval = 10000;
-+ target = 500;
-+ } else if (strcmp(*argv, "regional") == 0) {
-+ interval = 30000;
-+ target = 1500;
-+ } else if (strcmp(*argv, "internet") == 0) {
-+ interval = 100000;
-+ target = 5000;
-+ } else if (strcmp(*argv, "oceanic") == 0) {
-+ interval = 300000;
-+ target = 15000;
-+ } else if (strcmp(*argv, "satellite") == 0) {
-+ interval = 1000000;
-+ target = 50000;
-+ } else if (strcmp(*argv, "interplanetary") == 0) {
-+ interval = 3600000000U;
-+ target = 5000;
++ } else if ((preset = find_preset(*argv))) {
++ if (preset_set)
++ duparg(*argv, preset_set->name);
++ preset_set = preset;
++ target = preset->target;
++ interval = preset->interval;
+
+ } else if (strcmp(*argv, "besteffort") == 0) {
-+ diffserv = 1;
++ diffserv = CAKE_DIFFSERV_BESTEFFORT;
+ } else if (strcmp(*argv, "precedence") == 0) {
-+ diffserv = 2;
++ diffserv = CAKE_DIFFSERV_PRECEDENCE;
+ } else if (strcmp(*argv, "diffserv8") == 0) {
-+ diffserv = 3;
++ diffserv = CAKE_DIFFSERV_DIFFSERV8;
+ } else if (strcmp(*argv, "diffserv4") == 0) {
-+ diffserv = 4;
++ diffserv = CAKE_DIFFSERV_DIFFSERV4;
+ } else if (strcmp(*argv, "diffserv") == 0) {
-+ diffserv = 4;
-+ } else if (strcmp(*argv, "diffserv-llt") == 0) {
-+ diffserv = 5;
++ diffserv = CAKE_DIFFSERV_DIFFSERV4;
+ } else if (strcmp(*argv, "diffserv3") == 0) {
-+ diffserv = 6;
++ diffserv = CAKE_DIFFSERV_DIFFSERV3;
+
+ } else if (strcmp(*argv, "nowash") == 0) {
+ wash = 0;
@@ -240,21 +907,21 @@
+ wash = 1;
+
+ } else if (strcmp(*argv, "flowblind") == 0) {
-+ flowmode = 0;
++ flowmode = CAKE_FLOW_NONE;
+ } else if (strcmp(*argv, "srchost") == 0) {
-+ flowmode = 1;
++ flowmode = CAKE_FLOW_SRC_IP;
+ } else if (strcmp(*argv, "dsthost") == 0) {
-+ flowmode = 2;
++ flowmode = CAKE_FLOW_DST_IP;
+ } else if (strcmp(*argv, "hosts") == 0) {
-+ flowmode = 3;
++ flowmode = CAKE_FLOW_HOSTS;
+ } else if (strcmp(*argv, "flows") == 0) {
-+ flowmode = 4;
++ flowmode = CAKE_FLOW_FLOWS;
+ } else if (strcmp(*argv, "dual-srchost") == 0) {
-+ flowmode = 5;
++ flowmode = CAKE_FLOW_DUAL_SRC;
+ } else if (strcmp(*argv, "dual-dsthost") == 0) {
-+ flowmode = 6;
++ flowmode = CAKE_FLOW_DUAL_DST;
+ } else if (strcmp(*argv, "triple-isolate") == 0) {
-+ flowmode = 7;
++ flowmode = CAKE_FLOW_TRIPLE;
+
+ } else if (strcmp(*argv, "nat") == 0) {
+ nat = 1;
@@ -262,14 +929,14 @@
+ nat = 0;
+
+ } else if (strcmp(*argv, "ptm") == 0) {
-+ atm = 2;
++ atm = CAKE_ATM_PTM;
+ } else if (strcmp(*argv, "atm") == 0) {
-+ atm = 1;
++ atm = CAKE_ATM_ATM;
+ } else if (strcmp(*argv, "noatm") == 0) {
-+ atm = 0;
++ atm = CAKE_ATM_NONE;
+
+ } else if (strcmp(*argv, "raw") == 0) {
-+ atm = 0;
++ atm = CAKE_ATM_NONE;
+ overhead = 0;
+ overhead_set = true;
+ overhead_override = true;
@@ -279,41 +946,41 @@
+ * one whole ATM cell plus ATM framing.
+ * A safe choice if the actual overhead is unknown.
+ */
-+ atm = 1;
++ atm = CAKE_ATM_ATM;
+ overhead = 48;
+ overhead_set = true;
+
+ /* Various ADSL framing schemes, all over ATM cells */
+ } else if (strcmp(*argv, "ipoa-vcmux") == 0) {
-+ atm = 1;
++ atm = CAKE_ATM_ATM;
+ overhead += 8;
+ overhead_set = true;
+ } else if (strcmp(*argv, "ipoa-llcsnap") == 0) {
-+ atm = 1;
++ atm = CAKE_ATM_ATM;
+ overhead += 16;
+ overhead_set = true;
+ } else if (strcmp(*argv, "bridged-vcmux") == 0) {
-+ atm = 1;
++ atm = CAKE_ATM_ATM;
+ overhead += 24;
+ overhead_set = true;
+ } else if (strcmp(*argv, "bridged-llcsnap") == 0) {
-+ atm = 1;
++ atm = CAKE_ATM_ATM;
+ overhead += 32;
+ overhead_set = true;
+ } else if (strcmp(*argv, "pppoa-vcmux") == 0) {
-+ atm = 1;
++ atm = CAKE_ATM_ATM;
+ overhead += 10;
+ overhead_set = true;
+ } else if (strcmp(*argv, "pppoa-llc") == 0) {
-+ atm = 1;
++ atm = CAKE_ATM_ATM;
+ overhead += 14;
+ overhead_set = true;
+ } else if (strcmp(*argv, "pppoe-vcmux") == 0) {
-+ atm = 1;
++ atm = CAKE_ATM_ATM;
+ overhead += 32;
+ overhead_set = true;
+ } else if (strcmp(*argv, "pppoe-llcsnap") == 0) {
-+ atm = 1;
++ atm = CAKE_ATM_ATM;
+ overhead += 40;
+ overhead_set = true;
+
@@ -325,7 +992,7 @@
+ * + 1B Start of Frame (S) + 1B End of Frame (Ck)
+ * + 2B TC-CRC (PTM-FCS) = 30B
+ */
-+ atm = 2;
++ atm = CAKE_ATM_PTM;
+ overhead += 30;
+ overhead_set = true;
+ } else if (strcmp(*argv, "bridged-ptm") == 0) {
@@ -334,7 +1001,7 @@
+ * + 1B Start of Frame (S) + 1B End of Frame (Ck)
+ * + 2B TC-CRC (PTM-FCS) = 22B
+ */
-+ atm = 2;
++ atm = CAKE_ATM_PTM;
+ overhead += 22;
+ overhead_set = true;
+
@@ -352,23 +1019,6 @@
+ * active.
+ */
+
-+ } else if (strcmp(*argv, "total_overhead") == 0) {
-+ /*
-+ * This is the overhead cake accounts for; added here so
-+ * that cake's "tc -s qdisc" output can be directly
-+ * pasted into the tc command to instantate a new cake..
-+ */
-+ NEXT_ARG();
-+
-+ } else if (strcmp(*argv, "hard_header_len") == 0) {
-+ /*
-+ * This is the overhead the kernel automatically
-+ * accounted for; added here so that cake's "tc -s
-+ * qdisc" output can be directly pasted into the tc
-+ * command to instantiate a new cake..
-+ */
-+ NEXT_ARG();
-+
+ } else if (strcmp(*argv, "ethernet") == 0) {
+ /* ethernet pre-amble & interframe gap & FCS
+ * you may need to add vlan tag */
@@ -387,7 +1037,7 @@
+ * but not interframe gap or preamble.
+ */
+ } else if (strcmp(*argv, "docsis") == 0) {
-+ atm = 0;
++ atm = CAKE_ATM_NONE;
+ overhead += 18;
+ overhead_set = true;
+ mpu = 64;
@@ -417,11 +1067,11 @@
+ ingress = 0;
+
+ } else if (strcmp(*argv, "no-ack-filter") == 0) {
-+ ack_filter = 0;
++ ack_filter = CAKE_ACK_NONE;
+ } else if (strcmp(*argv, "ack-filter") == 0) {
-+ ack_filter = 0x0200;
++ ack_filter = CAKE_ACK_FILTER;
+ } else if (strcmp(*argv, "ack-filter-aggressive") == 0) {
-+ ack_filter = 0x0600;
++ ack_filter = CAKE_ACK_AGGRESSIVE;
+
+ } else if (strcmp(*argv, "memlimit") == 0) {
+ NEXT_ARG();
@@ -444,7 +1094,7 @@
+ tail = NLMSG_TAIL(n);
+ addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+ if (bandwidth || unlimited)
-+ addattr_l(n, 1024, TCA_CAKE_BASE_RATE, &bandwidth, sizeof(bandwidth));
++ addattr_l(n, 1024, TCA_CAKE_BASE_RATE64, &bandwidth, sizeof(bandwidth));
+ if (diffserv)
+ addattr_l(n, 1024, TCA_CAKE_DIFFSERV_MODE, &diffserv, sizeof(diffserv));
+ if (atm != -1)
@@ -455,7 +1105,7 @@
+ addattr_l(n, 1024, TCA_CAKE_OVERHEAD, &overhead, sizeof(overhead));
+ if (overhead_override) {
+ unsigned zero = 0;
-+ addattr_l(n, 1024, TCA_CAKE_ETHERNET, &zero, sizeof(zero));
++ addattr_l(n, 1024, TCA_CAKE_RAW, &zero, sizeof(zero));
+ }
+ if (mpu > 0)
+ addattr_l(n, 1024, TCA_CAKE_MPU, &mpu, sizeof(mpu));
@@ -484,13 +1134,13 @@
+static int cake_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+ struct rtattr *tb[TCA_CAKE_MAX + 1];
-+ unsigned bandwidth = 0;
++ __u64 bandwidth = 0;
+ unsigned diffserv = 0;
+ unsigned flowmode = 0;
+ unsigned interval = 0;
+ unsigned memlimit = 0;
+ int overhead = 0;
-+ int ethernet = 0;
++ int raw = 0;
+ int mpu = 0;
+ int atm = 0;
+ int nat = 0;
@@ -498,6 +1148,7 @@
+ int wash = 0;
+ int ingress = 0;
+ int ack_filter = 0;
++ int split_gso = 0;
+ SPRINT_BUF(b1);
+ SPRINT_BUF(b2);
+
@@ -506,87 +1157,89 @@
+
+ parse_rtattr_nested(tb, TCA_CAKE_MAX, opt);
+
-+ if (tb[TCA_CAKE_BASE_RATE] &&
-+ RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE]) >= sizeof(__u32)) {
-+ bandwidth = rta_getattr_u32(tb[TCA_CAKE_BASE_RATE]);
-+ if(bandwidth)
++ if (tb[TCA_CAKE_BASE_RATE64] &&
++ RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE64]) >= sizeof(bandwidth)) {
++ bandwidth = rta_getattr_u64(tb[TCA_CAKE_BASE_RATE64]);
++ if(bandwidth) {
+ fprintf(f, "bandwidth %s ", sprint_rate(bandwidth, b1));
-+ else
++ } else
+ fprintf(f, "unlimited ");
+ }
+ if (tb[TCA_CAKE_AUTORATE] &&
+ RTA_PAYLOAD(tb[TCA_CAKE_AUTORATE]) >= sizeof(__u32)) {
+ autorate = rta_getattr_u32(tb[TCA_CAKE_AUTORATE]);
+ if(autorate == 1)
-+ fprintf(f, "autorate_ingress ");
++ fprintf(f, "ingress ");
+ else if(autorate)
-+ fprintf(f, "(?autorate?) ");
++ fprintf(f, "unknown ");
+ }
+ if (tb[TCA_CAKE_DIFFSERV_MODE] &&
+ RTA_PAYLOAD(tb[TCA_CAKE_DIFFSERV_MODE]) >= sizeof(__u32)) {
+ diffserv = rta_getattr_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+ switch(diffserv) {
-+ case 1:
-+ fprintf(f, "besteffort ");
++ case CAKE_DIFFSERV_DIFFSERV3:
++ fprintf(f, "diffserv3 ");
+ break;
-+ case 2:
-+ fprintf(f, "precedence ");
++ case CAKE_DIFFSERV_DIFFSERV4:
++ fprintf(f, "diffserv4 ");
+ break;
-+ case 3:
++ case CAKE_DIFFSERV_DIFFSERV8:
+ fprintf(f, "diffserv8 ");
+ break;
-+ case 4:
-+ fprintf(f, "diffserv4 ");
-+ break;
-+ case 5:
-+ fprintf(f, "diffserv-llt ");
++ case CAKE_DIFFSERV_BESTEFFORT:
++ fprintf(f, "besteffort ");
+ break;
-+ case 6:
-+ fprintf(f, "diffserv3 ");
++ case CAKE_DIFFSERV_PRECEDENCE:
++ fprintf(f, "precedence ");
+ break;
+ default:
-+ fprintf(f, "(?diffserv?) ");
++ fprintf(f, "unknown ");
+ break;
+ };
+ }
+ if (tb[TCA_CAKE_FLOW_MODE] &&
+ RTA_PAYLOAD(tb[TCA_CAKE_FLOW_MODE]) >= sizeof(__u32)) {
+ flowmode = rta_getattr_u32(tb[TCA_CAKE_FLOW_MODE]);
-+ nat = !!(flowmode & 64);
-+ flowmode &= ~64;
+ switch(flowmode) {
-+ case 0:
++ case CAKE_FLOW_NONE:
+ fprintf(f, "flowblind ");
+ break;
-+ case 1:
++ case CAKE_FLOW_SRC_IP:
+ fprintf(f, "srchost ");
+ break;
-+ case 2:
++ case CAKE_FLOW_DST_IP:
+ fprintf(f, "dsthost ");
+ break;
-+ case 3:
++ case CAKE_FLOW_HOSTS:
+ fprintf(f, "hosts ");
+ break;
-+ case 4:
++ case CAKE_FLOW_FLOWS:
+ fprintf(f, "flows ");
+ break;
-+ case 5:
++ case CAKE_FLOW_DUAL_SRC:
+ fprintf(f, "dual-srchost ");
+ break;
-+ case 6:
++ case CAKE_FLOW_DUAL_DST:
+ fprintf(f, "dual-dsthost ");
+ break;
-+ case 7:
++ case CAKE_FLOW_TRIPLE:
+ fprintf(f, "triple-isolate ");
+ break;
+ default:
-+ fprintf(f, "(?flowmode?) ");
++ fprintf(f, "unknown ");
+ break;
+ };
+
-+ if(nat)
-+ fprintf(f, "nat ");
+ }
++
++ if (tb[TCA_CAKE_NAT] &&
++ RTA_PAYLOAD(tb[TCA_CAKE_NAT]) >= sizeof(__u32)) {
++ nat = rta_getattr_u32(tb[TCA_CAKE_NAT]);
++ }
++
++ if(nat)
++ fprintf(f, "nat ");
++
+ if (tb[TCA_CAKE_WASH] &&
+ RTA_PAYLOAD(tb[TCA_CAKE_WASH]) >= sizeof(__u32)) {
+ wash = rta_getattr_u32(tb[TCA_CAKE_WASH]);
@@ -596,8 +1249,8 @@
+ atm = rta_getattr_u32(tb[TCA_CAKE_ATM]);
+ }
+ if (tb[TCA_CAKE_OVERHEAD] &&
-+ RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__u32)) {
-+ overhead = rta_getattr_u32(tb[TCA_CAKE_OVERHEAD]);
++ RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__s32)) {
++ overhead = *(__s32 *) RTA_DATA(tb[TCA_CAKE_OVERHEAD]);
+ }
+ if (tb[TCA_CAKE_MPU] &&
+ RTA_PAYLOAD(tb[TCA_CAKE_MPU]) >= sizeof(__u32)) {
@@ -611,9 +1264,12 @@
+ RTA_PAYLOAD(tb[TCA_CAKE_ACK_FILTER]) >= sizeof(__u32)) {
+ ack_filter = rta_getattr_u32(tb[TCA_CAKE_ACK_FILTER]);
+ }
-+ if (tb[TCA_CAKE_ETHERNET] &&
-+ RTA_PAYLOAD(tb[TCA_CAKE_ETHERNET]) >= sizeof(__u32)) {
-+ ethernet = rta_getattr_u32(tb[TCA_CAKE_ETHERNET]);
++ if (tb[TCA_CAKE_SPLIT_GSO] &&
++ RTA_PAYLOAD(tb[TCA_CAKE_SPLIT_GSO]) >= sizeof(__u32)) {
++ split_gso = rta_getattr_u32(tb[TCA_CAKE_SPLIT_GSO]);
++ }
++ if (tb[TCA_CAKE_RAW]) {
++ raw = 1;
+ }
+ if (tb[TCA_CAKE_RTT] &&
+ RTA_PAYLOAD(tb[TCA_CAKE_RTT]) >= sizeof(__u32)) {
@@ -621,50 +1277,42 @@
+ }
+
+ if (wash)
-+ fprintf(f,"wash ");
++ fprintf(f, "wash ");
+
+ if (ingress)
-+ fprintf(f,"ingress ");
++ fprintf(f, "ingress ");
+
-+ if (ack_filter == 0x0600)
-+ fprintf(f,"ack-filter-aggressive ");
-+ else if (ack_filter)
-+ fprintf(f,"ack-filter ");
++ if (ack_filter == CAKE_ACK_AGGRESSIVE)
++ fprintf(f, "ack-filter-aggressive ");
++ else if (ack_filter == CAKE_ACK_FILTER)
++ fprintf(f, "ack-filter ");
++ else
++ fprintf(f, "no-ack-filter ");
++
++ if (split_gso)
++ fprintf(f, "split-gso ");
+
+ if (interval)
+ fprintf(f, "rtt %s ", sprint_time(interval, b2));
+
-+ if (!atm && overhead == ethernet) {
++ if (raw)
+ fprintf(f, "raw ");
-+ } else {
-+ if (atm == 1)
-+ fprintf(f, "atm ");
-+ else if (atm == 2)
-+ fprintf(f, "ptm ");
-+ else
-+ fprintf(f, "noatm ");
-+
-+ fprintf(f, "overhead %d ", overhead);
-+
-+ /* This is actually the *amount* of automatic compensation, but
-+ * we only report its presence as a boolean for now.
-+ */
-+ if (ethernet)
-+ fprintf(f, "via-ethernet ");
-+ }
+
-+ /* unconditionally report the overhead and hard_header_len overhead the
-+ * kernel added automatically
-+ */
-+ fprintf(f, "total_overhead %d ", overhead);
-+ fprintf(f, "hard_header_len %d ", ethernet);
++ if (atm == CAKE_ATM_ATM)
++ fprintf(f, "atm ");
++ else if (atm == CAKE_ATM_PTM)
++ fprintf(f, "ptm ");
++ else if (!raw)
++ fprintf(f, "noatm ");
+
-+ if (mpu) {
-+ fprintf(f, "mpu %d ", mpu);
-+ }
++ fprintf(f, "overhead %d ", overhead);
+
-+ if (memlimit)
++ if (mpu)
++ fprintf(f, "mpu %u ", mpu);
++
++ if (memlimit) {
+ fprintf(f, "memlimit %s", sprint_size(memlimit, b1));
++ }
+
+ return 0;
+}
@@ -672,58 +1320,108 @@
+static int cake_print_xstats(struct qdisc_util *qu, FILE *f,
+ struct rtattr *xstats)
+{
-+ /* fq_codel stats format borrowed */
-+ struct tc_fq_codel_xstats *st;
-+ struct tc_cake_xstats *stnc;
+ SPRINT_BUF(b1);
-+ SPRINT_BUF(b2);
++ struct rtattr *st[TCA_CAKE_STATS_MAX + 1];
++ int i;
+
+ if (xstats == NULL)
+ return 0;
+
-+ if (RTA_PAYLOAD(xstats) < sizeof(st->type))
-+ return -1;
-+
-+ st = RTA_DATA(xstats);
-+ stnc = RTA_DATA(xstats);
-+
-+ if (st->type == TCA_FQ_CODEL_XSTATS_QDISC && RTA_PAYLOAD(xstats) >= sizeof(*st)) {
-+ fprintf(f, " maxpacket %u drop_overlimit %u new_flow_count %u ecn_mark %u",
-+ st->qdisc_stats.maxpacket,
-+ st->qdisc_stats.drop_overlimit,
-+ st->qdisc_stats.new_flow_count,
-+ st->qdisc_stats.ecn_mark);
-+ fprintf(f, "\n new_flows_len %u old_flows_len %u",
-+ st->qdisc_stats.new_flows_len,
-+ st->qdisc_stats.old_flows_len);
-+ } else if (st->type == TCA_FQ_CODEL_XSTATS_CLASS && RTA_PAYLOAD(xstats) >= sizeof(*st)) {
-+ fprintf(f, " deficit %d count %u lastcount %u ldelay %s",
-+ st->class_stats.deficit,
-+ st->class_stats.count,
-+ st->class_stats.lastcount,
-+ sprint_time(st->class_stats.ldelay, b1));
-+ if (st->class_stats.dropping) {
-+ fprintf(f, " dropping");
-+ if (st->class_stats.drop_next < 0)
++#define GET_STAT_U32(attr) rta_getattr_u32(st[TCA_CAKE_STATS_ ## attr])
++#define GET_STAT_S32(attr) (*(__s32*)RTA_DATA(st[TCA_CAKE_STATS_ ## attr]))
++#define GET_STAT_U64(attr) rta_getattr_u64(st[TCA_CAKE_STATS_ ## attr])
++
++ parse_rtattr_nested(st, TCA_CAKE_STATS_MAX, xstats);
++
++ if (st[TCA_CAKE_STATS_MEMORY_USED] &&
++ st[TCA_CAKE_STATS_MEMORY_LIMIT]) {
++ fprintf(f, " memory used: %s",
++ sprint_size(GET_STAT_U32(MEMORY_USED), b1));
++
++ fprintf(f, " of %s\n",
++ sprint_size(GET_STAT_U32(MEMORY_LIMIT), b1));
++ }
++
++ if (st[TCA_CAKE_STATS_CAPACITY_ESTIMATE64]) {
++ fprintf(f, " capacity estimate: %s\n",
++ sprint_rate(GET_STAT_U64(CAPACITY_ESTIMATE64), b1));
++ }
++
++ if (st[TCA_CAKE_STATS_MIN_NETLEN] &&
++ st[TCA_CAKE_STATS_MAX_NETLEN]) {
++ fprintf(f, " min/max network layer size: %8u",
++ GET_STAT_U32(MIN_NETLEN));
++ fprintf(f, " /%8u\n", GET_STAT_U32(MAX_NETLEN));
++ }
++
++ if (st[TCA_CAKE_STATS_MIN_ADJLEN] &&
++ st[TCA_CAKE_STATS_MAX_ADJLEN]) {
++ fprintf(f, " min/max overhead-adjusted size: %8u",
++ GET_STAT_U32(MIN_ADJLEN));
++ fprintf(f, " /%8u\n", GET_STAT_U32(MAX_ADJLEN));
++ }
++
++ if (st[TCA_CAKE_STATS_AVG_NETOFF])
++ fprintf(f, " average network hdr offset: %8u\n\n",
++ GET_STAT_U32(AVG_NETOFF));
++
++ /* class stats */
++ if (st[TCA_CAKE_STATS_DEFICIT])
++ fprintf(f, " deficit %d",
++ GET_STAT_S32(DEFICIT));
++ if (st[TCA_CAKE_STATS_COBALT_COUNT])
++ fprintf(f, " count %u",
++ GET_STAT_U32(COBALT_COUNT));
++
++ if (st[TCA_CAKE_STATS_DROPPING] && GET_STAT_U32(DROPPING)) {
++ fprintf(f, " dropping");
++ if (st[TCA_CAKE_STATS_DROP_NEXT_US]) {
++ int drop_next = GET_STAT_S32(DROP_NEXT_US);
++ if (drop_next < 0) {
+ fprintf(f, " drop_next -%s",
-+ sprint_time(-st->class_stats.drop_next, b1));
-+ else
++ sprint_time(-drop_next, b1));
++ } else {
+ fprintf(f, " drop_next %s",
-+ sprint_time(st->class_stats.drop_next, b1));
++ sprint_time(drop_next, b1));
++ }
++ }
++ }
++
++ if (st[TCA_CAKE_STATS_P_DROP]) {
++ fprintf(f, " blue_prob %u",
++ GET_STAT_U32(P_DROP));
++ if (st[TCA_CAKE_STATS_BLUE_TIMER_US]) {
++ int blue_timer = GET_STAT_S32(BLUE_TIMER_US);
++ if (blue_timer < 0) {
++ fprintf(f, " blue_timer -%s",
++ sprint_time(-blue_timer, b1));
++ } else {
++ fprintf(f, " blue_timer %s",
++ sprint_time(blue_timer, b1));
++ }
+ }
-+ } else if (stnc->version >= 1 && stnc->version < 0xFF
-+ && stnc->max_tins == TC_CAKE_MAX_TINS
-+ && RTA_PAYLOAD(xstats) >= offsetof(struct tc_cake_xstats, capacity_estimate))
-+ {
-+ int i;
++ }
++
++#undef GET_STAT_U32
++#undef GET_STAT_S32
++#undef GET_STAT_U64
++
++ if (st[TCA_CAKE_STATS_TIN_STATS]) {
++ struct rtattr *tins[TC_CAKE_MAX_TINS + 1];
++ struct rtattr *tstat[TC_CAKE_MAX_TINS][TCA_CAKE_TIN_STATS_MAX + 1];
++ int num_tins = 0;
+
-+ if(stnc->version >= 3)
-+ fprintf(f, " memory used: %s of %s\n", sprint_size(stnc->memory_used, b1), sprint_size(stnc->memory_limit, b2));
++ parse_rtattr_nested(tins, TC_CAKE_MAX_TINS, st[TCA_CAKE_STATS_TIN_STATS]);
+
-+ if(stnc->version >= 2)
-+ fprintf(f, " capacity estimate: %s\n", sprint_rate(stnc->capacity_estimate, b1));
++ for (i = 1; i <= TC_CAKE_MAX_TINS && tins[i]; i++) {
++ parse_rtattr_nested(tstat[i-1], TCA_CAKE_TIN_STATS_MAX, tins[i]);
++ num_tins++;
++ }
++
++ if (!num_tins)
++ return 0;
+
-+ switch(stnc->tin_cnt) {
++ switch(num_tins) {
+ case 3:
+ fprintf(f, " Bulk Best Effort Voice\n");
+ break;
@@ -732,112 +1430,61 @@
+ fprintf(f, " Bulk Best Effort Video Voice\n");
+ break;
+
-+ case 5:
-+ fprintf(f, " Low Loss Best Effort Low Delay Bulk Net Control\n");
-+ break;
-+
+ default:
+ fprintf(f, " ");
-+ for(i=0; i < stnc->tin_cnt; i++)
++ for(i=0; i < num_tins; i++)
+ fprintf(f, " Tin %u", i);
+ fprintf(f, "\n");
+ };
+
-+ fprintf(f, " thresh ");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12s", sprint_rate(stnc->threshold_rate[i], b1));
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " target ");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12s", sprint_time(stnc->target_us[i], b1));
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " interval");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12s", sprint_time(stnc->interval_us[i], b1));
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " pk_delay");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12s", sprint_time(stnc->peak_delay_us[i], b1));
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " av_delay");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12s", sprint_time(stnc->avge_delay_us[i], b1));
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " sp_delay");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12s", sprint_time(stnc->base_delay_us[i], b1));
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " pkts ");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->sent[i].packets);
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " bytes ");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12llu", stnc->sent[i].bytes);
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " way_inds");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->way_indirect_hits[i]);
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " way_miss");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->way_misses[i]);
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " way_cols");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->way_collisions[i]);
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " drops ");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->dropped[i].packets);
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " marks ");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->ecn_marked[i].packets);
-+ fprintf(f, "\n");
-+
-+ if(stnc->version >= 5) {
-+ fprintf(f, " ack_drop");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->ack_drops[i].packets);
-+ fprintf(f, "\n");
-+ }
-+
-+ fprintf(f, " sp_flows");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->sparse_flows[i]);
-+ fprintf(f, "\n");
-+
-+ fprintf(f, " bk_flows");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->bulk_flows[i]);
-+ fprintf(f, "\n");
-+
-+ if(stnc->version >= 4) {
-+ fprintf(f, " un_flows");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->unresponse_flows[i]);
-+ fprintf(f, "\n");
-+ }
-+
-+ fprintf(f, " max_len ");
-+ for(i=0; i < stnc->tin_cnt; i++)
-+ fprintf(f, " %12u", stnc->max_skblen[i]);
-+ fprintf(f, "\n");
-+ } else {
-+ return -1;
++#define GET_TSTAT(i, attr) (tstat[i][TCA_CAKE_TIN_STATS_ ## attr])
++#define PRINT_TSTAT(name, attr, fmts, val) do { \
++ if (GET_TSTAT(0, attr)) { \
++ fprintf(f, name); \
++ for (i = 0; i < num_tins; i++) \
++ fprintf(f, " %12" fmts, val); \
++ fprintf(f, "\n"); \
++ } \
++ } while (0)
++
++#define SPRINT_TSTAT(pfunc, type, name, attr) PRINT_TSTAT( \
++ name, attr, "s", sprint_ ## pfunc( \
++ rta_getattr_ ## type(GET_TSTAT(i, attr)), b1))
++
++#define PRINT_TSTAT_U32(name, attr) PRINT_TSTAT( \
++ name, attr, "u", rta_getattr_u32(GET_TSTAT(i, attr)))
++
++#define PRINT_TSTAT_U64(name, attr) PRINT_TSTAT( \
++ name, attr, "llu", rta_getattr_u64(GET_TSTAT(i, attr)))
++
++ SPRINT_TSTAT(rate, u64, " thresh ", THRESHOLD_RATE64);
++ SPRINT_TSTAT(time, u32, " target ", TARGET_US);
++ SPRINT_TSTAT(time, u32, " interval", INTERVAL_US);
++ SPRINT_TSTAT(time, u32, " pk_delay", PEAK_DELAY_US);
++ SPRINT_TSTAT(time, u32, " av_delay", AVG_DELAY_US);
++ SPRINT_TSTAT(time, u32, " sp_delay", BASE_DELAY_US);
++ SPRINT_TSTAT(size, u32, " backlog ", BACKLOG_BYTES);
++
++ PRINT_TSTAT_U32(" pkts ", SENT_PACKETS);
++ PRINT_TSTAT_U64(" bytes ", SENT_BYTES64);
++
++ PRINT_TSTAT_U32(" way_inds", WAY_INDIRECT_HITS);
++ PRINT_TSTAT_U32(" way_miss", WAY_MISSES);
++ PRINT_TSTAT_U32(" way_cols", WAY_COLLISIONS);
++ PRINT_TSTAT_U32(" drops ", DROPPED_PACKETS);
++ PRINT_TSTAT_U32(" marks ", ECN_MARKED_PACKETS);
++ PRINT_TSTAT_U32(" ack_drop", ACKS_DROPPED_PACKETS);
++ PRINT_TSTAT_U32(" sp_flows", SPARSE_FLOWS);
++ PRINT_TSTAT_U32(" bk_flows", BULK_FLOWS);
++ PRINT_TSTAT_U32(" un_flows", UNRESPONSIVE_FLOWS);
++ PRINT_TSTAT_U32(" max_len ", MAX_SKBLEN);
++ PRINT_TSTAT_U32(" quantum ", FLOW_QUANTUM);
++
++#undef GET_TSTAT
++#undef PRINT_TSTAT
++#undef SPRINT_TSTAT
++#undef PRINT_TSTAT_U32
++#undef PRINT_TSTAT_U64
+ }
+ return 0;
+}