diff options
-rw-r--r-- | package/network/utils/iproute2/patches/950-add-cake-to-tc.patch | 1391 |
1 files changed, 1019 insertions, 372 deletions
diff --git a/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch b/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch index 882db8af19..c2a9bdef1a 100644 --- a/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch +++ b/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch @@ -1,14 +1,12 @@ --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h -@@ -850,4 +850,63 @@ struct tc_pie_xstats { - __u32 maxq; /* maximum queue size */ - __u32 ecn_mark; /* packets marked with ecn*/ +@@ -852,2 +852,116 @@ enum { }; -+ +/* CAKE */ +enum { + TCA_CAKE_UNSPEC, -+ TCA_CAKE_BASE_RATE, ++ TCA_CAKE_PAD, ++ TCA_CAKE_BASE_RATE64, + TCA_CAKE_DIFFSERV_MODE, + TCA_CAKE_ATM, + TCA_CAKE_FLOW_MODE, @@ -18,100 +16,761 @@ + TCA_CAKE_AUTORATE, + TCA_CAKE_MEMORY, + TCA_CAKE_NAT, -+ TCA_CAKE_ETHERNET, ++ TCA_CAKE_RAW, // was _ETHERNET + TCA_CAKE_WASH, + TCA_CAKE_MPU, + TCA_CAKE_INGRESS, + TCA_CAKE_ACK_FILTER, ++ TCA_CAKE_SPLIT_GSO, + __TCA_CAKE_MAX +}; +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) + -+struct tc_cake_traffic_stats { -+ __u32 packets; -+ __u32 link_ms; -+ __u64 bytes; ++enum { ++ __TCA_CAKE_STATS_INVALID, ++ TCA_CAKE_STATS_PAD, ++ TCA_CAKE_STATS_CAPACITY_ESTIMATE64, ++ TCA_CAKE_STATS_MEMORY_LIMIT, ++ TCA_CAKE_STATS_MEMORY_USED, ++ TCA_CAKE_STATS_AVG_NETOFF, ++ TCA_CAKE_STATS_MIN_NETLEN, ++ TCA_CAKE_STATS_MAX_NETLEN, ++ TCA_CAKE_STATS_MIN_ADJLEN, ++ TCA_CAKE_STATS_MAX_ADJLEN, ++ TCA_CAKE_STATS_TIN_STATS, ++ TCA_CAKE_STATS_DEFICIT, ++ TCA_CAKE_STATS_COBALT_COUNT, ++ TCA_CAKE_STATS_DROPPING, ++ TCA_CAKE_STATS_DROP_NEXT_US, ++ TCA_CAKE_STATS_P_DROP, ++ TCA_CAKE_STATS_BLUE_TIMER_US, ++ __TCA_CAKE_STATS_MAX +}; ++#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) + ++enum { ++ __TCA_CAKE_TIN_STATS_INVALID, ++ TCA_CAKE_TIN_STATS_PAD, ++ TCA_CAKE_TIN_STATS_SENT_PACKETS, ++ TCA_CAKE_TIN_STATS_SENT_BYTES64, ++ TCA_CAKE_TIN_STATS_DROPPED_PACKETS, ++ TCA_CAKE_TIN_STATS_DROPPED_BYTES64, ++ TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS, ++ TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64, ++ TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS, ++ TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64, ++ TCA_CAKE_TIN_STATS_BACKLOG_PACKETS, ++ TCA_CAKE_TIN_STATS_BACKLOG_BYTES, ++ TCA_CAKE_TIN_STATS_THRESHOLD_RATE64, ++ TCA_CAKE_TIN_STATS_TARGET_US, ++ TCA_CAKE_TIN_STATS_INTERVAL_US, ++ TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS, ++ TCA_CAKE_TIN_STATS_WAY_MISSES, ++ TCA_CAKE_TIN_STATS_WAY_COLLISIONS, ++ TCA_CAKE_TIN_STATS_PEAK_DELAY_US, ++ TCA_CAKE_TIN_STATS_AVG_DELAY_US, ++ TCA_CAKE_TIN_STATS_BASE_DELAY_US, ++ TCA_CAKE_TIN_STATS_SPARSE_FLOWS, ++ TCA_CAKE_TIN_STATS_BULK_FLOWS, ++ TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS, ++ TCA_CAKE_TIN_STATS_MAX_SKBLEN, ++ TCA_CAKE_TIN_STATS_FLOW_QUANTUM, ++ __TCA_CAKE_TIN_STATS_MAX ++}; ++#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1) +#define TC_CAKE_MAX_TINS (8) -+struct tc_cake_xstats { -+ __u16 version; /* == 5, increments when struct extended */ -+ __u8 max_tins; /* == TC_CAKE_MAX_TINS */ -+ __u8 tin_cnt; /* <= TC_CAKE_MAX_TINS */ -+ -+ __u32 threshold_rate [TC_CAKE_MAX_TINS]; -+ __u32 target_us [TC_CAKE_MAX_TINS]; -+ struct tc_cake_traffic_stats sent [TC_CAKE_MAX_TINS]; -+ struct tc_cake_traffic_stats dropped [TC_CAKE_MAX_TINS]; -+ struct tc_cake_traffic_stats ecn_marked[TC_CAKE_MAX_TINS]; -+ struct tc_cake_traffic_stats backlog [TC_CAKE_MAX_TINS]; -+ __u32 interval_us [TC_CAKE_MAX_TINS]; -+ __u32 way_indirect_hits[TC_CAKE_MAX_TINS]; -+ __u32 way_misses [TC_CAKE_MAX_TINS]; -+ __u32 way_collisions [TC_CAKE_MAX_TINS]; -+ __u32 peak_delay_us [TC_CAKE_MAX_TINS]; /* ~= bulk flow delay */ -+ __u32 avge_delay_us [TC_CAKE_MAX_TINS]; -+ __u32 base_delay_us [TC_CAKE_MAX_TINS]; /* ~= sparse flows delay */ -+ __u16 sparse_flows [TC_CAKE_MAX_TINS]; -+ __u16 bulk_flows [TC_CAKE_MAX_TINS]; -+ __u16 unresponse_flows [TC_CAKE_MAX_TINS]; /* v4 - was u32 last_len */ -+ __u16 spare [TC_CAKE_MAX_TINS]; /* v4 - split last_len */ -+ __u32 max_skblen [TC_CAKE_MAX_TINS]; -+ __u32 capacity_estimate; /* version 2 */ -+ __u32 memory_limit; /* version 3 */ -+ __u32 memory_used; /* version 3 */ -+ struct tc_cake_traffic_stats ack_drops [TC_CAKE_MAX_TINS]; /* v5 */ ++ ++enum { ++ CAKE_FLOW_NONE = 0, ++ CAKE_FLOW_SRC_IP, ++ CAKE_FLOW_DST_IP, ++ CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */ ++ CAKE_FLOW_FLOWS, ++ CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */ ++ CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */ ++ CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */ ++ CAKE_FLOW_MAX, +}; + ++enum { ++ CAKE_DIFFSERV_DIFFSERV3 = 0, ++ CAKE_DIFFSERV_DIFFSERV4, ++ CAKE_DIFFSERV_DIFFSERV8, ++ CAKE_DIFFSERV_BESTEFFORT, ++ CAKE_DIFFSERV_PRECEDENCE, ++ CAKE_DIFFSERV_MAX ++}; ++ ++enum { ++ CAKE_ACK_NONE = 0, ++ CAKE_ACK_FILTER, ++ CAKE_ACK_AGGRESSIVE, ++ CAKE_ACK_MAX ++}; ++ ++enum { ++ CAKE_ATM_NONE = 0, ++ CAKE_ATM_ATM, ++ CAKE_ATM_PTM, ++ CAKE_ATM_MAX ++}; ++ ++ #endif +--- /dev/null ++++ b/man/man8/tc-cake.8 +@@ -0,0 +1,632 @@ ++.TH CAKE 8 "23 November 2017" "iproute2" "Linux" ++.SH NAME ++CAKE \- Common Applications Kept Enhanced (CAKE) ++.SH SYNOPSIS ++.B tc qdisc ... cake ++.br ++[ ++.BR bandwidth ++RATE | ++.BR unlimited* ++| ++.BR autorate_ingress ++] ++.br ++[ ++.BR rtt ++TIME | ++.BR datacentre ++| ++.BR lan ++| ++.BR metro ++| ++.BR regional ++| ++.BR internet* ++| ++.BR oceanic ++| ++.BR satellite ++| ++.BR interplanetary ++] ++.br ++[ ++.BR besteffort ++| ++.BR diffserv8 ++| ++.BR diffserv4 ++| ++.BR diffserv3* ++] ++.br ++[ ++.BR flowblind ++| ++.BR srchost ++| ++.BR dsthost ++| ++.BR hosts ++| ++.BR flows ++| ++.BR dual-srchost ++| ++.BR dual-dsthost ++| ++.BR triple-isolate* ++] ++.br ++[ ++.BR nat ++| ++.BR nonat* ++] ++.br ++[ ++.BR wash ++| ++.BR nowash* ++] ++.br ++[ ++.BR ack-filter ++| ++.BR ack-filter-aggressive ++| ++.BR no-ack-filter* ++] ++.br ++[ ++.BR memlimit ++LIMIT ] ++.br ++[ ++.BR ptm ++| ++.BR atm ++| ++.BR noatm* ++] ++.br ++[ ++.BR overhead ++N | ++.BR conservative ++| ++.BR raw* ++] ++.br ++[ ++.BR mpu ++N ] ++.br ++[ ++.BR ingress ++| ++.BR egress* ++] ++.br ++(* marks defaults) ++ ++ ++.SH DESCRIPTION ++CAKE (Common Applications Kept Enhanced) is a shaping-capable queue discipline ++which uses both AQM and FQ. It combines COBALT, which is an AQM algorithm ++combining Codel and BLUE, a shaper which operates in deficit mode, and a variant ++of DRR++ for flow isolation. 8-way set-associative hashing is used to virtually ++eliminate hash collisions. Priority queuing is available through a simplified ++diffserv implementation. Overhead compensation for various encapsulation ++schemes is tightly integrated. ++ ++All settings are optional; the default settings are chosen to be sensible in ++most common deployments. Most people will only need to set the ++.B bandwidth ++parameter to get useful results, but reading the ++.B Overhead Compensation ++and ++.B Round Trip Time ++sections is strongly encouraged. ++ ++.SH SHAPER PARAMETERS ++CAKE uses a deficit-mode shaper, which does not exhibit the initial burst ++typical of token-bucket shapers. It will automatically burst precisely as much ++as required to maintain the configured throughput. As such, it is very ++straightforward to configure. ++.PP ++.B unlimited ++(default) ++.br ++ No limit on the bandwidth. ++.PP ++.B bandwidth ++RATE ++.br ++ Set the shaper bandwidth. See ++.BR tc(8) ++or examples below for details of the RATE value. ++.PP ++.B autorate_ingress ++.br ++ Automatic capacity estimation based on traffic arriving at this qdisc. ++This is most likely to be useful with cellular links, which tend to change ++quality randomly. A ++.B bandwidth ++parameter can be used in conjunction to specify an initial estimate. The shaper ++will periodically be set to a bandwidth slightly below the estimated rate. This ++estimator cannot estimate the bandwidth of links downstream of itself. ++ ++.SH OVERHEAD COMPENSATION PARAMETERS ++The size of each packet on the wire may differ from that seen by Linux. The ++following parameters allow CAKE to compensate for this difference by internally ++considering each packet to be bigger than Linux informs it. To assist users who ++are not expert network engineers, keywords have been provided to represent a ++number of common link technologies. ++ ++.SS Manual Overhead Specification ++.B overhead ++BYTES ++.br ++ Adds BYTES to the size of each packet. BYTES may be negative; values ++between -64 and 256 (inclusive) are accepted. ++.PP ++.B mpu ++BYTES ++.br ++ Rounds each packet (including overhead) up to a minimum length ++BYTES. BYTES may not be negative; values between 0 and 256 (inclusive) ++are accepted. ++.PP ++.B atm ++.br ++ Compensates for ATM cell framing, which is normally found on ADSL links. ++This is performed after the ++.B overhead ++parameter above. ATM uses fixed 53-byte cells, each of which can carry 48 bytes ++payload. ++.PP ++.B ptm ++.br ++ Compensates for PTM encoding, which is normally found on VDSL2 links and ++uses a 64b/65b encoding scheme. It is even more efficient to simply ++derate the specified shaper bandwidth by a factor of 64/65 or 0.984. See ++ITU G.992.3 Annex N and IEEE 802.3 Section 61.3 for details. ++.PP ++.B noatm ++.br ++ Disables ATM and PTM compensation. ++ ++.SS Failsafe Overhead Keywords ++These two keywords are provided for quick-and-dirty setup. Use them if you ++can't be bothered to read the rest of this section. ++.PP ++.B raw ++(default) ++.br ++ Turns off all overhead compensation in CAKE. The packet size reported ++by Linux will be used directly. ++.PP ++ Other overhead keywords may be added after "raw". The effect of this is ++to make the overhead compensation operate relative to the reported packet size, ++not the underlying IP packet size. ++.PP ++.B conservative ++.br ++ Compensates for more overhead than is likely to occur on any ++widely-deployed link technology. ++.br ++ Equivalent to ++.B overhead 48 atm. ++ ++.SS ADSL Overhead Keywords ++Most ADSL modems have a way to check which framing scheme is in use. Often this ++is also specified in the settings document provided by the ISP. The keywords in ++this section are intended to correspond with these sources of information. All ++of them implicitly set the ++.B atm ++flag. ++.PP ++.B pppoa-vcmux ++.br ++ Equivalent to ++.B overhead 10 atm ++.PP ++.B pppoa-llc ++.br ++ Equivalent to ++.B overhead 14 atm ++.PP ++.B pppoe-vcmux ++.br ++ Equivalent to ++.B overhead 32 atm ++.PP ++.B pppoe-llcsnap ++.br ++ Equivalent to ++.B overhead 40 atm ++.PP ++.B bridged-vcmux ++.br ++ Equivalent to ++.B overhead 24 atm ++.PP ++.B bridged-llcsnap ++.br ++ Equivalent to ++.B overhead 32 atm ++.PP ++.B ipoa-vcmux ++.br ++ Equivalent to ++.B overhead 8 atm ++.PP ++.B ipoa-llcsnap ++.br ++ Equivalent to ++.B overhead 16 atm ++.PP ++See also the Ethernet Correction Factors section below. ++ ++.SS VDSL2 Overhead Keywords ++ATM was dropped from VDSL2 in favour of PTM, which is a much more ++straightforward framing scheme. Some ISPs retained PPPoE for compatibility with ++their existing back-end systems. ++.PP ++.B pppoe-ptm ++.br ++ Equivalent to ++.B overhead 30 ptm ++ ++.br ++ PPPoE: 2B PPP + 6B PPPoE + ++.br ++ ETHERNET: 6B dest MAC + 6B src MAC + 2B ethertype + 4B Frame Check Sequence + ++.br ++ PTM: 1B Start of Frame (S) + 1B End of Frame (Ck) + 2B TC-CRC (PTM-FCS) ++.br ++.PP ++.B bridged-ptm ++.br ++ Equivalent to ++.B overhead 22 ptm ++.br ++ ETHERNET: 6B dest MAC + 6B src MAC + 2B ethertype + 4B Frame Check Sequence + ++.br ++ PTM: 1B Start of Frame (S) + 1B End of Frame (Ck) + 2B TC-CRC (PTM-FCS) ++.br ++.PP ++See also the Ethernet Correction Factors section below. ++ ++.SS DOCSIS Cable Overhead Keyword ++DOCSIS is the universal standard for providing Internet service over cable-TV ++infrastructure. ++ ++In this case, the actual on-wire overhead is less important than the packet size ++the head-end equipment uses for shaping and metering. This is specified to be ++an Ethernet frame including the CRC (aka FCS). ++.PP ++.B docsis ++.br ++ Equivalent to ++.B overhead 18 mpu 64 noatm ++ ++.SS Ethernet Overhead Keywords ++.PP ++.B ethernet ++.br ++ Accounts for Ethernet's preamble, inter-frame gap, and Frame Check ++Sequence. Use this keyword when the bottleneck being shaped for is an ++actual Ethernet cable. ++.br ++ Equivalent to ++.B overhead 38 mpu 84 noatm ++.PP ++.B ether-vlan ++.br ++ Adds 4 bytes to the overhead compensation, accounting for an IEEE 802.1Q ++VLAN header appended to the Ethernet frame header. NB: Some ISPs use one or ++even two of these within PPPoE; this keyword may be repeated as necessary to ++express this. ++ ++.SH ROUND TRIP TIME PARAMETERS ++Active Queue Management (AQM) consists of embedding congestion signals in the ++packet flow, which receivers use to instruct senders to slow down when the queue ++is persistently occupied. CAKE uses ECN signalling when available, and packet ++drops otherwise, according to a combination of the Codel and BLUE AQM algorithms ++called COBALT. ++ ++Very short latencies require a very rapid AQM response to adequately control ++latency. However, such a rapid response tends to impair throughput when the ++actual RTT is relatively long. CAKE allows specifying the RTT it assumes for ++tuning various parameters. Actual RTTs within an order of magnitude of this ++will generally work well for both throughput and latency management. ++ ++At the 'lan' setting and below, the time constants are similar in magnitude to ++the jitter in the Linux kernel itself, so congestion might be signalled ++prematurely. The flows will then become sparse and total throughput reduced, ++leaving little or no back-pressure for the fairness logic to work against. Use ++the "metro" setting for local lans unless you have a custom kernel. ++.PP ++.B rtt ++TIME ++.br ++ Manually specify an RTT. ++.PP ++.B datacentre ++.br ++ For extremely high-performance 10GigE+ networks only. Equivalent to ++.B rtt 100us. ++.PP ++.B lan ++.br ++ For pure Ethernet (not Wi-Fi) networks, at home or in the office. Don't ++use this when shaping for an Internet access link. Equivalent to ++.B rtt 1ms. ++.PP ++.B metro ++.br ++ For traffic mostly within a single city. Equivalent to ++.B rtt 10ms. ++.PP ++.B regional ++.br ++ For traffic mostly within a European-sized country. Equivalent to ++.B rtt 30ms. ++.PP ++.B internet ++(default) ++.br ++ This is suitable for most Internet traffic. Equivalent to ++.B rtt 100ms. ++.PP ++.B oceanic ++.br ++ For Internet traffic with generally above-average latency, such as that ++suffered by Australasian residents. Equivalent to ++.B rtt 300ms. ++.PP ++.B satellite ++.br ++ For traffic via geostationary satellites. Equivalent to ++.B rtt 1000ms. ++.PP ++.B interplanetary ++.br ++ So named because Jupiter is about 1 light-hour from Earth. Use this to ++(almost) completely disable AQM actions. Equivalent to ++.B rtt 1000s. ++ ++.SH FLOW ISOLATION PARAMETERS ++With flow isolation enabled, CAKE places packets from different flows into ++different queues, each of which carries its own AQM state. Packets from each ++queue are then delivered fairly, according to a DRR++ algorithm which minimises ++latency for "sparse" flows. CAKE uses a set-associative hashing algorithm to ++minimise flow collisions. ++ ++These keywords specify whether fairness based on source address, destination ++address, individual flows, or any combination of those is desired. ++.PP ++.B flowblind ++.br ++ Disables flow isolation; all traffic passes through a single queue for ++each tin. ++.PP ++.B srchost ++.br ++ Flows are defined only by source address. Could be useful on the egress ++path of an ISP backhaul. ++.PP ++.B dsthost ++.br ++ Flows are defined only by destination address. Could be useful on the ++ingress path of an ISP backhaul. ++.PP ++.B hosts ++.br ++ Flows are defined by source-destination host pairs. This is host ++isolation, rather than flow isolation. ++.PP ++.B flows ++.br ++ Flows are defined by the entire 5-tuple of source address, destination ++address, transport protocol, source port and destination port. This is the type ++of flow isolation performed by SFQ and fq_codel. ++.PP ++.B dual-srchost ++.br ++ Flows are defined by the 5-tuple, and fairness is applied first over ++source addresses, then over individual flows. Good for use on egress traffic ++from a LAN to the internet, where it'll prevent any one LAN host from ++monopolising the uplink, regardless of the number of flows they use. ++.PP ++.B dual-dsthost ++.br ++ Flows are defined by the 5-tuple, and fairness is applied first over ++destination addresses, then over individual flows. Good for use on ingress ++traffic to a LAN from the internet, where it'll prevent any one LAN host from ++monopolising the downlink, regardless of the number of flows they use. ++.PP ++.B triple-isolate ++(default) ++.br ++ Flows are defined by the 5-tuple, and fairness is applied over source ++*and* destination addresses intelligently (ie. not merely by host-pairs), and ++also over individual flows. Use this if you're not certain whether to use ++dual-srchost or dual-dsthost; it'll do both jobs at once, preventing any one ++host on *either* side of the link from monopolising it with a large number of ++flows. ++.PP ++.B nat ++.br ++ Instructs Cake to perform a NAT lookup before applying flow-isolation ++rules, to determine the true addresses and port numbers of the packet, to ++improve fairness between hosts "inside" the NAT. This has no practical effect ++in "flowblind" or "flows" modes, or if NAT is performed on a different host. ++.PP ++.B nonat ++(default) ++.br ++ Cake will not perform a NAT lookup. Flow isolation will be performed ++using the addresses and port numbers directly visible to the interface Cake is ++attached to. ++ ++.SH PRIORITY QUEUE PARAMETERS ++CAKE can divide traffic into "tins" based on the Diffserv field. Each tin has ++its own independent set of flow-isolation queues, and is serviced based on a WRR ++algorithm. To avoid perverse Diffserv marking incentives, tin weights have a ++"priority sharing" value when bandwidth used by that tin is below a threshold, ++and a lower "bandwidth sharing" value when above. Bandwidth is compared against ++the threshold using the same algorithm as the deficit-mode shaper. ++ ++Detailed customisation of tin parameters is not provided. The following presets ++perform all necessary tuning, relative to the current shaper bandwidth and RTT ++settings. ++.PP ++.B besteffort ++.br ++ Disables priority queuing by placing all traffic in one tin. ++.PP ++.B precedence ++.br ++ Enables legacy interpretation of TOS "Precedence" field. Use of this ++preset on the modern Internet is firmly discouraged. ++.PP ++.B diffserv4 ++.br ++ Provides a general-purpose Diffserv implementation with four tins: ++.br ++ Bulk (CS1), 6.25% threshold, generally low priority. ++.br ++ Best Effort (general), 100% threshold. ++.br ++ Video (AF4x, AF3x, CS3, AF2x, CS2, TOS4, TOS1), 50% threshold. ++.br ++ Voice (CS7, CS6, EF, VA, CS5, CS4), 25% threshold. ++.PP ++.B diffserv3 ++(default) ++.br ++ Provides a simple, general-purpose Diffserv implementation with three tins: ++.br ++ Bulk (CS1), 6.25% threshold, generally low priority. ++.br ++ Best Effort (general), 100% threshold. ++.br ++ Voice (CS7, CS6, EF, VA, TOS4), 25% threshold, reduced Codel interval. ++ ++.SH OTHER PARAMETERS ++.B memlimit ++LIMIT ++.br ++ Limit the memory consumed by Cake to LIMIT bytes. Note that this does ++not translate directly to queue size (so do not size this based on bandwidth ++delay product considerations, but rather on worst case acceptable memory ++consumption), as there is some overhead in the data structures containing the ++packets, especially for small packets. ++ ++ By default, the limit is calculated based on the bandwidth and RTT ++settings. ++ ++.PP ++.B wash ++ ++.br ++ Traffic entering your diffserv domain is frequently mis-marked in ++transit from the perspective of your network, and traffic exiting yours may be ++mis-marked from the perspective of the transiting provider. ++ ++Apply the wash option to clear all extra diffserv (but not ECN bits), after ++priority queuing has taken place. ++ ++If you are shaping inbound, and cannot trust the diffserv markings (as is the ++case for Comcast Cable, among others), it is best to use a single queue ++"besteffort" mode with wash. ++ ++.SH EXAMPLES ++# tc qdisc delete root dev eth0 ++.br ++# tc qdisc add root dev eth0 cake bandwidth 100Mbit ethernet ++.br ++# tc -s qdisc show dev eth0 ++.br ++qdisc cake 1: dev eth0 root refcnt 2 bandwidth 100Mbit diffserv3 triple-isolate rtt 100.0ms noatm overhead 38 mpu 84 ++ Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) ++ backlog 0b 0p requeues 0 ++ memory used: 0b of 5000000b ++ capacity estimate: 100Mbit ++ min/max network layer size: 65535 / 0 ++ min/max overhead-adjusted size: 65535 / 0 ++ average network hdr offset: 0 ++ ++ Bulk Best Effort Voice ++ thresh 6250Kbit 100Mbit 25Mbit ++ target 5.0ms 5.0ms 5.0ms ++ interval 100.0ms 100.0ms 100.0ms ++ pk_delay 0us 0us 0us ++ av_delay 0us 0us 0us ++ sp_delay 0us 0us 0us ++ pkts 0 0 0 ++ bytes 0 0 0 ++ way_inds 0 0 0 ++ way_miss 0 0 0 ++ way_cols 0 0 0 ++ drops 0 0 0 ++ marks 0 0 0 ++ ack_drop 0 0 0 ++ sp_flows 0 0 0 ++ bk_flows 0 0 0 ++ un_flows 0 0 0 ++ max_len 0 0 0 ++ quantum 300 1514 762 ++ ++After some use: ++.br ++# tc -s qdisc show dev eth0 ++ ++qdisc cake 1: root refcnt 2 bandwidth 100Mbit diffserv3 triple-isolate rtt 100.0ms noatm overhead 38 mpu 84 ++ Sent 44709231 bytes 31931 pkt (dropped 45, overlimits 93782 requeues 0) ++ backlog 33308b 22p requeues 0 ++ memory used: 292352b of 5000000b ++ capacity estimate: 100Mbit ++ min/max network layer size: 28 / 1500 ++ min/max overhead-adjusted size: 84 / 1538 ++ average network hdr offset: 14 ++ ++ Bulk Best Effort Voice ++ thresh 6250Kbit 100Mbit 25Mbit ++ target 5.0ms 5.0ms 5.0ms ++ interval 100.0ms 100.0ms 100.0ms ++ pk_delay 8.7ms 6.9ms 5.0ms ++ av_delay 4.9ms 5.3ms 3.8ms ++ sp_delay 727us 1.4ms 511us ++ pkts 2590 21271 8137 ++ bytes 3081804 30302659 11426206 ++ way_inds 0 46 0 ++ way_miss 3 17 4 ++ way_cols 0 0 0 ++ drops 20 15 10 ++ marks 0 0 0 ++ ack_drop 0 0 0 ++ sp_flows 2 4 1 ++ bk_flows 1 2 1 ++ un_flows 0 0 0 ++ max_len 1514 1514 1514 ++ quantum 300 1514 762 ++ ++.SH SEE ALSO ++.BR tc (8), ++.BR tc-codel (8), ++.BR tc-fq_codel (8), ++.BR tc-red (8) ++ ++.SH AUTHORS ++Cake's principal author is Jonathan Morton, with contributions from ++Tony Ambardar, Kevin Darbyshire-Bryant, Toke Høiland-Jørgensen, ++Sebastian Moeller, Ryan Mounce, Dean Scarff, Nils Andreas Svee, and Dave Täht. ++ ++This manual page was written by Loganaden Velvindron. Please report corrections ++to the Linux Networking mailing list <netdev@vger.kernel.org>. --- a/tc/Makefile +++ b/tc/Makefile -@@ -63,6 +63,7 @@ TCMODULES += q_codel.o +@@ -64,6 +64,7 @@ TCMODULES += em_meta.o + TCMODULES += q_mqprio.o + TCMODULES += q_codel.o TCMODULES += q_fq_codel.o ++TCMODULES += q_cake.o TCMODULES += q_fq.o TCMODULES += q_pie.o -+TCMODULES += q_cake.o TCMODULES += q_hhf.o - TCMODULES += e_bpf.o - --- /dev/null +++ b/tc/q_cake.c -@@ -0,0 +1,771 @@ +@@ -0,0 +1,730 @@ ++/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Common Applications Kept Enhanced -- CAKE + * -+ * Copyright (C) 2014-2015 Jonathan Morton <chromatix99@gmail.com> -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions, and the following disclaimer, -+ * without modification. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The names of the authors may not be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * Alternatively, provided that this notice is retained in full, this -+ * software may be distributed under the terms of the GNU General -+ * Public License ("GPL") version 2, in which case the provisions of the -+ * GPL apply INSTEAD OF those given above. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH -+ * DAMAGE. -+ * ++ * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com> ++ * Copyright (C) 2017-2018 Toke Høiland-Jørgensen <toke@toke.dk> + */ + +#include <stddef.h> @@ -128,19 +787,46 @@ +#include "utils.h" +#include "tc_util.h" + ++struct cake_preset { ++ char *name; ++ unsigned int target; ++ unsigned int interval; ++}; ++ ++static struct cake_preset presets[] = { ++ {"datacentre", 5, 100}, ++ {"lan", 50, 1000}, ++ {"metro", 500, 10000}, ++ {"regional", 1500, 30000}, ++ {"internet", 5000, 100000}, ++ {"oceanic", 15000, 300000}, ++ {"satellite", 50000, 1000000}, ++ {"interplanetary", 50000000, 1000000000}, ++}; ++ ++ ++static struct cake_preset *find_preset(char *argv) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(presets); i++) ++ if (!strcmp(argv, presets[i].name)) ++ return &presets[i]; ++ return NULL; ++} ++ +static void explain(void) +{ + fprintf(stderr, +"Usage: ... cake [ bandwidth RATE | unlimited* | autorate_ingress ]\n" +" [ rtt TIME | datacentre | lan | metro | regional |\n" +" internet* | oceanic | satellite | interplanetary ]\n" -+" [ besteffort | diffserv8 | diffserv4 | diffserv-llt |\n" -+" diffserv3* ]\n" ++" [ besteffort | diffserv8 | diffserv4 | diffserv3* ]\n" +" [ flowblind | srchost | dsthost | hosts | flows |\n" +" dual-srchost | dual-dsthost | triple-isolate* ]\n" +" [ nat | nonat* ]\n" -+" [ wash | nowash * ]\n" -+" [ ack-filter | ack-filter-aggressive | no-ack-filter * ]\n" ++" [ wash | nowash* ]\n" ++" [ ack-filter | ack-filter-aggressive | no-ack-filter* ]\n" +" [ memlimit LIMIT ]\n" +" [ ptm | atm | noatm* ] [ overhead N | conservative | raw* ]\n" +" [ mpu N ] [ ingress | egress* ]\n" @@ -148,10 +834,10 @@ +} + +static int cake_parse_opt(struct qdisc_util *qu, int argc, char **argv, -+ struct nlmsghdr *n) ++ struct nlmsghdr *n, const char *dev) +{ + int unlimited = 0; -+ unsigned bandwidth = 0; ++ __u64 bandwidth = 0; + unsigned interval = 0; + unsigned target = 0; + unsigned diffserv = 0; @@ -168,11 +854,12 @@ + int ingress = -1; + int ack_filter = -1; + struct rtattr *tail; ++ struct cake_preset *preset, *preset_set = NULL; + + while (argc > 0) { + if (strcmp(*argv, "bandwidth") == 0) { + NEXT_ARG(); -+ if (get_rate(&bandwidth, *argv)) { ++ if (get_rate64(&bandwidth, *argv)) { + fprintf(stderr, "Illegal \"bandwidth\"\n"); + return -1; + } @@ -194,45 +881,25 @@ + target = interval / 20; + if(!target) + target = 1; -+ } else if (strcmp(*argv, "datacentre") == 0) { -+ interval = 100; -+ target = 5; -+ } else if (strcmp(*argv, "lan") == 0) { -+ interval = 1000; -+ target = 50; -+ } else if (strcmp(*argv, "metro") == 0) { -+ interval = 10000; -+ target = 500; -+ } else if (strcmp(*argv, "regional") == 0) { -+ interval = 30000; -+ target = 1500; -+ } else if (strcmp(*argv, "internet") == 0) { -+ interval = 100000; -+ target = 5000; -+ } else if (strcmp(*argv, "oceanic") == 0) { -+ interval = 300000; -+ target = 15000; -+ } else if (strcmp(*argv, "satellite") == 0) { -+ interval = 1000000; -+ target = 50000; -+ } else if (strcmp(*argv, "interplanetary") == 0) { -+ interval = 3600000000U; -+ target = 5000; ++ } else if ((preset = find_preset(*argv))) { ++ if (preset_set) ++ duparg(*argv, preset_set->name); ++ preset_set = preset; ++ target = preset->target; ++ interval = preset->interval; + + } else if (strcmp(*argv, "besteffort") == 0) { -+ diffserv = 1; ++ diffserv = CAKE_DIFFSERV_BESTEFFORT; + } else if (strcmp(*argv, "precedence") == 0) { -+ diffserv = 2; ++ diffserv = CAKE_DIFFSERV_PRECEDENCE; + } else if (strcmp(*argv, "diffserv8") == 0) { -+ diffserv = 3; ++ diffserv = CAKE_DIFFSERV_DIFFSERV8; + } else if (strcmp(*argv, "diffserv4") == 0) { -+ diffserv = 4; ++ diffserv = CAKE_DIFFSERV_DIFFSERV4; + } else if (strcmp(*argv, "diffserv") == 0) { -+ diffserv = 4; -+ } else if (strcmp(*argv, "diffserv-llt") == 0) { -+ diffserv = 5; ++ diffserv = CAKE_DIFFSERV_DIFFSERV4; + } else if (strcmp(*argv, "diffserv3") == 0) { -+ diffserv = 6; ++ diffserv = CAKE_DIFFSERV_DIFFSERV3; + + } else if (strcmp(*argv, "nowash") == 0) { + wash = 0; @@ -240,21 +907,21 @@ + wash = 1; + + } else if (strcmp(*argv, "flowblind") == 0) { -+ flowmode = 0; ++ flowmode = CAKE_FLOW_NONE; + } else if (strcmp(*argv, "srchost") == 0) { -+ flowmode = 1; ++ flowmode = CAKE_FLOW_SRC_IP; + } else if (strcmp(*argv, "dsthost") == 0) { -+ flowmode = 2; ++ flowmode = CAKE_FLOW_DST_IP; + } else if (strcmp(*argv, "hosts") == 0) { -+ flowmode = 3; ++ flowmode = CAKE_FLOW_HOSTS; + } else if (strcmp(*argv, "flows") == 0) { -+ flowmode = 4; ++ flowmode = CAKE_FLOW_FLOWS; + } else if (strcmp(*argv, "dual-srchost") == 0) { -+ flowmode = 5; ++ flowmode = CAKE_FLOW_DUAL_SRC; + } else if (strcmp(*argv, "dual-dsthost") == 0) { -+ flowmode = 6; ++ flowmode = CAKE_FLOW_DUAL_DST; + } else if (strcmp(*argv, "triple-isolate") == 0) { -+ flowmode = 7; ++ flowmode = CAKE_FLOW_TRIPLE; + + } else if (strcmp(*argv, "nat") == 0) { + nat = 1; @@ -262,14 +929,14 @@ + nat = 0; + + } else if (strcmp(*argv, "ptm") == 0) { -+ atm = 2; ++ atm = CAKE_ATM_PTM; + } else if (strcmp(*argv, "atm") == 0) { -+ atm = 1; ++ atm = CAKE_ATM_ATM; + } else if (strcmp(*argv, "noatm") == 0) { -+ atm = 0; ++ atm = CAKE_ATM_NONE; + + } else if (strcmp(*argv, "raw") == 0) { -+ atm = 0; ++ atm = CAKE_ATM_NONE; + overhead = 0; + overhead_set = true; + overhead_override = true; @@ -279,41 +946,41 @@ + * one whole ATM cell plus ATM framing. + * A safe choice if the actual overhead is unknown. + */ -+ atm = 1; ++ atm = CAKE_ATM_ATM; + overhead = 48; + overhead_set = true; + + /* Various ADSL framing schemes, all over ATM cells */ + } else if (strcmp(*argv, "ipoa-vcmux") == 0) { -+ atm = 1; ++ atm = CAKE_ATM_ATM; + overhead += 8; + overhead_set = true; + } else if (strcmp(*argv, "ipoa-llcsnap") == 0) { -+ atm = 1; ++ atm = CAKE_ATM_ATM; + overhead += 16; + overhead_set = true; + } else if (strcmp(*argv, "bridged-vcmux") == 0) { -+ atm = 1; ++ atm = CAKE_ATM_ATM; + overhead += 24; + overhead_set = true; + } else if (strcmp(*argv, "bridged-llcsnap") == 0) { -+ atm = 1; ++ atm = CAKE_ATM_ATM; + overhead += 32; + overhead_set = true; + } else if (strcmp(*argv, "pppoa-vcmux") == 0) { -+ atm = 1; ++ atm = CAKE_ATM_ATM; + overhead += 10; + overhead_set = true; + } else if (strcmp(*argv, "pppoa-llc") == 0) { -+ atm = 1; ++ atm = CAKE_ATM_ATM; + overhead += 14; + overhead_set = true; + } else if (strcmp(*argv, "pppoe-vcmux") == 0) { -+ atm = 1; ++ atm = CAKE_ATM_ATM; + overhead += 32; + overhead_set = true; + } else if (strcmp(*argv, "pppoe-llcsnap") == 0) { -+ atm = 1; ++ atm = CAKE_ATM_ATM; + overhead += 40; + overhead_set = true; + @@ -325,7 +992,7 @@ + * + 1B Start of Frame (S) + 1B End of Frame (Ck) + * + 2B TC-CRC (PTM-FCS) = 30B + */ -+ atm = 2; ++ atm = CAKE_ATM_PTM; + overhead += 30; + overhead_set = true; + } else if (strcmp(*argv, "bridged-ptm") == 0) { @@ -334,7 +1001,7 @@ + * + 1B Start of Frame (S) + 1B End of Frame (Ck) + * + 2B TC-CRC (PTM-FCS) = 22B + */ -+ atm = 2; ++ atm = CAKE_ATM_PTM; + overhead += 22; + overhead_set = true; + @@ -352,23 +1019,6 @@ + * active. + */ + -+ } else if (strcmp(*argv, "total_overhead") == 0) { -+ /* -+ * This is the overhead cake accounts for; added here so -+ * that cake's "tc -s qdisc" output can be directly -+ * pasted into the tc command to instantate a new cake.. -+ */ -+ NEXT_ARG(); -+ -+ } else if (strcmp(*argv, "hard_header_len") == 0) { -+ /* -+ * This is the overhead the kernel automatically -+ * accounted for; added here so that cake's "tc -s -+ * qdisc" output can be directly pasted into the tc -+ * command to instantiate a new cake.. -+ */ -+ NEXT_ARG(); -+ + } else if (strcmp(*argv, "ethernet") == 0) { + /* ethernet pre-amble & interframe gap & FCS + * you may need to add vlan tag */ @@ -387,7 +1037,7 @@ + * but not interframe gap or preamble. + */ + } else if (strcmp(*argv, "docsis") == 0) { -+ atm = 0; ++ atm = CAKE_ATM_NONE; + overhead += 18; + overhead_set = true; + mpu = 64; @@ -417,11 +1067,11 @@ + ingress = 0; + + } else if (strcmp(*argv, "no-ack-filter") == 0) { -+ ack_filter = 0; ++ ack_filter = CAKE_ACK_NONE; + } else if (strcmp(*argv, "ack-filter") == 0) { -+ ack_filter = 0x0200; ++ ack_filter = CAKE_ACK_FILTER; + } else if (strcmp(*argv, "ack-filter-aggressive") == 0) { -+ ack_filter = 0x0600; ++ ack_filter = CAKE_ACK_AGGRESSIVE; + + } else if (strcmp(*argv, "memlimit") == 0) { + NEXT_ARG(); @@ -444,7 +1094,7 @@ + tail = NLMSG_TAIL(n); + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + if (bandwidth || unlimited) -+ addattr_l(n, 1024, TCA_CAKE_BASE_RATE, &bandwidth, sizeof(bandwidth)); ++ addattr_l(n, 1024, TCA_CAKE_BASE_RATE64, &bandwidth, sizeof(bandwidth)); + if (diffserv) + addattr_l(n, 1024, TCA_CAKE_DIFFSERV_MODE, &diffserv, sizeof(diffserv)); + if (atm != -1) @@ -455,7 +1105,7 @@ + addattr_l(n, 1024, TCA_CAKE_OVERHEAD, &overhead, sizeof(overhead)); + if (overhead_override) { + unsigned zero = 0; -+ addattr_l(n, 1024, TCA_CAKE_ETHERNET, &zero, sizeof(zero)); ++ addattr_l(n, 1024, TCA_CAKE_RAW, &zero, sizeof(zero)); + } + if (mpu > 0) + addattr_l(n, 1024, TCA_CAKE_MPU, &mpu, sizeof(mpu)); @@ -484,13 +1134,13 @@ +static int cake_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) +{ + struct rtattr *tb[TCA_CAKE_MAX + 1]; -+ unsigned bandwidth = 0; ++ __u64 bandwidth = 0; + unsigned diffserv = 0; + unsigned flowmode = 0; + unsigned interval = 0; + unsigned memlimit = 0; + int overhead = 0; -+ int ethernet = 0; ++ int raw = 0; + int mpu = 0; + int atm = 0; + int nat = 0; @@ -498,6 +1148,7 @@ + int wash = 0; + int ingress = 0; + int ack_filter = 0; ++ int split_gso = 0; + SPRINT_BUF(b1); + SPRINT_BUF(b2); + @@ -506,87 +1157,89 @@ + + parse_rtattr_nested(tb, TCA_CAKE_MAX, opt); + -+ if (tb[TCA_CAKE_BASE_RATE] && -+ RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE]) >= sizeof(__u32)) { -+ bandwidth = rta_getattr_u32(tb[TCA_CAKE_BASE_RATE]); -+ if(bandwidth) ++ if (tb[TCA_CAKE_BASE_RATE64] && ++ RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE64]) >= sizeof(bandwidth)) { ++ bandwidth = rta_getattr_u64(tb[TCA_CAKE_BASE_RATE64]); ++ if(bandwidth) { + fprintf(f, "bandwidth %s ", sprint_rate(bandwidth, b1)); -+ else ++ } else + fprintf(f, "unlimited "); + } + if (tb[TCA_CAKE_AUTORATE] && + RTA_PAYLOAD(tb[TCA_CAKE_AUTORATE]) >= sizeof(__u32)) { + autorate = rta_getattr_u32(tb[TCA_CAKE_AUTORATE]); + if(autorate == 1) -+ fprintf(f, "autorate_ingress "); ++ fprintf(f, "ingress"); + else if(autorate) -+ fprintf(f, "(?autorate?) "); ++ fprintf(f, "unknown"); + } + if (tb[TCA_CAKE_DIFFSERV_MODE] && + RTA_PAYLOAD(tb[TCA_CAKE_DIFFSERV_MODE]) >= sizeof(__u32)) { + diffserv = rta_getattr_u32(tb[TCA_CAKE_DIFFSERV_MODE]); + switch(diffserv) { -+ case 1: -+ fprintf(f, "besteffort "); ++ case CAKE_DIFFSERV_DIFFSERV3: ++ fprintf(f, "diffserv3 "); + break; -+ case 2: -+ fprintf(f, "precedence "); ++ case CAKE_DIFFSERV_DIFFSERV4: ++ fprintf(f, "diffserv4 "); + break; -+ case 3: ++ case CAKE_DIFFSERV_DIFFSERV8: + fprintf(f, "diffserv8 "); + break; -+ case 4: -+ fprintf(f, "diffserv4 "); -+ break; -+ case 5: -+ fprintf(f, "diffserv-llt "); ++ case CAKE_DIFFSERV_BESTEFFORT: ++ fprintf(f, "besteffort "); + break; -+ case 6: -+ fprintf(f, "diffserv3 "); ++ case CAKE_DIFFSERV_PRECEDENCE: ++ fprintf(f, "precedence "); + break; + default: -+ fprintf(f, "(?diffserv?) "); ++ fprintf(f, "unknown "); + break; + }; + } + if (tb[TCA_CAKE_FLOW_MODE] && + RTA_PAYLOAD(tb[TCA_CAKE_FLOW_MODE]) >= sizeof(__u32)) { + flowmode = rta_getattr_u32(tb[TCA_CAKE_FLOW_MODE]); -+ nat = !!(flowmode & 64); -+ flowmode &= ~64; + switch(flowmode) { -+ case 0: ++ case CAKE_FLOW_NONE: + fprintf(f, "flowblind "); + break; -+ case 1: ++ case CAKE_FLOW_SRC_IP: + fprintf(f, "srchost "); + break; -+ case 2: ++ case CAKE_FLOW_DST_IP: + fprintf(f, "dsthost "); + break; -+ case 3: ++ case CAKE_FLOW_HOSTS: + fprintf(f, "hosts "); + break; -+ case 4: ++ case CAKE_FLOW_FLOWS: + fprintf(f, "flows "); + break; -+ case 5: ++ case CAKE_FLOW_DUAL_SRC: + fprintf(f, "dual-srchost "); + break; -+ case 6: ++ case CAKE_FLOW_DUAL_DST: + fprintf(f, "dual-dsthost "); + break; -+ case 7: ++ case CAKE_FLOW_TRIPLE: + fprintf(f, "triple-isolate "); + break; + default: -+ fprintf(f, "(?flowmode?) "); ++ fprintf(f, "unknown "); + break; + }; + -+ if(nat) -+ fprintf(f, "nat "); + } ++ ++ if (tb[TCA_CAKE_NAT] && ++ RTA_PAYLOAD(tb[TCA_CAKE_NAT]) >= sizeof(__u32)) { ++ nat = rta_getattr_u32(tb[TCA_CAKE_NAT]); ++ } ++ ++ if(nat) ++ fprintf(f, "nat "); ++ + if (tb[TCA_CAKE_WASH] && + RTA_PAYLOAD(tb[TCA_CAKE_WASH]) >= sizeof(__u32)) { + wash = rta_getattr_u32(tb[TCA_CAKE_WASH]); @@ -596,8 +1249,8 @@ + atm = rta_getattr_u32(tb[TCA_CAKE_ATM]); + } + if (tb[TCA_CAKE_OVERHEAD] && -+ RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__u32)) { -+ overhead = rta_getattr_u32(tb[TCA_CAKE_OVERHEAD]); ++ RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__s32)) { ++ overhead = *(__s32 *) RTA_DATA(tb[TCA_CAKE_OVERHEAD]); + } + if (tb[TCA_CAKE_MPU] && + RTA_PAYLOAD(tb[TCA_CAKE_MPU]) >= sizeof(__u32)) { @@ -611,9 +1264,12 @@ + RTA_PAYLOAD(tb[TCA_CAKE_ACK_FILTER]) >= sizeof(__u32)) { + ack_filter = rta_getattr_u32(tb[TCA_CAKE_ACK_FILTER]); + } -+ if (tb[TCA_CAKE_ETHERNET] && -+ RTA_PAYLOAD(tb[TCA_CAKE_ETHERNET]) >= sizeof(__u32)) { -+ ethernet = rta_getattr_u32(tb[TCA_CAKE_ETHERNET]); ++ if (tb[TCA_CAKE_SPLIT_GSO] && ++ RTA_PAYLOAD(tb[TCA_CAKE_SPLIT_GSO]) >= sizeof(__u32)) { ++ split_gso = rta_getattr_u32(tb[TCA_CAKE_SPLIT_GSO]); ++ } ++ if (tb[TCA_CAKE_RAW]) { ++ raw = 1; + } + if (tb[TCA_CAKE_RTT] && + RTA_PAYLOAD(tb[TCA_CAKE_RTT]) >= sizeof(__u32)) { @@ -621,50 +1277,42 @@ + } + + if (wash) -+ fprintf(f,"wash "); ++ fprintf(f, "wash "); + + if (ingress) -+ fprintf(f,"ingress "); ++ fprintf(f, "ingress "); + -+ if (ack_filter == 0x0600) -+ fprintf(f,"ack-filter-aggressive "); -+ else if (ack_filter) -+ fprintf(f,"ack-filter "); ++ if (ack_filter == CAKE_ACK_AGGRESSIVE) ++ fprintf(f, "ack-filter-aggresssive "); ++ else if (ack_filter == CAKE_ACK_FILTER) ++ fprintf(f, "ack-filter "); ++ else ++ fprintf(f, "no-ack-filter "); ++ ++ if (split_gso) ++ fprintf(f, "split-gso "); + + if (interval) + fprintf(f, "rtt %s ", sprint_time(interval, b2)); + -+ if (!atm && overhead == ethernet) { ++ if (raw) + fprintf(f, "raw "); -+ } else { -+ if (atm == 1) -+ fprintf(f, "atm "); -+ else if (atm == 2) -+ fprintf(f, "ptm "); -+ else -+ fprintf(f, "noatm "); -+ -+ fprintf(f, "overhead %d ", overhead); -+ -+ /* This is actually the *amount* of automatic compensation, but -+ * we only report its presence as a boolean for now. -+ */ -+ if (ethernet) -+ fprintf(f, "via-ethernet "); -+ } + -+ /* unconditionally report the overhead and hard_header_len overhead the -+ * kernel added automatically -+ */ -+ fprintf(f, "total_overhead %d ", overhead); -+ fprintf(f, "hard_header_len %d ", ethernet); ++ if (atm == CAKE_ATM_ATM) ++ fprintf(f, "atm "); ++ else if (atm == CAKE_ATM_PTM) ++ fprintf(f, "ptm "); ++ else if (!raw) ++ fprintf(f, "noatm "); + -+ if (mpu) { -+ fprintf(f, "mpu %d ", mpu); -+ } ++ fprintf(f, "overhead %d ", overhead); + -+ if (memlimit) ++ if (mpu) ++ fprintf(f, "mpu %u ", mpu); ++ ++ if (memlimit) { + fprintf(f, "memlimit %s", sprint_size(memlimit, b1)); ++ } + + return 0; +} @@ -672,58 +1320,108 @@ +static int cake_print_xstats(struct qdisc_util *qu, FILE *f, + struct rtattr *xstats) +{ -+ /* fq_codel stats format borrowed */ -+ struct tc_fq_codel_xstats *st; -+ struct tc_cake_xstats *stnc; + SPRINT_BUF(b1); -+ SPRINT_BUF(b2); ++ struct rtattr *st[TCA_CAKE_STATS_MAX + 1]; ++ int i; + + if (xstats == NULL) + return 0; + -+ if (RTA_PAYLOAD(xstats) < sizeof(st->type)) -+ return -1; -+ -+ st = RTA_DATA(xstats); -+ stnc = RTA_DATA(xstats); -+ -+ if (st->type == TCA_FQ_CODEL_XSTATS_QDISC && RTA_PAYLOAD(xstats) >= sizeof(*st)) { -+ fprintf(f, " maxpacket %u drop_overlimit %u new_flow_count %u ecn_mark %u", -+ st->qdisc_stats.maxpacket, -+ st->qdisc_stats.drop_overlimit, -+ st->qdisc_stats.new_flow_count, -+ st->qdisc_stats.ecn_mark); -+ fprintf(f, "\n new_flows_len %u old_flows_len %u", -+ st->qdisc_stats.new_flows_len, -+ st->qdisc_stats.old_flows_len); -+ } else if (st->type == TCA_FQ_CODEL_XSTATS_CLASS && RTA_PAYLOAD(xstats) >= sizeof(*st)) { -+ fprintf(f, " deficit %d count %u lastcount %u ldelay %s", -+ st->class_stats.deficit, -+ st->class_stats.count, -+ st->class_stats.lastcount, -+ sprint_time(st->class_stats.ldelay, b1)); -+ if (st->class_stats.dropping) { -+ fprintf(f, " dropping"); -+ if (st->class_stats.drop_next < 0) ++#define GET_STAT_U32(attr) rta_getattr_u32(st[TCA_CAKE_STATS_ ## attr]) ++#define GET_STAT_S32(attr) (*(__s32*)RTA_DATA(st[TCA_CAKE_STATS_ ## attr])) ++#define GET_STAT_U64(attr) rta_getattr_u64(st[TCA_CAKE_STATS_ ## attr]) ++ ++ parse_rtattr_nested(st, TCA_CAKE_STATS_MAX, xstats); ++ ++ if (st[TCA_CAKE_STATS_MEMORY_USED] && ++ st[TCA_CAKE_STATS_MEMORY_LIMIT]) { ++ fprintf(f, " memory used: %s", ++ sprint_size(GET_STAT_U32(MEMORY_USED), b1)); ++ ++ fprintf(f, " of %s\n", ++ sprint_size(GET_STAT_U32(MEMORY_LIMIT), b1)); ++ } ++ ++ if (st[TCA_CAKE_STATS_CAPACITY_ESTIMATE64]) { ++ fprintf(f, " capacity estimate: %s\n", ++ sprint_rate(GET_STAT_U64(CAPACITY_ESTIMATE64), b1)); ++ } ++ ++ if (st[TCA_CAKE_STATS_MIN_NETLEN] && ++ st[TCA_CAKE_STATS_MAX_NETLEN]) { ++ fprintf(f, " min/max network layer size: %8u", ++ GET_STAT_U32(MIN_NETLEN)); ++ fprintf(f, " /%8u\n", GET_STAT_U32(MAX_NETLEN)); ++ } ++ ++ if (st[TCA_CAKE_STATS_MIN_ADJLEN] && ++ st[TCA_CAKE_STATS_MAX_ADJLEN]) { ++ fprintf(f, " min/max overhead-adjusted size: %8u", ++ GET_STAT_U32(MIN_ADJLEN)); ++ fprintf(f, " /%8u\n", GET_STAT_U32(MAX_ADJLEN)); ++ } ++ ++ if (st[TCA_CAKE_STATS_AVG_NETOFF]) ++ fprintf(f, " average network hdr offset: %8u\n\n", ++ GET_STAT_U32(AVG_NETOFF)); ++ ++ /* class stats */ ++ if (st[TCA_CAKE_STATS_DEFICIT]) ++ fprintf(f, "deficit %u", ++ GET_STAT_S32(DEFICIT)); ++ if (st[TCA_CAKE_STATS_COBALT_COUNT]) ++ fprintf(f, "count %u", ++ GET_STAT_U32(COBALT_COUNT)); ++ ++ if (st[TCA_CAKE_STATS_DROPPING] && GET_STAT_U32(DROPPING)) { ++ fprintf(f, " dropping"); ++ if (st[TCA_CAKE_STATS_DROP_NEXT_US]) { ++ int drop_next = GET_STAT_S32(DROP_NEXT_US); ++ if (drop_next < 0) { + fprintf(f, " drop_next -%s", -+ sprint_time(-st->class_stats.drop_next, b1)); -+ else ++ sprint_time(drop_next, b1)); ++ } else { + fprintf(f, " drop_next %s", -+ sprint_time(st->class_stats.drop_next, b1)); ++ sprint_time(drop_next, b1)); ++ } ++ } ++ } ++ ++ if (st[TCA_CAKE_STATS_P_DROP]) { ++ fprintf(f, " blue_prob %u", ++ GET_STAT_U32(P_DROP)); ++ if (st[TCA_CAKE_STATS_BLUE_TIMER_US]) { ++ int blue_timer = GET_STAT_S32(BLUE_TIMER_US); ++ if (blue_timer < 0) { ++ fprintf(f, " blue_timer -%s", ++ sprint_time(blue_timer, b1)); ++ } else { ++ fprintf(f, " blue_timer %s", ++ sprint_time(blue_timer, b1)); ++ } + } -+ } else if (stnc->version >= 1 && stnc->version < 0xFF -+ && stnc->max_tins == TC_CAKE_MAX_TINS -+ && RTA_PAYLOAD(xstats) >= offsetof(struct tc_cake_xstats, capacity_estimate)) -+ { -+ int i; ++ } ++ ++#undef GET_STAT_U32 ++#undef GET_STAT_S32 ++#undef GET_STAT_U64 ++ ++ if (st[TCA_CAKE_STATS_TIN_STATS]) { ++ struct rtattr *tins[TC_CAKE_MAX_TINS + 1]; ++ struct rtattr *tstat[TC_CAKE_MAX_TINS][TCA_CAKE_TIN_STATS_MAX + 1]; ++ int num_tins = 0; + -+ if(stnc->version >= 3) -+ fprintf(f, " memory used: %s of %s\n", sprint_size(stnc->memory_used, b1), sprint_size(stnc->memory_limit, b2)); ++ parse_rtattr_nested(tins, TC_CAKE_MAX_TINS, st[TCA_CAKE_STATS_TIN_STATS]); + -+ if(stnc->version >= 2) -+ fprintf(f, " capacity estimate: %s\n", sprint_rate(stnc->capacity_estimate, b1)); ++ for (i = 1; i <= TC_CAKE_MAX_TINS && tins[i]; i++) { ++ parse_rtattr_nested(tstat[i-1], TCA_CAKE_TIN_STATS_MAX, tins[i]); ++ num_tins++; ++ } ++ ++ if (!num_tins) ++ return 0; + -+ switch(stnc->tin_cnt) { ++ switch(num_tins) { + case 3: + fprintf(f, " Bulk Best Effort Voice\n"); + break; @@ -732,112 +1430,61 @@ + fprintf(f, " Bulk Best Effort Video Voice\n"); + break; + -+ case 5: -+ fprintf(f, " Low Loss Best Effort Low Delay Bulk Net Control\n"); -+ break; -+ + default: + fprintf(f, " "); -+ for(i=0; i < stnc->tin_cnt; i++) ++ for(i=0; i < num_tins; i++) + fprintf(f, " Tin %u", i); + fprintf(f, "\n"); + }; + -+ fprintf(f, " thresh "); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12s", sprint_rate(stnc->threshold_rate[i], b1)); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " target "); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12s", sprint_time(stnc->target_us[i], b1)); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " interval"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12s", sprint_time(stnc->interval_us[i], b1)); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " pk_delay"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12s", sprint_time(stnc->peak_delay_us[i], b1)); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " av_delay"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12s", sprint_time(stnc->avge_delay_us[i], b1)); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " sp_delay"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12s", sprint_time(stnc->base_delay_us[i], b1)); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " pkts "); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->sent[i].packets); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " bytes "); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12llu", stnc->sent[i].bytes); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " way_inds"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->way_indirect_hits[i]); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " way_miss"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->way_misses[i]); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " way_cols"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->way_collisions[i]); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " drops "); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->dropped[i].packets); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " marks "); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->ecn_marked[i].packets); -+ fprintf(f, "\n"); -+ -+ if(stnc->version >= 5) { -+ fprintf(f, " ack_drop"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->ack_drops[i].packets); -+ fprintf(f, "\n"); -+ } -+ -+ fprintf(f, " sp_flows"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->sparse_flows[i]); -+ fprintf(f, "\n"); -+ -+ fprintf(f, " bk_flows"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->bulk_flows[i]); -+ fprintf(f, "\n"); -+ -+ if(stnc->version >= 4) { -+ fprintf(f, " un_flows"); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->unresponse_flows[i]); -+ fprintf(f, "\n"); -+ } -+ -+ fprintf(f, " max_len "); -+ for(i=0; i < stnc->tin_cnt; i++) -+ fprintf(f, " %12u", stnc->max_skblen[i]); -+ fprintf(f, "\n"); -+ } else { -+ return -1; ++#define GET_TSTAT(i, attr) (tstat[i][TCA_CAKE_TIN_STATS_ ## attr]) ++#define PRINT_TSTAT(name, attr, fmts, val) do { \ ++ if (GET_TSTAT(0, attr)) { \ ++ fprintf(f, name); \ ++ for (i = 0; i < num_tins; i++) \ ++ fprintf(f, " %12" fmts, val); \ ++ fprintf(f, "\n"); \ ++ } \ ++ } while (0) ++ ++#define SPRINT_TSTAT(pfunc, type, name, attr) PRINT_TSTAT( \ ++ name, attr, "s", sprint_ ## pfunc( \ ++ rta_getattr_ ## type(GET_TSTAT(i, attr)), b1)) ++ ++#define PRINT_TSTAT_U32(name, attr) PRINT_TSTAT( \ ++ name, attr, "u", rta_getattr_u32(GET_TSTAT(i, attr))) ++ ++#define PRINT_TSTAT_U64(name, attr) PRINT_TSTAT( \ ++ name, attr, "llu", rta_getattr_u64(GET_TSTAT(i, attr))) ++ ++ SPRINT_TSTAT(rate, u64, " thresh ", THRESHOLD_RATE64); ++ SPRINT_TSTAT(time, u32, " target ", TARGET_US); ++ SPRINT_TSTAT(time, u32, " interval", INTERVAL_US); ++ SPRINT_TSTAT(time, u32, " pk_delay", PEAK_DELAY_US); ++ SPRINT_TSTAT(time, u32, " av_delay", AVG_DELAY_US); ++ SPRINT_TSTAT(time, u32, " sp_delay", BASE_DELAY_US); ++ SPRINT_TSTAT(size, u32, " backlog ", BACKLOG_BYTES); ++ ++ PRINT_TSTAT_U32(" pkts ", SENT_PACKETS); ++ PRINT_TSTAT_U64(" bytes ", SENT_BYTES64); ++ ++ PRINT_TSTAT_U32(" way_inds", WAY_INDIRECT_HITS); ++ PRINT_TSTAT_U32(" way_miss", WAY_MISSES); ++ PRINT_TSTAT_U32(" way_cols", WAY_COLLISIONS); ++ PRINT_TSTAT_U32(" drops ", DROPPED_PACKETS); ++ PRINT_TSTAT_U32(" marks ", ECN_MARKED_PACKETS); ++ PRINT_TSTAT_U32(" ack_drop", ACKS_DROPPED_PACKETS); ++ PRINT_TSTAT_U32(" sp_flows", SPARSE_FLOWS); ++ PRINT_TSTAT_U32(" bk_flows", BULK_FLOWS); ++ PRINT_TSTAT_U32(" un_flows", UNRESPONSIVE_FLOWS); ++ PRINT_TSTAT_U32(" max_len ", MAX_SKBLEN); ++ PRINT_TSTAT_U32(" quantum ", FLOW_QUANTUM); ++ ++#undef GET_STAT ++#undef PRINT_TSTAT ++#undef SPRINT_TSTAT ++#undef PRINT_TSTAT_U32 ++#undef PRINT_TSTAT_U64 + } + return 0; +} |