--- a/doc/ip-cref.tex +++ b/doc/ip-cref.tex @@ -1324,6 +1324,19 @@ peers are allowed to send to us. If it is not given, Linux uses the value selected with \verb|sysctl| variable \verb|net/ipv4/tcp_reordering|. +\item \verb|hoplimit NUMBER| + +--- [2.5.74+ only] Hop limit on the path to this destination. If it is not + given, Linux uses the value selected with \verb|sysctl| variable + \verb|net/ipv4/ip_default_ttl|. + +\item \verb|initcwnd NUMBER| + +--- [2.5.70+ only] Initial congestion window size when establishing + connections to this destination. This value is multiplied with the + MSS (``Maximal Segment Size'') for the connection to get the actual + window size. If it is not given (or set to zero), Linux uses the + values specified in~\cite{RFC2414}. \item \verb|nexthop NEXTHOP| @@ -2653,6 +2666,9 @@ http://www.cisco.com/univercd/cc/td/doc/ \bibitem{RFC-DHCP} R.~Droms. ``Dynamic Host Configuration Protocol.'', RFC-2131 +\bibitem{RFC2414} M.~Allman, S.~Floyd, C.~Partridge. +``Increasing TCP's Initial Window'', RFC-2414. + \end{thebibliography} --- a/doc/Makefile +++ b/doc/Makefile @@ -14,6 +14,7 @@ PAGESIZE=a4 PAGESPERPAGE=2 HTMLFILES=$(subst .sgml,.html,$(shell echo *.sgml)) +TXTFILES=$(subst .sgml,.txt,$(shell echo *.sgml)) DVIFILES=$(subst .ps,.dvi,$(PSFILES)) @@ -23,6 +24,8 @@ pstwocol: $(PSFILES) html: $(HTMLFILES) +txt: $(TXTFILES) + dvi: $(DVIFILES) print: $(PSFILES) @@ -47,9 +50,12 @@ print: $(PSFILES) %.html: %.sgml $(SGML2HTML) $< +%.txt: %.html + lynx -nolist -dump $< > $@ + install: install -m 0644 $(shell echo *.tex) $(DESTDIR)$(DOCDIR) install -m 0644 $(shell echo *.sgml) $(DESTDIR)$(DOCDIR) clean: - rm -f *.aux *.log *.toc $(PSFILES) $(DVIFILES) *.html + rm -f *.aux *.log *.toc $(PSFILES) $(DVIFILES) *.html $(TXTFILES) --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -1,3 +1,409 @@ +#if 0 +#ifndef __LINUX_PKT_SCHED_H +#define __LINUX_PKT_SCHED_H + +/* Logical priority bands not depending on specific packet scheduler. 
+ Every scheduler will map them to real traffic classes, if it has + no more precise mechanism to classify packets. + + These numbers have no special meaning, though their coincidence + with obsolete IPv6 values is not occasional :-). New IPv6 drafts + preferred full anarchy inspired by diffserv group. + + Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy + class, actually, as rule it will be handled with more care than + filler or even bulk. + */ + +#define TC_PRIO_BESTEFFORT 0 +#define TC_PRIO_FILLER 1 +#define TC_PRIO_BULK 2 +#define TC_PRIO_INTERACTIVE_BULK 4 +#define TC_PRIO_INTERACTIVE 6 +#define TC_PRIO_CONTROL 7 + +#define TC_PRIO_MAX 15 + +/* Generic queue statistics, available for all the elements. + Particular schedulers may have also their private records. + */ + +struct tc_stats +{ + __u64 bytes; /* NUmber of enqueues bytes */ + __u32 packets; /* Number of enqueued packets */ + __u32 drops; /* Packets dropped because of lack of resources */ + __u32 overlimits; /* Number of throttle events when this + * flow goes out of allocated bandwidth */ + __u32 bps; /* Current flow byte rate */ + __u32 pps; /* Current flow packet rate */ + __u32 qlen; + __u32 backlog; +#ifdef __KERNEL__ + spinlock_t *lock; +#endif +}; + +struct tc_estimator +{ + char interval; + unsigned char ewma_log; +}; + +/* "Handles" + --------- + + All the traffic control objects have 32bit identifiers, or "handles". + + They can be considered as opaque numbers from user API viewpoint, + but actually they always consist of two fields: major and + minor numbers, which are interpreted by kernel specially, + that may be used by applications, though not recommended. + + F.e. qdisc handles always have minor number equal to zero, + classes (or flows) have major equal to parent qdisc major, and + minor uniquely identifying class inside qdisc. 
+ + Macros to manipulate handles: + */ + +#define TC_H_MAJ_MASK (0xFFFF0000U) +#define TC_H_MIN_MASK (0x0000FFFFU) +#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) +#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) +#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) + +#define TC_H_UNSPEC (0U) +#define TC_H_ROOT (0xFFFFFFFFU) +#define TC_H_INGRESS (0xFFFFFFF1U) + +struct tc_ratespec +{ + unsigned char cell_log; + unsigned char __reserved; + unsigned short feature; + short addend; + unsigned short mpu; + __u32 rate; +}; + +/* FIFO section */ + +struct tc_fifo_qopt +{ + __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ +}; + +/* PRIO section */ + +#define TCQ_PRIO_BANDS 16 + +struct tc_prio_qopt +{ + int bands; /* Number of bands */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +}; + +/* CSZ section */ + +struct tc_csz_qopt +{ + int flows; /* Maximal number of guaranteed flows */ + unsigned char R_log; /* Fixed point position for round number */ + unsigned char delta_log; /* Log of maximal managed time interval */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> CSZ band */ +}; + +struct tc_csz_copt +{ + struct tc_ratespec slice; + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; +}; + +enum +{ + TCA_CSZ_UNSPEC, + TCA_CSZ_PARMS, + TCA_CSZ_RTAB, + TCA_CSZ_PTAB, +}; + +/* TBF section */ + +struct tc_tbf_qopt +{ + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; +}; + +enum +{ + TCA_TBF_UNSPEC, + TCA_TBF_PARMS, + TCA_TBF_RTAB, + TCA_TBF_PTAB, +}; + + +/* TEQL section */ + +/* TEQL does not require any parameters */ + +/* SFQ section */ + +struct tc_sfq_qopt +{ + unsigned quantum; /* Bytes per round allocated to flow */ + int perturb_period; /* Period of hash perturbation */ + __u32 limit; /* Maximal packets in queue */ + unsigned divisor; /* Hash divisor */ + unsigned flows; /* Maximal number of flows */ +}; + 
+/* + * NOTE: limit, divisor and flows are hardwired to code at the moment. + * + * limit=flows=128, divisor=1024; + * + * The only reason for this is efficiency, it is possible + * to change these parameters in compile time. + */ + +/* RED section */ + +enum +{ + TCA_RED_UNSPEC, + TCA_RED_PARMS, + TCA_RED_STAB, +}; + +struct tc_red_qopt +{ + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; +#define TC_RED_ECN 1 +}; + +struct tc_red_xstats +{ + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ +}; + +/* GRED section */ + +#define MAX_DPs 16 + +enum +{ + TCA_GRED_UNSPEC, + TCA_GRED_PARMS, + TCA_GRED_STAB, + TCA_GRED_DPS, +}; + +#define TCA_SET_OFF TCA_GRED_PARMS +struct tc_gred_qopt +{ + __u32 limit; /* HARD maximal queue length (bytes) +*/ + __u32 qth_min; /* Min average length threshold (bytes) +*/ + __u32 qth_max; /* Max average length threshold (bytes) +*/ + __u32 DP; /* upto 2^32 DPs */ + __u32 backlog; + __u32 qave; + __u32 forced; + __u32 early; + __u32 other; + __u32 pdrop; + + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + __u8 prio; /* prio of this VQ */ + __u32 packets; + __u32 bytesin; +}; +/* gred setup */ +struct tc_gred_sopt +{ + __u32 DPs; + __u32 def_DP; + __u8 grio; +}; + +/* HTB section */ +#define TC_HTB_NUMPRIO 8 +#define TC_HTB_MAXDEPTH 8 +#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */ + +struct tc_htb_opt +{ + struct tc_ratespec rate; + struct tc_ratespec ceil; + __u32 buffer; + __u32 cbuffer; + __u32 quantum; + __u32 
level; /* out only */ + __u32 prio; +}; +struct tc_htb_glob +{ + __u32 version; /* to match HTB/TC */ + __u32 rate2quantum; /* bps->quantum divisor */ + __u32 defcls; /* default class number */ + __u32 debug; /* debug flags */ + + /* stats */ + __u32 direct_pkts; /* count of non shapped packets */ +}; +enum +{ + TCA_HTB_UNSPEC, + TCA_HTB_PARMS, + TCA_HTB_INIT, + TCA_HTB_CTAB, + TCA_HTB_RTAB, +}; +struct tc_htb_xstats +{ + __u32 lends; + __u32 borrows; + __u32 giants; /* too big packets (rate will not be accurate) */ + __u32 tokens; + __u32 ctokens; +}; + +/* CBQ section */ + +#define TC_CBQ_MAXPRIO 8 +#define TC_CBQ_MAXLEVEL 8 +#define TC_CBQ_DEF_EWMA 5 + +struct tc_cbq_lssopt +{ + unsigned char change; + unsigned char flags; +#define TCF_CBQ_LSS_BOUNDED 1 +#define TCF_CBQ_LSS_ISOLATED 2 + unsigned char ewma_log; + unsigned char level; +#define TCF_CBQ_LSS_FLAGS 1 +#define TCF_CBQ_LSS_EWMA 2 +#define TCF_CBQ_LSS_MAXIDLE 4 +#define TCF_CBQ_LSS_MINIDLE 8 +#define TCF_CBQ_LSS_OFFTIME 0x10 +#define TCF_CBQ_LSS_AVPKT 0x20 + __u32 maxidle; + __u32 minidle; + __u32 offtime; + __u32 avpkt; +}; + +struct tc_cbq_wrropt +{ + unsigned char flags; + unsigned char priority; + unsigned char cpriority; + unsigned char __reserved; + __u32 allot; + __u32 weight; +}; + +struct tc_cbq_ovl +{ + unsigned char strategy; +#define TC_CBQ_OVL_CLASSIC 0 +#define TC_CBQ_OVL_DELAY 1 +#define TC_CBQ_OVL_LOWPRIO 2 +#define TC_CBQ_OVL_DROP 3 +#define TC_CBQ_OVL_RCLASSIC 4 + unsigned char priority2; + __u32 penalty; +}; + +struct tc_cbq_police +{ + unsigned char police; + unsigned char __res1; + unsigned short __res2; +}; + +struct tc_cbq_fopt +{ + __u32 split; + __u32 defmap; + __u32 defchange; +}; + +struct tc_cbq_xstats +{ + __u32 borrows; + __u32 overactions; + __s32 avgidle; + __s32 undertime; +}; + +enum +{ + TCA_CBQ_UNSPEC, + TCA_CBQ_LSSOPT, + TCA_CBQ_WRROPT, + TCA_CBQ_FOPT, + TCA_CBQ_OVL_STRATEGY, + TCA_CBQ_RATE, + TCA_CBQ_RTAB, + TCA_CBQ_POLICE, +}; + +#define TCA_CBQ_MAX TCA_CBQ_POLICE + 
+/* dsmark section */ + +enum { + TCA_DSMARK_UNSPEC, + TCA_DSMARK_INDICES, + TCA_DSMARK_DEFAULT_INDEX, + TCA_DSMARK_SET_TC_INDEX, + TCA_DSMARK_MASK, + TCA_DSMARK_VALUE +}; + +#define TCA_DSMARK_MAX TCA_DSMARK_VALUE + +/* ATM section */ + +enum { + TCA_ATM_UNSPEC, + TCA_ATM_FD, /* file/socket descriptor */ + TCA_ATM_PTR, /* pointer to descriptor - later */ + TCA_ATM_HDR, /* LL header */ + TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ + TCA_ATM_ADDR, /* PVC address (for output only) */ + TCA_ATM_STATE /* VC state (ATM_VS_*; for output only) */ +}; + +#define TCA_ATM_MAX TCA_ATM_STATE + +#endif +#endif #ifndef __LINUX_PKT_SCHED_H #define __LINUX_PKT_SCHED_H @@ -518,4 +924,116 @@ struct tc_drr_stats __u32 deficit; }; +/* WRR section */ + +/* Other includes */ +#include <linux/if_ether.h> + +// A sub-weight of a class. +// All numbers are represented as parts of (2^64-1). +struct tc_wrr_class_weight { + __u64 val; // Current value (0 is not valid) + __u64 decr; // Value per byte (2^64-1 is not valid) + __u64 incr; // Value per second (2^64-1 is not valid) + __u64 min; // Minimal value (0 is not valid) + __u64 max; // Maximal value (0 is not valid) + +// The time at which the above information was correct: + time_t tim; +}; + +// Packet sent when modifying a class: +struct tc_wrr_class_modf { + // Not-valid values are ignored. 
+ struct tc_wrr_class_weight weight1; + struct tc_wrr_class_weight weight2; +}; + +// Packet returned when querying a class: +struct tc_wrr_class_stats { + char used; // If this is false the information below is invalid + + struct tc_wrr_class_modf class_modf; + + unsigned char addr[ETH_ALEN]; + char usemac; // True if addr is a MAC address, else it is an IP address + // (this value is only for convenience, it is always the same + // value as in the qdisc) + int heappos; // Current heap position or 0 if not in heap + __u64 penal_ls; // Penalty value in heap (ls) + __u64 penal_ms; // Penalty value in heap (ms) +}; + +// Qdisc-wide penalty information (boolean values - 2 not valid) +struct tc_wrr_qdisc_weight { + char weight_mode; // 0=No automatic change to weight + // 1=Decrease normally + // 2=Also multiply with number of machines + // 3=Instead multiply with priority divided + // with priority of the other. + // -1=no change (NOTE(review): plain char may be unsigned on some ABIs - confirm how -1 is tested) +}; + +// Packet sent when modifying a qdisc: +struct tc_wrr_qdisc_modf { + // Not-valid values are ignored: + struct tc_wrr_qdisc_weight weight1; + struct tc_wrr_qdisc_weight weight2; +}; + +// Packet sent when creating a qdisc: +struct tc_wrr_qdisc_crt { + struct tc_wrr_qdisc_modf qdisc_modf; + + char srcaddr; // 1=lookup source, 0=lookup destination + char usemac; // 1=Classify on MAC addresses, 0=classify on IP + char usemasq; // 1=Classify based on masquerading - only valid + // if usemac is zero + int bands_max; // Maximal number of bands (i.e.: classes) + int proxy_maxconn;// If different from 0 then we support proxy remapping + // of packets. And this is the maximal number of + // concurrent proxy connections. 
+}; + +// Packet returned when querying a qdisc: +struct tc_wrr_qdisc_stats { + struct tc_wrr_qdisc_crt qdisc_crt; + int proxy_curconn; + int nodes_in_heap; // Current number of bands wanting to send something + int bands_cur; // Current number of bands used (i.e.: MAC/IP addresses seen) + int bands_reused; // Number of times a band has been reused. + int packets_requed; // Number of times packets have been requeued. + __u64 priosum; // Sum of priorities in heap where 1 is 2^32 +}; + +struct tc_wrr_qdisc_modf_std { + // This indicates which of the tc_wrr_qdisc_modf structures this is: + char proxy; // 0=This struct + + // Should we also change a class? + char change_class; + + // Only valid if change_class is false + struct tc_wrr_qdisc_modf qdisc_modf; + + // Only valid if change_class is true: + unsigned char addr[ETH_ALEN]; // Class to change (unused bytes should be 0) + struct tc_wrr_class_modf class_modf; // The change +}; + +// Used for proxy remapping: +struct tc_wrr_qdisc_modf_proxy { + // This indicates which of the tc_wrr_qdisc_modf structures this is: + char proxy; // 1=This struct + + // This is 1 if the proxyremap information should be reset + char reset; + + // changec is the number of elements in changes. 
+ int changec; + + // This is an array of type ProxyRemapBlock: + long changes[0]; +}; + #endif --- a/ip/iproute.c +++ b/ip/iproute.c @@ -73,7 +73,7 @@ static void usage(void) fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ]\n"); fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n"); fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n"); - fprintf(stderr, " [ rto_min TIME ]\n"); + fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ]\n"); fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n"); fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n"); fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n"); @@ -792,6 +792,30 @@ int iproute_modify(int cmd, unsigned fla invarg("\"reordering\" value is invalid\n", *argv); rta_addattr32(mxrta, sizeof(mxbuf), RTAX_REORDERING, reord); #endif +#ifdef RTAX_HOPLIMIT + } else if (strcmp(*argv, "hoplimit") == 0) { + unsigned hoplim; + NEXT_ARG(); + if (strcmp(*argv, "lock") == 0) { + mxlock |= (1<<RTAX_HOPLIMIT); + NEXT_ARG(); + } + if (get_unsigned(&hoplim, *argv, 0)) + invarg("\"hoplimit\" value is invalid\n", *argv); + rta_addattr32(mxrta, sizeof(mxbuf), RTAX_HOPLIMIT, hoplim); +#endif +#ifdef RTAX_INITCWND + } else if (strcmp(*argv, "initcwnd") == 0) { + unsigned initcwnd; + NEXT_ARG(); + if (strcmp(*argv, "lock") == 0) { + mxlock |= (1<<RTAX_INITCWND); /* lock bit must name the metric being set in this branch */ + NEXT_ARG(); + } + if (get_unsigned(&initcwnd, *argv, 0)) + invarg("\"initcwnd\" value is invalid\n", *argv); + rta_addattr32(mxrta, sizeof(mxbuf), RTAX_INITCWND, initcwnd); +#endif } else if (strcmp(*argv, "rtt") == 0) { unsigned rtt; NEXT_ARG(); --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -134,7 +134,7 @@ static int parse_args(int argc, char **a NEXT_ARG(); p->o_flags |= GRE_KEY; if (strchr(*argv, '.')) - p->o_key = get_addr32(*argv); + p->o_key = get_addr32(*argv); else { if (get_unsigned(&uval, *argv, 0)<0) { fprintf(stderr, "invalid value of \"okey\"\n"); --- a/Makefile +++ 
b/Makefile @@ -49,7 +49,7 @@ install: all $(DESTDIR)$(DOCDIR)/examples install -m 0644 $(shell find examples/diffserv -maxdepth 1 -type f) \ $(DESTDIR)$(DOCDIR)/examples/diffserv - @for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done + @set -e; for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR) install -m 0755 -d $(DESTDIR)$(MANDIR)/man8 install -m 0644 $(shell find man/man8 -maxdepth 1 -type f) $(DESTDIR)$(MANDIR)/man8 @@ -67,7 +67,7 @@ snapshot: clean: rm -f cscope.* - @for i in $(SUBDIRS) doc; \ + @set -e; for i in $(SUBDIRS) doc; \ do $(MAKE) $(MFLAGS) -C $$i clean; done clobber: clean --- a/misc/Makefile +++ b/misc/Makefile @@ -1,7 +1,8 @@ SSOBJ=ss.o ssfilter.o LNSTATOBJ=lnstat.o lnstat_util.o -TARGETS=ss nstat ifstat rtacct arpd lnstat +#TARGETS=ss nstat ifstat rtacct arpd lnstat +TARGETS=ss nstat rtacct lnstat include ../Config --- a/tc/Makefile +++ b/tc/Makefile @@ -14,6 +14,7 @@ TCMODULES += q_cbq.o TCMODULES += q_rr.o TCMODULES += q_multiq.o TCMODULES += q_netem.o +TCMODULES += q_wrr.o TCMODULES += f_rsvp.o TCMODULES += f_u32.o TCMODULES += f_route.o --- a/tc/q_htb.c +++ b/tc/q_htb.c @@ -1,3 +1,311 @@ +#if 0 +/* + * q_htb.c HTB. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Martin Devera, devik@cdi.cz + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <syslog.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <string.h> + +#include "utils.h" +#include "tc_util.h" + +#define HTB_TC_VER 0x30003 +#if HTB_TC_VER >> 16 != TC_HTB_PROTOVER +#error "Different kernel and TC HTB versions" +#endif + +static void explain(void) +{ + fprintf(stderr, "Usage: ... 
qdisc add ... htb [default N] [r2q N]\n" + " default minor id of class to which unclassified packets are sent {0}\n" + " r2q DRR quantums are computed as rate in Bps/r2q {10}\n" + " debug string of 16 numbers each 0-3 {0}\n\n" + "... class add ... htb rate R1 burst B1 [prio P] [slot S] [pslot PS]\n" + " [ceil R2] [cburst B2] [mtu MTU] [quantum Q]\n" + " rate rate allocated to this class (class can still borrow)\n" + " burst max bytes burst which can be accumulated during idle period {computed}\n" + " ceil definite upper class rate (no borrows) {rate}\n" + " cburst burst but for ceil {computed}\n" + " mtu max packet size we create rate map for {1600}\n" + " prio priority of leaf; lower are served first {0}\n" + " quantum how much bytes to serve from leaf at once {use r2q}\n" + "\nTC HTB version %d.%d\n",HTB_TC_VER>>16,HTB_TC_VER&0xffff + ); +} + +static void explain1(char *arg) +{ + fprintf(stderr, "Illegal \"%s\"\n", arg); + explain(); +} + + +#define usage() return(-1) + +static int htb_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n) +{ + struct tc_htb_glob opt; + struct rtattr *tail; + unsigned i; char *p; + memset(&opt,0,sizeof(opt)); + opt.rate2quantum = 10; + opt.version = 3; + + while (argc > 0) { + if (matches(*argv, "r2q") == 0) { + NEXT_ARG(); + if (get_u32(&opt.rate2quantum, *argv, 10)) { + explain1("r2q"); return -1; + } + } else if (matches(*argv, "default") == 0) { + NEXT_ARG(); + if (get_u32(&opt.defcls, *argv, 16)) { + explain1("default"); return -1; + } + } else if (matches(*argv, "debug") == 0) { + NEXT_ARG(); p = *argv; + for (i=0; i<16; i++,p++) { + if (*p<'0' || *p>'3') break; + opt.debug |= (*p-'0')<<(2*i); + } + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + explain(); + return -1; + } + argc--; argv++; + } + tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len)); + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + addattr_l(n, 2024, TCA_HTB_INIT, &opt, NLMSG_ALIGN(sizeof(opt))); + tail->rta_len = 
(((void*)n)+NLMSG_ALIGN(n->nlmsg_len)) - (void*)tail; + return 0; +} + +static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n) +{ + int ok=0; + struct tc_htb_opt opt; + __u32 rtab[256],ctab[256]; + unsigned buffer=0,cbuffer=0; + int cell_log=-1,ccell_log = -1,mtu; + struct rtattr *tail; + + memset(&opt, 0, sizeof(opt)); mtu = 1600; /* eth packet len */ + + while (argc > 0) { + if (matches(*argv, "prio") == 0) { + NEXT_ARG(); + if (get_u32(&opt.prio, *argv, 10)) { + explain1("prio"); return -1; + } + ok++; + } else if (matches(*argv, "mtu") == 0) { + NEXT_ARG(); + if (get_u32(&mtu, *argv, 10)) { + explain1("mtu"); return -1; + } + } else if (matches(*argv, "quantum") == 0) { + NEXT_ARG(); + if (get_u32(&opt.quantum, *argv, 10)) { + explain1("quantum"); return -1; + } + } else if (matches(*argv, "burst") == 0 || + strcmp(*argv, "buffer") == 0 || + strcmp(*argv, "maxburst") == 0) { + NEXT_ARG(); + if (get_size_and_cell(&buffer, &cell_log, *argv) < 0) { + explain1("buffer"); + return -1; + } + ok++; + } else if (matches(*argv, "cburst") == 0 || + strcmp(*argv, "cbuffer") == 0 || + strcmp(*argv, "cmaxburst") == 0) { + NEXT_ARG(); + if (get_size_and_cell(&cbuffer, &ccell_log, *argv) < 0) { + explain1("cbuffer"); + return -1; + } + ok++; + } else if (strcmp(*argv, "ceil") == 0) { + NEXT_ARG(); + if (opt.ceil.rate) { + fprintf(stderr, "Double \"ceil\" spec\n"); + return -1; + } + if (get_rate(&opt.ceil.rate, *argv)) { + explain1("ceil"); + return -1; + } + ok++; + } else if (strcmp(*argv, "rate") == 0) { + NEXT_ARG(); + if (opt.rate.rate) { + fprintf(stderr, "Double \"rate\" spec\n"); + return -1; + } + if (get_rate(&opt.rate.rate, *argv)) { + explain1("rate"); + return -1; + } + ok++; + } else if (strcmp(*argv, "help") == 0) { + explain(); + return -1; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + explain(); + return -1; + } + argc--; argv++; + } + +/* if (!ok) + return 0;*/ + + if (opt.rate.rate == 0) { + 
fprintf(stderr, "\"rate\" is required.\n"); + return -1; + } + /* if ceil params are missing, use the same as rate */ + if (!opt.ceil.rate) opt.ceil = opt.rate; + + /* compute minimal allowed burst from rate; mtu is added here to make + sute that buffer is larger than mtu and to have some safeguard space */ + if (!buffer) buffer = opt.rate.rate / HZ + mtu; + if (!cbuffer) cbuffer = opt.ceil.rate / HZ + mtu; + + if ((cell_log = tc_calc_rtable(opt.rate.rate, rtab, cell_log, mtu, 0)) < 0) { + fprintf(stderr, "htb: failed to calculate rate table.\n"); + return -1; + } + opt.buffer = tc_calc_xmittime(opt.rate.rate, buffer); + opt.rate.cell_log = cell_log; + + if ((ccell_log = tc_calc_rtable(opt.ceil.rate, ctab, cell_log, mtu, 0)) < 0) { + fprintf(stderr, "htb: failed to calculate ceil rate table.\n"); + return -1; + } + opt.cbuffer = tc_calc_xmittime(opt.ceil.rate, cbuffer); + opt.ceil.cell_log = ccell_log; + + tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len)); + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + addattr_l(n, 2024, TCA_HTB_PARMS, &opt, sizeof(opt)); + addattr_l(n, 3024, TCA_HTB_RTAB, rtab, 1024); + addattr_l(n, 4024, TCA_HTB_CTAB, ctab, 1024); + tail->rta_len = (((void*)n)+NLMSG_ALIGN(n->nlmsg_len)) - (void*)tail; + return 0; +} + +static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) +{ + struct rtattr *tb[TCA_HTB_RTAB+1]; + struct tc_htb_opt *hopt; + struct tc_htb_glob *gopt; + double buffer,cbuffer; + SPRINT_BUF(b1); + SPRINT_BUF(b2); + + if (opt == NULL) + return 0; + + memset(tb, 0, sizeof(tb)); + parse_rtattr(tb, TCA_HTB_RTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)); + + if (tb[TCA_HTB_PARMS]) { + + hopt = RTA_DATA(tb[TCA_HTB_PARMS]); + if (RTA_PAYLOAD(tb[TCA_HTB_PARMS]) < sizeof(*hopt)) return -1; + + if (!hopt->level) { + fprintf(f, "prio %d ", (int)hopt->prio); + if (show_details) + fprintf(f, "quantum %d ", (int)hopt->quantum); + } + fprintf(f, "rate %s ", sprint_rate(hopt->rate.rate, b1)); + buffer = 
((double)hopt->rate.rate*tc_core_tick2usec(hopt->buffer))/1000000; + fprintf(f, "ceil %s ", sprint_rate(hopt->ceil.rate, b1)); + cbuffer = ((double)hopt->ceil.rate*tc_core_tick2usec(hopt->cbuffer))/1000000; + if (show_details) { + fprintf(f, "burst %s/%u mpu %s ", sprint_size(buffer, b1), + 1<<hopt->rate.cell_log, sprint_size(hopt->rate.mpu, b2)); + fprintf(f, "cburst %s/%u mpu %s ", sprint_size(cbuffer, b1), + 1<<hopt->ceil.cell_log, sprint_size(hopt->ceil.mpu, b2)); + fprintf(f, "level %d ", (int)hopt->level); + } else { + fprintf(f, "burst %s ", sprint_size(buffer, b1)); + fprintf(f, "cburst %s ", sprint_size(cbuffer, b1)); + } + if (show_raw) + fprintf(f, "buffer [%08x] cbuffer [%08x] ", + hopt->buffer,hopt->cbuffer); + } + if (tb[TCA_HTB_INIT]) { + gopt = RTA_DATA(tb[TCA_HTB_INIT]); + if (RTA_PAYLOAD(tb[TCA_HTB_INIT]) < sizeof(*gopt)) return -1; + + fprintf(f, "r2q %d default %x direct_packets_stat %u", + gopt->rate2quantum,gopt->defcls,gopt->direct_pkts); + if (show_details) + fprintf(f," ver %d.%d",gopt->version >> 16,gopt->version & 0xffff); + } + return 0; +} + +static int htb_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats) +{ + struct tc_htb_xstats *st; + if (xstats == NULL) + return 0; + + if (RTA_PAYLOAD(xstats) < sizeof(*st)) + return -1; + + st = RTA_DATA(xstats); + fprintf(f, " lended: %u borrowed: %u giants: %u\n", + st->lends,st->borrows,st->giants); + fprintf(f, " tokens: %d ctokens: %d\n", st->tokens,st->ctokens); + return 0; +} + +struct qdisc_util htb_util = { + NULL, + "htb", + htb_parse_opt, + htb_print_opt, + htb_print_xstats, + htb_parse_class_opt, + htb_print_opt, +}; + +/* for testing of old one */ +struct qdisc_util htb2_util = { + NULL, + "htb2", + htb_parse_opt, + htb_print_opt, + htb_print_xstats, + htb_parse_class_opt, + htb_print_opt, +}; +#endif /* * q_htb.c HTB. 
* --- /dev/null +++ b/tc/q_wrr.c @@ -0,0 +1,322 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <syslog.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <string.h> +#include <math.h> + +#include "utils.h" +#include "tc_util.h" + +#define usage() return(-1) + +// Parses "wmode1=N" / "wmode2=N" arguments into opt; modes not given stay -1. Returns 0 on success, -1 on error +static int wrr_parse_qdisc_weight(int argc, char** argv, + struct tc_wrr_qdisc_modf* opt) { + int i; + + opt->weight1.weight_mode=-1; + opt->weight2.weight_mode=-1; + + for(i=0; i<argc; i++) { + if(!strncmp(argv[i],"wmode1=",7)) { /* strncmp stops at the terminator; memcmp would read past short arguments */ + opt->weight1.weight_mode=atoi(argv[i]+7); + } else if(!strncmp(argv[i],"wmode2=",7)) { + opt->weight2.weight_mode=atoi(argv[i]+7); + } else { + fprintf(stderr, "Usage: ... [wmode1=0|1|2|3] [wmode2=0|1|2|3]\n"); + return -1; + } + } + return 0; +} + +static int wrr_parse_class_modf(int argc, char** argv, + struct tc_wrr_class_modf* modf) { + int i; + + if(argc<1) { + fprintf(stderr, "Usage: ... [weight1=val] [decr1=val] [incr1=val] [min1=val] [max1=val] [weight2=val] ...\n"); + fprintf(stderr, " The values can be floating point like 0.42 or divisions like 42/100\n"); + return -1; + } + + // Set meaningless values: + modf->weight1.val=0; + modf->weight1.decr=(__u64)-1; + modf->weight1.incr=(__u64)-1; + modf->weight1.min=0; + modf->weight1.max=0; + modf->weight2.val=0; + modf->weight2.decr=(__u64)-1; + modf->weight2.incr=(__u64)-1; + modf->weight2.min=0; + modf->weight2.max=0; + + // And read values: + for(i=0; i<argc; i++) { + char arg[80]; + char* name,*value1=0,*value2=0; + long double f_val1,f_val2=1,value; + if(strlen(argv[i])>=sizeof(arg)) { + fprintf(stderr,"Argument too long: %s\n",argv[i]); + return -1; + } + strcpy(arg,argv[i]); + + name=strtok(arg,"="); + if(name) value1=strtok(0,"/"); + if(value1) value2=strtok(0,""); + + if(!value1) { + fprintf(stderr,"No = found in argument: %s\n",argv[i]); + return -1; + } + + f_val1=atof(value1); + if(value2) f_val2=atof(value2); + + 
if(f_val2==0) { + fprintf(stderr,"Division by 0\n"); + return -1; + } + + value=f_val1/f_val2; + if(value>1) value=1; + if(value<0) value=0; + value*=((__u64)-1); /* clamp the ratio to [0,1], then scale it by 2^64-1 */ + + // Store the scaled value in the field selected by name + if(!strcmp(name,"weight1")) modf->weight1.val=value; + else if(!strcmp(name,"decr1")) modf->weight1.decr=value; + else if(!strcmp(name,"incr1")) modf->weight1.incr=value; + else if(!strcmp(name,"min1")) modf->weight1.min=value; + else if(!strcmp(name,"max1")) modf->weight1.max=value; + else if(!strcmp(name,"weight2")) modf->weight2.val=value; + else if(!strcmp(name,"decr2")) modf->weight2.decr=value; + else if(!strcmp(name,"incr2")) modf->weight2.incr=value; + else if(!strcmp(name,"min2")) modf->weight2.min=value; + else if(!strcmp(name,"max2")) modf->weight2.max=value; + else { + fprintf(stderr,"illegal value: %s\n",name); + return -1; + } + } + + return 0; +} + +static int wrr_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n) +{ + if(n->nlmsg_flags & NLM_F_CREATE) { /* NLM_F_CREATE selects the create layout; otherwise the modify layout is built */ + // This is a create request: + struct tc_wrr_qdisc_crt opt; + + int sour,dest,ip,mac,masq; + + if(argc<4) { + fprintf(stderr, "Usage: ... 
wrr sour|dest ip|masq|mac maxclasses proxymaxcon [penalty-setup]\n"); + return -1; + } + + // Read sour/dest: + memset(&opt,0,sizeof(opt)); + sour=!strcmp(argv[0],"sour"); + dest=!strcmp(argv[0],"dest"); + + if(!sour && !dest) { + fprintf(stderr,"sour or dest must be specified\n"); + return -1; + } + + // Read ip/mac + ip=!strcmp(argv[1],"ip"); + mac=!strcmp(argv[1],"mac"); + masq=!strcmp(argv[1],"masq"); + + if(!ip && !mac && !masq) { + fprintf(stderr,"ip, masq or mac must be specified\n"); + return -1; + } + + opt.srcaddr=sour; + opt.usemac=mac; + opt.usemasq=masq; + opt.bands_max=atoi(argv[2]); + + opt.proxy_maxconn=atoi(argv[3]); + + // Read weights (a mode left at -1 by the parser defaults to 0 on create): + if(wrr_parse_qdisc_weight(argc-4,argv+4,&opt.qdisc_modf)<0) return -1; + if((signed char)opt.qdisc_modf.weight1.weight_mode==-1) opt.qdisc_modf.weight1.weight_mode=0; /* cast: plain char may be unsigned, in which case ==-1 is never true */ + if((signed char)opt.qdisc_modf.weight2.weight_mode==-1) opt.qdisc_modf.weight2.weight_mode=0; + + addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt)); + } else { + struct tc_wrr_qdisc_modf_std opt; + char qdisc,class; + + // This is a modify request: + if(argc<1) { + fprintf(stderr,"... qdisc ... or ... class ...\n"); + return -1; + } + + qdisc=!strcmp(argv[0],"qdisc"); + class=!strcmp(argv[0],"class"); + + if(!qdisc && !class) { + fprintf(stderr,"qdisc or class must be specified\n"); + return -1; + } + + argc--; + argv++; + + memset(&opt,0,sizeof(opt)); /* zero the whole struct (this also sets opt.proxy=0) so no uninitialized stack bytes are sent */ + + if(qdisc) { + opt.change_class=0; + if(wrr_parse_qdisc_weight(argc, argv, &opt.qdisc_modf)<0) return -1; + } else { + int a0,a1,a2,a3,a4=0,a5=0; + + opt.change_class=1; + + if(argc<1) { + fprintf(stderr,"... 
<mac>|<ip>|<masq> ...\n"); + return -1; + } + memset(opt.addr,0,sizeof(opt.addr)); + + if((sscanf(argv[0],"%i.%i.%i.%i",&a0,&a1,&a2,&a3)!=4) && + (sscanf(argv[0],"%x:%x:%x:%x:%x:%x",&a0,&a1,&a2,&a3,&a4,&a5)!=6)) { /* try dotted 4-part form first, then colon-separated 6-part hex form; NOTE(review): %x formally expects unsigned int*, a0..a5 are int */ + fprintf(stderr,"Wrong format of mac or ip address\n"); + return -1; + } + + opt.addr[0]=a0; opt.addr[1]=a1; opt.addr[2]=a2; /* each int is narrowed to one unsigned char; no range check is done */ + opt.addr[3]=a3; opt.addr[4]=a4; opt.addr[5]=a5; + + if(wrr_parse_class_modf(argc-1, argv+1, &opt.class_modf)<0) return -1; + } + + addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt)); + } + return 0; +} + +static int wrr_parse_copt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n) { /* parses class weight arguments and appends them as TCA_OPTIONS; returns 0, or -1 on a parse error */ + struct tc_wrr_class_modf opt; + + memset(&opt,0,sizeof(opt)); + if(wrr_parse_class_modf(argc,argv,&opt)<0) return -1; + + addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt)); + return 0; +} + +static int wrr_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) +{ /* prints the qdisc statistics carried in opt to f; returns 0, or -1 if the payload is too short */ + struct tc_wrr_qdisc_stats *qopt; + + if (opt == NULL) + return 0; + + if (RTA_PAYLOAD(opt) < sizeof(*qopt)) + return -1; + qopt = RTA_DATA(opt); + + fprintf(f,"\n (%s/%s) (maxclasses %i) (usedclasses %i) (reused classes %i)\n", + qopt->qdisc_crt.srcaddr ? "sour" : "dest", + qopt->qdisc_crt.usemac ? "mac" : (qopt->qdisc_crt.usemasq ? 
"masq" : "ip"), + qopt->qdisc_crt.bands_max, + qopt->bands_cur, + qopt->bands_reused + ); + + if(qopt->qdisc_crt.proxy_maxconn) { /* proxy counters are printed only when proxy_maxconn is non-zero */ + fprintf(f," (proxy maxcon %i) (proxy curcon %i)\n", + qopt->qdisc_crt.proxy_maxconn,qopt->proxy_curconn); + } + + fprintf(f," (waiting classes %i) (packets requeued %i) (priosum: %Lg)\n", + qopt->nodes_in_heap, + qopt->packets_requed, + qopt->priosum/((long double)((__u32)-1)) /* priosum is divided by 2^32-1 for display */ + ); + + fprintf(f," (wmode1 %i) (wmode2 %i) \n", + qopt->qdisc_crt.qdisc_modf.weight1.weight_mode, + qopt->qdisc_crt.qdisc_modf.weight2.weight_mode); + + return 0; +} + +static int wrr_print_copt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) { /* prints one class's statistics from opt to f; returns 0, or -1 if the payload is too short */ + struct tc_wrr_class_stats *copt; + long double d=(__u64)-1; /* divisor applied to every stored weight value before printing */ + + if (opt == NULL) return 0; + + if (RTA_PAYLOAD(opt) < sizeof(*copt)) + return -1; + copt = RTA_DATA(opt); + + if(!copt->used) { + fprintf(f,"(unused)"); + return 0; + } + + if(copt->usemac) { /* all 6 address bytes as hex for MAC, otherwise the first 4 as dotted decimal */ + fprintf(f,"\n (address: %.2X:%.2X:%.2X:%.2X:%.2X:%.2X)\n", + copt->addr[0],copt->addr[1],copt->addr[2], + copt->addr[3],copt->addr[4],copt->addr[5]); + } else { + fprintf(f,"\n (address: %i.%i.%i.%i)\n",copt->addr[0],copt->addr[1],copt->addr[2],copt->addr[3]); + } + + fprintf(f," (total weight: %Lg) (current position: %i) (counters: %u %u : %u %u)\n", + (copt->class_modf.weight1.val/d)*(copt->class_modf.weight2.val/d), /* total weight is the product of the two scaled weights */ + copt->heappos, + (unsigned)(copt->penal_ms>>32), + (unsigned)(copt->penal_ms & 0xffffffffU), + (unsigned)(copt->penal_ls>>32), + (unsigned)(copt->penal_ls & 0xffffffffU) + ); + + fprintf(f," Pars 1: (weight %Lg) (decr: %Lg) (incr: %Lg) (min: %Lg) (max: %Lg)\n", + copt->class_modf.weight1.val/d, + copt->class_modf.weight1.decr/d, + copt->class_modf.weight1.incr/d, + copt->class_modf.weight1.min/d, + copt->class_modf.weight1.max/d); + + fprintf(f," Pars 2: (weight %Lg) (decr: %Lg) (incr: %Lg) (min: %Lg) (max: %Lg)", + copt->class_modf.weight2.val/d, + copt->class_modf.weight2.decr/d, + copt->class_modf.weight2.incr/d, + 
copt->class_modf.weight2.min/d, + copt->class_modf.weight2.max/d); + + return 0; +} + +static int wrr_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats) +{ + return 0; /* nothing is printed for xstats; always reports success */ +} + + +struct qdisc_util wrr_qdisc_util = { /* callback table for the qdisc id "wrr" */ + .id = "wrr", + .parse_qopt = wrr_parse_opt, + .print_qopt = wrr_print_opt, + .print_xstats = wrr_print_xstats, + .parse_copt = wrr_parse_copt, + .print_copt = wrr_print_copt +};