diff options
Diffstat (limited to 'target/linux/generic/backport-4.14')
-rw-r--r-- | target/linux/generic/backport-4.14/380-v5.3-net-sched-Introduce-act_ctinfo-action.patch | 584 |
1 files changed, 584 insertions, 0 deletions
diff --git a/target/linux/generic/backport-4.14/380-v5.3-net-sched-Introduce-act_ctinfo-action.patch b/target/linux/generic/backport-4.14/380-v5.3-net-sched-Introduce-act_ctinfo-action.patch new file mode 100644 index 0000000000..26063985c0 --- /dev/null +++ b/target/linux/generic/backport-4.14/380-v5.3-net-sched-Introduce-act_ctinfo-action.patch @@ -0,0 +1,584 @@ +From 21d81d05787908b13a4079f42a63a5b3254b7ab4 Mon Sep 17 00:00:00 2001 +From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> +Date: Wed, 13 Mar 2019 20:54:49 +0000 +Subject: [PATCH] net: sched: Introduce act_ctinfo action + +ctinfo is a new tc filter action module. It is designed to restore DSCPs +stored in conntrack marks. + +The feature is intended for, and has been found useful for, restoring +ingress classifications based on egress classifications across links +that bleach or otherwise change DSCP, typically home ISP Internet links. +Restoring DSCP on ingress on the WAN link allows qdiscs such as CAKE to +shape inbound packets according to policies that are easier to implement +on egress. + +Ingress classification is traditionally a challenging task since +iptables rules haven't yet run and tc filter/eBPF programs are pre-NAT +lookups, hence are unable to see internal IPv4 addresses as used on the +typical home masquerading gateway. + +ctinfo understands the following parameters: + +dscp mask[/statemask] + +mask - a 32 bit mask of exactly 6 contiguous bits where conndscp will +place the DSCP in conntrack mark. The DSCP is left-shifted by the +number of unset lower bits of the mask before storing into the mark +field. + +statemask - a 32 bit mask of (usually) 1 bit length, outside the area +specified by mask. This represents a conditional operation flag: the +DSCP is only restored if the flag is set. 
This is useful to implement a +'one shot' iptables based classification where the 'complicated' +iptables rules are only run once to classify the connection on initial +(egress) packet and subsequent packets are all marked/restored with the +same DSCP. A mask of zero disables the conditional behaviour. + +optional parameters: + +zone - conntrack zone + +control - action related control (reclassify | pipe | drop | continue | +ok | goto chain <CHAIN_INDEX> + +Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> +--- + include/net/tc_act/tc_ctinfo.h | 28 ++ + include/uapi/linux/pkt_cls.h | 3 +- + include/uapi/linux/tc_act/tc_ctinfo.h | 34 +++ + net/sched/Kconfig | 13 + + net/sched/Makefile | 1 + + net/sched/act_ctinfo.c | 394 ++++++++++++++++++++++++++ + 6 files changed, 472 insertions(+), 1 deletion(-) + create mode 100644 include/net/tc_act/tc_ctinfo.h + create mode 100644 include/uapi/linux/tc_act/tc_ctinfo.h + create mode 100644 net/sched/act_ctinfo.c + +diff --git a/include/net/tc_act/tc_ctinfo.h b/include/net/tc_act/tc_ctinfo.h +new file mode 100644 +index 000000000000..d6a688571672 +--- /dev/null ++++ b/include/net/tc_act/tc_ctinfo.h +@@ -0,0 +1,28 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __NET_TC_CTINFO_H ++#define __NET_TC_CTINFO_H ++ ++#include <net/act_api.h> ++ ++struct tcf_ctinfo_params { ++ struct rcu_head rcu; ++ struct net *net; ++ u32 dscpmask; ++ u32 dscpstatemask; ++ u32 cpmarkmask; ++ u16 zone; ++ u8 mode; ++ u8 dscpmaskshift; ++}; ++ ++struct tcf_ctinfo { ++ struct tc_action common; ++ struct tcf_ctinfo_params __rcu *params; ++ u64 stats_dscp_set; ++ u64 stats_dscp_error; ++ u64 stats_cpmark_set; ++}; ++ ++#define to_ctinfo(a) ((struct tcf_ctinfo *)a) ++ ++#endif /* __NET_TC_CTINFO_H */ +diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h +index 46c506615f4a..408b02fbb34a 100644 +--- a/include/uapi/linux/pkt_cls.h ++++ b/include/uapi/linux/pkt_cls.h +@@ -66,7 +66,8 @@ enum { + TCA_ID_UNSPEC=0, + 
TCA_ID_POLICE=1, + /* other actions go here */ +- __TCA_ID_MAX=255 ++ TCA_ID_CTINFO=27, ++ __TCA_ID_MAX = 255 + }; + + #define TCA_ID_MAX __TCA_ID_MAX +diff --git a/include/uapi/linux/tc_act/tc_ctinfo.h b/include/uapi/linux/tc_act/tc_ctinfo.h +new file mode 100644 +index 000000000000..da803e05a89b +--- /dev/null ++++ b/include/uapi/linux/tc_act/tc_ctinfo.h +@@ -0,0 +1,34 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef __UAPI_TC_CTINFO_H ++#define __UAPI_TC_CTINFO_H ++ ++#include <linux/types.h> ++#include <linux/pkt_cls.h> ++ ++struct tc_ctinfo { ++ tc_gen; ++}; ++ ++enum { ++ TCA_CTINFO_UNSPEC, ++ TCA_CTINFO_PAD, ++ TCA_CTINFO_TM, ++ TCA_CTINFO_ACT, ++ TCA_CTINFO_ZONE, ++ TCA_CTINFO_PARMS_DSCP_MASK, ++ TCA_CTINFO_PARMS_DSCP_STATEMASK, ++ TCA_CTINFO_PARMS_CPMARK_MASK, ++ TCA_CTINFO_STATS_DSCP_SET, ++ TCA_CTINFO_STATS_DSCP_ERROR, ++ TCA_CTINFO_STATS_CPMARK_SET, ++ __TCA_CTINFO_MAX ++}; ++ ++#define TCA_CTINFO_MAX (__TCA_CTINFO_MAX - 1) ++ ++enum { ++ CTINFO_MODE_DSCP = BIT(0), ++ CTINFO_MODE_CPMARK = BIT(1) ++}; ++ ++#endif +diff --git a/net/sched/Kconfig b/net/sched/Kconfig +index e70ed26485a2..962d90f72f54 100644 +--- a/net/sched/Kconfig ++++ b/net/sched/Kconfig +@@ -808,6 +808,19 @@ config NET_ACT_CONNMARK + To compile this code as a module, choose M here: the + module will be called act_connmark. + ++config NET_ACT_CTINFO ++ tristate "Netfilter Connmark to DSCP Retriever" ++ depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES ++ depends on NF_CONNTRACK && NF_CONNTRACK_MARK ++ help ++ Say Y here to allow transfer of a connmark stored DSCP into ++ ipv4/v6 diffserv ++ ++ If unsure, say N. ++ ++ To compile this code as a module, choose M here: the ++ module will be called act_ctinfo. 
++ + config NET_ACT_SKBMOD + tristate "skb data modification action" + depends on NET_CLS_ACT +diff --git a/net/sched/Makefile b/net/sched/Makefile +index 9e43a4721ef8..44ee5b87b895 100644 +--- a/net/sched/Makefile ++++ b/net/sched/Makefile +@@ -21,6 +21,7 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o + obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o + obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o + obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o ++obj-$(CONFIG_NET_ACT_CTINFO) += act_ctinfo.o + obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o + obj-$(CONFIG_NET_ACT_IFE) += act_ife.o + obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o +diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c +new file mode 100644 +index 000000000000..e65344e32801 +--- /dev/null ++++ b/net/sched/act_ctinfo.c +@@ -0,0 +1,394 @@ ++// SPDX-License-Identifier: GPL-2.0+ ++/* net/sched/act_ctinfo.c netfilter ctinfo connmark actions ++ * ++ * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> ++ */ ++ ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/skbuff.h> ++#include <linux/rtnetlink.h> ++#include <linux/pkt_cls.h> ++#include <linux/ip.h> ++#include <linux/ipv6.h> ++#include <net/netlink.h> ++#include <net/pkt_sched.h> ++#include <net/act_api.h> ++#include <net/pkt_cls.h> ++#include <uapi/linux/tc_act/tc_ctinfo.h> ++#include <net/tc_act/tc_ctinfo.h> ++ ++#include <net/netfilter/nf_conntrack.h> ++#include <net/netfilter/nf_conntrack_core.h> ++#include <net/netfilter/nf_conntrack_ecache.h> ++#include <net/netfilter/nf_conntrack_zones.h> ++ ++static struct tc_action_ops act_ctinfo_ops; ++static unsigned int ctinfo_net_id; ++ ++static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, ++ struct tcf_ctinfo_params *cp, ++ struct sk_buff *skb, int wlen, int proto) ++{ ++ u8 dscp,newdscp; ++ ++ newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) & ++ ~INET_ECN_MASK; ++ ++ /* mark contains DSCP so restore 
DSCP bits from ct->mark into diffserv */ ++ /* using overlimits stats to count how many DSCP updates */ ++ switch (proto) { ++ case NFPROTO_IPV4: ++ dscp = ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK; ++ if (dscp != newdscp) { ++ if (likely(!skb_try_make_writable(skb, wlen))) { ++ ipv4_change_dsfield(ip_hdr(skb), ++ INET_ECN_MASK, ++ newdscp); ++ ca->stats_dscp_set++; ++ } else { ++ ca->stats_dscp_error++; ++ } ++ } ++ break; ++ case NFPROTO_IPV6: ++ dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK; ++ if (dscp != newdscp) { ++ if (likely(!skb_try_make_writable(skb, wlen))) { ++ ipv6_change_dsfield(ipv6_hdr(skb), ++ INET_ECN_MASK, ++ newdscp); ++ ca->stats_dscp_set++; ++ } else { ++ ca->stats_dscp_error++; ++ } ++ } ++ break; ++ default: ++ break; ++ } ++} ++ ++static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, ++ struct tcf_ctinfo_params *cp, ++ struct sk_buff *skb) ++{ ++ ca->stats_cpmark_set++; ++ skb->mark = ct->mark & cp->cpmarkmask; ++} ++ ++static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, ++ struct tcf_result *res) ++{ ++ const struct nf_conntrack_tuple_hash *thash = NULL; ++ struct tcf_ctinfo *ca = to_ctinfo(a); ++ struct nf_conntrack_tuple tuple; ++ struct nf_conntrack_zone zone; ++ enum ip_conntrack_info ctinfo; ++ struct tcf_ctinfo_params *cp; ++ struct nf_conn *ct; ++ int proto, wlen; ++ int action; ++ ++ cp = rcu_dereference_bh(ca->params); ++ ++ tcf_lastuse_update(&ca->tcf_tm); ++ bstats_update(&ca->tcf_bstats, skb); ++ action = READ_ONCE(ca->tcf_action); ++ ++ wlen = skb_network_offset(skb); ++ if (tc_skb_protocol(skb) == htons(ETH_P_IP)) { ++ wlen += sizeof(struct iphdr); ++ if (!pskb_may_pull(skb, wlen)) ++ goto out; ++ ++ proto = NFPROTO_IPV4; ++ } else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) { ++ wlen += sizeof(struct ipv6hdr); ++ if (!pskb_may_pull(skb, wlen)) ++ goto out; ++ ++ proto = NFPROTO_IPV6; ++ } else { ++ goto out; ++ } ++ ++ ct = nf_ct_get(skb, &ctinfo); ++ if (!ct) { 
/* look harder, usually ingress */ ++ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), ++ proto, cp->net, &tuple)) ++ goto out; ++ zone.id = cp->zone; ++ zone.dir = NF_CT_DEFAULT_ZONE_DIR; ++ ++ thash = nf_conntrack_find_get(cp->net, &zone, &tuple); ++ if (!thash) ++ goto out; ++ ++ ct = nf_ct_tuplehash_to_ctrack(thash); ++ } ++ ++ if (cp->mode & CTINFO_MODE_DSCP) ++ if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask)) ++ tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto); ++ ++ if (cp->mode & CTINFO_MODE_CPMARK) ++ tcf_ctinfo_cpmark_set(ct, ca, cp, skb); ++ ++ if (thash) ++ nf_ct_put(ct); ++out: ++ return action; ++} ++ ++static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = { ++ [TCA_CTINFO_ACT] = { .len = sizeof(struct ++ tc_ctinfo) }, ++ [TCA_CTINFO_ZONE] = { .type = NLA_U16 }, ++ [TCA_CTINFO_PARMS_DSCP_MASK] = { .type = NLA_U32 }, ++ [TCA_CTINFO_PARMS_DSCP_STATEMASK] = { .type = NLA_U32 }, ++ [TCA_CTINFO_PARMS_CPMARK_MASK] = { .type = NLA_U32 }, ++}; ++ ++static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, ++ struct nlattr *est, struct tc_action **a, ++ int ovr, int bind) ++{ ++ struct tc_action_net *tn = net_generic(net, ctinfo_net_id); ++ struct nlattr *tb[TCA_CTINFO_MAX + 1]; ++ struct tcf_ctinfo_params *cp_new; ++/* struct tcf_chain *goto_ch = NULL; */ ++ u32 dscpmask = 0, dscpstatemask; ++ struct tc_ctinfo *actparm; ++ struct tcf_ctinfo *ci; ++ u8 dscpmaskshift; ++ int ret = 0, err; ++ ++ if (!nla) ++ return -EINVAL; ++ ++ err = nla_parse_nested(tb, TCA_CTINFO_MAX, nla, ctinfo_policy, NULL); ++ if (err < 0) ++ return err; ++ ++ if (!tb[TCA_CTINFO_ACT]) ++ return -EINVAL; ++ actparm = nla_data(tb[TCA_CTINFO_ACT]); ++ ++ /* do some basic validation here before dynamically allocating things */ ++ /* that we would otherwise have to clean up. */ ++ if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) { ++ dscpmask = nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_MASK]); ++ /* need contiguous 6 bit mask */ ++ dscpmaskshift = dscpmask ? 
__ffs(dscpmask) : 0; ++ if ((~0 & (dscpmask >> dscpmaskshift)) != 0x3f) ++ return -EINVAL; ++ dscpstatemask = tb[TCA_CTINFO_PARMS_DSCP_STATEMASK] ? ++ nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) : 0; ++ /* mask & statemask must not overlap */ ++ if (dscpmask & dscpstatemask) ++ return -EINVAL; ++ } ++ /* done the validation:now to the actual action allocation */ ++ err = tcf_idr_check(tn, actparm->index, a, bind); ++ if (!err) { ++ ret = tcf_idr_create(tn, actparm->index, est, a, ++ &act_ctinfo_ops, bind, false); ++ if (ret) { ++ /* tcf_idr_cleanup(tn, actparm->index); */ ++ return ret; ++ } ++ ret = ACT_P_CREATED; ++ } else if (err > 0) { ++ if (bind) /* don't override defaults */ ++ return 0; ++ if (!ovr) { ++ tcf_idr_release(*a, bind); ++ return -EEXIST; ++ } ++ } else { ++ return err; ++ } ++ ++/* err = tcf_action_check_ctrlact(actparm->action, tp, &goto_ch, extack); ++ if (err < 0) ++ goto release_idr; ++ */ ++ ++ ci = to_ctinfo(*a); ++ ++ cp_new = kzalloc(sizeof(*cp_new), GFP_KERNEL); ++ if (unlikely(!cp_new)) { ++ err = -ENOMEM; ++ goto put_chain; ++ } ++ ++ cp_new->net = net; ++ cp_new->zone = tb[TCA_CTINFO_ZONE] ? 
++ nla_get_u16(tb[TCA_CTINFO_ZONE]) : 0; ++ if (dscpmask) { ++ cp_new->dscpmask = dscpmask; ++ cp_new->dscpmaskshift = dscpmaskshift; ++ cp_new->dscpstatemask = dscpstatemask; ++ cp_new->mode |= CTINFO_MODE_DSCP; ++ } ++ ++ if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) { ++ cp_new->cpmarkmask = nla_get_u32(tb[TCA_CTINFO_PARMS_CPMARK_MASK]); ++ cp_new->mode |= CTINFO_MODE_CPMARK; ++ } ++ ++ spin_lock_bh(&ci->tcf_lock); ++/* goto_ch = tcf_action_set_ctrlact(*a, actparm->action, goto_ch); */ ++ ci->tcf_action = actparm->action; ++ rcu_swap_protected(ci->params, cp_new, ++ lockdep_is_held(&ci->tcf_lock)); ++ spin_unlock_bh(&ci->tcf_lock); ++ ++/* if (goto_ch) ++ tcf_chain_put_by_act(goto_ch); */ ++ if (cp_new) ++ kfree_rcu(cp_new, rcu); ++ ++ if (ret == ACT_P_CREATED) ++ tcf_idr_insert(tn, *a); ++ ++ return ret; ++ ++put_chain: ++/* if (goto_ch) ++ tcf_chain_put_by_act(goto_ch); */ ++/*release_idr:*/ ++ tcf_idr_release(*a, bind); ++ return err; ++} ++ ++static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a, ++ int bind, int ref) ++{ ++ struct tcf_ctinfo *ci = to_ctinfo(a); ++ struct tc_ctinfo opt = { ++ .index = ci->tcf_index, ++ .refcnt = ci->tcf_refcnt - ref, ++ .bindcnt = ci->tcf_bindcnt - bind, ++ }; ++ unsigned char *b = skb_tail_pointer(skb); ++ struct tcf_ctinfo_params *cp; ++ struct tcf_t t; ++ ++ spin_lock_bh(&ci->tcf_lock); ++ cp = rcu_dereference_protected(ci->params, ++ lockdep_is_held(&ci->tcf_lock)); ++ ++ tcf_tm_dump(&t, &ci->tcf_tm); ++ if (nla_put_64bit(skb, TCA_CTINFO_TM, sizeof(t), &t, TCA_CTINFO_PAD)) ++ goto nla_put_failure; ++ ++ opt.action = ci->tcf_action; ++ if (nla_put(skb, TCA_CTINFO_ACT, sizeof(opt), &opt)) ++ goto nla_put_failure; ++ ++ if (nla_put_u16(skb, TCA_CTINFO_ZONE, cp->zone)) ++ goto nla_put_failure; ++ ++ if (cp->mode & CTINFO_MODE_DSCP) { ++ if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_MASK, ++ cp->dscpmask)) ++ goto nla_put_failure; ++ if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_STATEMASK, ++ cp->dscpstatemask)) ++ goto 
nla_put_failure; ++ } ++ ++ if (cp->mode & CTINFO_MODE_CPMARK) { ++ if (nla_put_u32(skb, TCA_CTINFO_PARMS_CPMARK_MASK, ++ cp->cpmarkmask)) ++ goto nla_put_failure; ++ } ++ ++ if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET, ++ ci->stats_dscp_set, TCA_CTINFO_PAD)) ++ goto nla_put_failure; ++ ++ if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR, ++ ci->stats_dscp_error, TCA_CTINFO_PAD)) ++ goto nla_put_failure; ++ ++ if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET, ++ ci->stats_cpmark_set, TCA_CTINFO_PAD)) ++ goto nla_put_failure; ++ ++ spin_unlock_bh(&ci->tcf_lock); ++ return skb->len; ++ ++nla_put_failure: ++ spin_unlock_bh(&ci->tcf_lock); ++ nlmsg_trim(skb, b); ++ return -1; ++} ++ ++static int tcf_ctinfo_walker(struct net *net, struct sk_buff *skb, ++ struct netlink_callback *cb, int type, ++ const struct tc_action_ops *ops) ++{ ++ struct tc_action_net *tn = net_generic(net, ctinfo_net_id); ++ ++ return tcf_generic_walker(tn, skb, cb, type, ops); ++} ++ ++static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index) ++{ ++ struct tc_action_net *tn = net_generic(net, ctinfo_net_id); ++ ++ return tcf_idr_search(tn, a, index); ++} ++ ++static struct tc_action_ops act_ctinfo_ops = { ++ .kind = "ctinfo", ++ .type = TCA_ID_CTINFO, ++ .owner = THIS_MODULE, ++ .act = tcf_ctinfo_act, ++ .dump = tcf_ctinfo_dump, ++ .init = tcf_ctinfo_init, ++ .walk = tcf_ctinfo_walker, ++ .lookup = tcf_ctinfo_search, ++ .size = sizeof(struct tcf_ctinfo), ++}; ++ ++static __net_init int ctinfo_init_net(struct net *net) ++{ ++ struct tc_action_net *tn = net_generic(net, ctinfo_net_id); ++ ++ return tc_action_net_init(tn, &act_ctinfo_ops); ++} ++ ++static void __net_exit ctinfo_exit_net(struct net *net) ++{ ++ struct tc_action_net *tn = net_generic(net, ctinfo_net_id); ++ ++ tc_action_net_exit(tn); ++} ++ ++static struct pernet_operations ctinfo_net_ops = { ++ .init = ctinfo_init_net, ++ .exit = ctinfo_exit_net, ++ .id = &ctinfo_net_id, ++ .size = 
sizeof(struct tc_action_net), ++}; ++ ++static int __init ctinfo_init_module(void) ++{ ++ return tcf_register_action(&act_ctinfo_ops, &ctinfo_net_ops); ++} ++ ++static void __exit ctinfo_cleanup_module(void) ++{ ++ tcf_unregister_action(&act_ctinfo_ops, &ctinfo_net_ops); ++} ++ ++module_init(ctinfo_init_module); ++module_exit(ctinfo_cleanup_module); ++MODULE_AUTHOR("Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>"); ++MODULE_DESCRIPTION("Conntrack mark to DSCP restoring"); ++MODULE_LICENSE("GPL"); +-- +2.20.1 (Apple Git-117) + |