--- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2045,6 +2045,8 @@ int bond_enslave(struct net_device *bond goto err_unregister; } + bond_lower_state_changed(new_slave); + res = bond_sysfs_slave_add(new_slave); if (res) { slave_dbg(bond_dev, slave_dev, "Error %d calling bond_sysfs_slave_add\n", res); --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -149,8 +149,41 @@ struct dsa_switch_tree { /* List of DSA links composing the routing table */ struct list_head rtable; + + /* Maps offloaded LAG netdevs to a zero-based linear ID for + * drivers that need it. + */ + struct net_device **lags; + unsigned int lags_len; }; +#define dsa_lags_foreach_id(_id, _dst) \ + for ((_id) = 0; (_id) < (_dst)->lags_len; (_id)++) \ + if ((_dst)->lags[(_id)]) + +#define dsa_lag_foreach_port(_dp, _dst, _lag) \ + list_for_each_entry((_dp), &(_dst)->ports, list) \ + if ((_dp)->lag_dev == (_lag)) + +static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst, + unsigned int id) +{ + return dst->lags[id]; +} + +static inline int dsa_lag_id(struct dsa_switch_tree *dst, + struct net_device *lag) +{ + unsigned int id; + + dsa_lags_foreach_id(id, dst) { + if (dsa_lag_dev(dst, id) == lag) + return id; + } + + return -ENODEV; +} + /* TC matchall action types */ enum dsa_port_mall_action_type { DSA_PORT_MALL_MIRROR, @@ -220,6 +253,8 @@ struct dsa_port { bool devlink_port_setup; struct phylink *pl; struct phylink_config pl_config; + struct net_device *lag_dev; + bool lag_tx_enabled; struct list_head list; @@ -340,6 +375,14 @@ struct dsa_switch { */ bool mtu_enforcement_ingress; + /* Drivers that benefit from having an ID associated with each + * offloaded LAG should set this to the maximum number of + * supported IDs. DSA will then maintain a mapping of _at + * least_ these many IDs, accessible to drivers via + * dsa_lag_id(). + */ + unsigned int num_lag_ids; + size_t num_ports; }; @@ -432,6 +475,18 @@ static inline bool dsa_port_is_vlan_filt return dp->vlan_filtering; } +static inline +struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) +{ + if (!dp->bridge_dev) + return NULL; + + if (dp->lag_dev) + return dp->lag_dev; + + return dp->slave; +} + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { @@ -629,6 +684,13 @@ struct dsa_switch_ops { void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct net_device *br); + int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, + int port); + int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag, + struct netdev_lag_upper_info *info); + int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag); /* * PTP functionality @@ -660,6 +722,16 @@ struct dsa_switch_ops { int (*port_change_mtu)(struct dsa_switch *ds, int port, int new_mtu); int (*port_max_mtu)(struct dsa_switch *ds, int port); + + /* + * LAG integration + */ + int (*port_lag_change)(struct dsa_switch *ds, int port); + int (*port_lag_join)(struct dsa_switch *ds, int port, + struct net_device *lag, + struct netdev_lag_upper_info *info); + int (*port_lag_leave)(struct dsa_switch *ds, int port, + struct net_device *lag); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -220,11 +220,21 @@ static int dsa_switch_rcv(struct sk_buff } skb = nskb; - p = netdev_priv(skb->dev); skb_push(skb, ETH_HLEN); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); + if (unlikely(!dsa_slave_dev_check(skb->dev))) { + /* Packet is to be injected directly on an upper + * device, e.g. a team/bond, so skip all DSA-port + * specific actions. + */ + netif_rx(skb); + return 0; + } + + p = netdev_priv(skb->dev); + if (unlikely(cpu_dp->ds->untag_bridge_pvid)) { nskb = dsa_untag_bridge_pvid(skb); if (!nskb) { --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -21,6 +21,65 @@ static DEFINE_MUTEX(dsa2_mutex); LIST_HEAD(dsa_tree_list); +/** + * dsa_lag_map() - Map LAG netdev to a linear LAG ID + * @dst: Tree in which to record the mapping. + * @lag: Netdev that is to be mapped to an ID. + * + * dsa_lag_id/dsa_lag_dev can then be used to translate between the + * two spaces. The size of the mapping space is determined by the + * driver by setting ds->num_lag_ids. It is perfectly legal to leave + * it unset if it is not needed, in which case these functions become + * no-ops. + */ +void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag) +{ + unsigned int id; + + if (dsa_lag_id(dst, lag) >= 0) + /* Already mapped */ + return; + + for (id = 0; id < dst->lags_len; id++) { + if (!dsa_lag_dev(dst, id)) { + dst->lags[id] = lag; + return; + } + } + + /* No IDs left, which is OK. Some drivers do not need it. The + * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id + * returns an error for this device when joining the LAG. The + * driver can then return -EOPNOTSUPP back to DSA, which will + * fall back to a software LAG. + */ +} + +/** + * dsa_lag_unmap() - Remove a LAG ID mapping + * @dst: Tree in which the mapping is recorded. + * @lag: Netdev that was mapped. + * + * As there may be multiple users of the mapping, it is only removed + * if there are no other references to it. + */ +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag) +{ + struct dsa_port *dp; + unsigned int id; + + dsa_lag_foreach_port(dp, dst, lag) + /* There are remaining users of this mapping */ + return; + + dsa_lags_foreach_id(id, dst) { + if (dsa_lag_dev(dst, id) == lag) { + dst->lags[id] = NULL; + break; + } + } +} + struct dsa_switch *dsa_switch_find(int tree_index, int sw_index) { struct dsa_switch_tree *dst; @@ -597,6 +656,32 @@ static void dsa_tree_teardown_master(str dsa_master_teardown(dp->master); } +static int dsa_tree_setup_lags(struct dsa_switch_tree *dst) +{ + unsigned int len = 0; + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->ds->num_lag_ids > len) + len = dp->ds->num_lag_ids; + } + + if (!len) + return 0; + + dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL); + if (!dst->lags) + return -ENOMEM; + + dst->lags_len = len; + return 0; +} + +static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst) +{ + kfree(dst->lags); +} + static int dsa_tree_setup(struct dsa_switch_tree *dst) { bool complete; @@ -624,12 +709,18 @@ static int dsa_tree_setup(struct dsa_swi if (err) goto teardown_switches; + err = dsa_tree_setup_lags(dst); + if (err) + goto teardown_master; + dst->setup = true; pr_info("DSA: tree %d setup\n", dst->index); return 0; +teardown_master: + dsa_tree_teardown_master(dst); teardown_switches: dsa_tree_teardown_switches(dst); teardown_default_cpu: @@ -645,6 +736,8 @@ static void dsa_tree_teardown(struct dsa if (!dst->setup) return; + dsa_tree_teardown_lags(dst); + dsa_tree_teardown_master(dst); dsa_tree_teardown_switches(dst); --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -20,6 +20,9 @@ enum { DSA_NOTIFIER_BRIDGE_LEAVE, DSA_NOTIFIER_FDB_ADD, DSA_NOTIFIER_FDB_DEL, + DSA_NOTIFIER_LAG_CHANGE, + DSA_NOTIFIER_LAG_JOIN, + DSA_NOTIFIER_LAG_LEAVE, DSA_NOTIFIER_MDB_ADD, DSA_NOTIFIER_MDB_DEL, DSA_NOTIFIER_VLAN_ADD, @@ -57,6 +60,15 @@ struct dsa_notifier_mdb_info { int port; }; +/* DSA_NOTIFIER_LAG_* */ +struct dsa_notifier_lag_info { + struct net_device *lag; + int sw_index; + int port; + + struct netdev_lag_upper_info *info; +}; + /* DSA_NOTIFIER_VLAN_* */ struct dsa_notifier_vlan_info { const struct switchdev_obj_port_vlan *vlan; @@ -149,6 +161,11 @@ void dsa_port_disable_rt(struct dsa_port void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo); +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo); +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct switchdev_trans *trans); bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); @@ -181,6 +198,71 @@ int dsa_port_link_register_of(struct dsa void dsa_port_link_unregister_of(struct dsa_port *dp); extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; +static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, + struct net_device *dev) +{ + /* Switchdev offloading can be configured on: */ + + if (dev == dp->slave) + /* DSA ports directly connected to a bridge, and event + * was emitted for the ports themselves. + */ + return true; + + if (dp->bridge_dev == dev) + /* DSA ports connected to a bridge, and event was emitted + * for the bridge. + */ + return true; + + if (dp->lag_dev == dev) + /* DSA ports connected to a bridge via a LAG */ + return true; + + return false; +} + +static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, + struct net_device *dev) +{ + return dsa_port_to_bridge_port(dp) == dev; +} + +static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, + struct net_device *bridge_dev) +{ + /* DSA ports connected to a bridge, and event was emitted + * for the bridge. + */ + return dp->bridge_dev == bridge_dev; +} + +/* Returns true if any port of this tree offloads the given net_device */ +static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, + struct net_device *dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge_port(dp, dev)) + return true; + + return false; +} + +/* Returns true if any port of this tree offloads the given net_device */ +static inline bool dsa_tree_offloads_netdev(struct dsa_switch_tree *dst, + struct net_device *dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_netdev(dp, dev)) + return true; + + return false; +} + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); @@ -285,6 +367,9 @@ int dsa_switch_register_notifier(struct void dsa_switch_unregister_notifier(struct dsa_switch *ds); /* dsa2.c */ +void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag); +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag); + extern struct list_head dsa_tree_list; #endif --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -193,6 +193,99 @@ void dsa_port_bridge_leave(struct dsa_po dsa_port_set_state_now(dp, BR_STATE_FORWARDING); } +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + }; + bool tx_enabled; + + if (!dp->lag_dev) + return 0; + + /* On statically configured aggregates (e.g. loadbalance + * without LACP) ports will always be tx_enabled, even if the + * link is down. Thus we require both link_up and tx_enabled + * in order to include it in the tx set. + */ + tx_enabled = linfo->link_up && linfo->tx_enabled; + + if (tx_enabled == dp->lag_tx_enabled) + return 0; + + dp->lag_tx_enabled = tx_enabled; + + return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info); +} + +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag, + struct netdev_lag_upper_info *uinfo) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .lag = lag, + .info = uinfo, + }; + struct net_device *bridge_dev; + int err; + + dsa_lag_map(dp->ds->dst, lag); + dp->lag_dev = lag; + + err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info); + if (err) + goto err_lag_join; + + bridge_dev = netdev_master_upper_dev_get(lag); + if (!bridge_dev || !netif_is_bridge_master(bridge_dev)) + return 0; + + err = dsa_port_bridge_join(dp, bridge_dev); + if (err) + goto err_bridge_join; + + return 0; + +err_bridge_join: + dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info); +err_lag_join: + dp->lag_dev = NULL; + dsa_lag_unmap(dp->ds->dst, lag); + return err; +} + +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .lag = lag, + }; + int err; + + if (!dp->lag_dev) + return; + + /* Port might have been part of a LAG that in turn was + * attached to a bridge. + */ + if (dp->bridge_dev) + dsa_port_bridge_leave(dp, dp->bridge_dev); + + dp->lag_tx_enabled = false; + dp->lag_dev = NULL; + + err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info); + if (err) + pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n", + err); + + dsa_lag_unmap(dp->ds->dst, lag); +} + /* Must be called under rcu_read_lock() */ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, bool vlan_filtering) --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -337,9 +337,6 @@ static int dsa_slave_vlan_add(struct net struct switchdev_obj_port_vlan vlan; int vid, err; - if (obj->orig_dev != dev) - return -EOPNOTSUPP; - if (dsa_port_skip_vlan_configuration(dp)) return 0; @@ -394,11 +391,13 @@ static int dsa_slave_port_obj_add(struct switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (obj->orig_dev != dev) + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) return -EOPNOTSUPP; err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); break; case SWITCHDEV_OBJ_ID_HOST_MDB: + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) + return -EOPNOTSUPP; /* DSA can directly translate this to a normal MDB add, * but on the CPU port. */ @@ -406,6 +405,9 @@ static int dsa_slave_port_obj_add(struct trans); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + return -EOPNOTSUPP; + err = dsa_slave_vlan_add(dev, obj, trans); break; default: @@ -424,9 +426,6 @@ static int dsa_slave_vlan_del(struct net struct switchdev_obj_port_vlan *vlan; int vid, err; - if (obj->orig_dev != dev) - return -EOPNOTSUPP; - if (dsa_port_skip_vlan_configuration(dp)) return 0; @@ -453,17 +452,22 @@ static int dsa_slave_port_obj_del(struct switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (obj->orig_dev != dev) + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) return -EOPNOTSUPP; err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_HOST_MDB: + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) + return -EOPNOTSUPP; /* DSA can directly translate this to a normal MDB add, * but on the CPU port. */ err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + return -EOPNOTSUPP; + err = dsa_slave_vlan_del(dev, obj); break; default: @@ -1993,6 +1997,46 @@ static int dsa_slave_changeupper(struct dsa_port_bridge_leave(dp, info->upper_dev); err = NOTIFY_OK; } + } else if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) { + err = dsa_port_lag_join(dp, info->upper_dev, + info->upper_info); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Offloading not supported"); + err = 0; + } + err = notifier_from_errno(err); + } else { + dsa_port_lag_leave(dp, info->upper_dev); + err = NOTIFY_OK; + } + } + + return err; +} + +static int +dsa_slave_lag_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + struct dsa_port *dp; + + netdev_for_each_lower_dev(dev, lower, iter) { + if (!dsa_slave_dev_check(lower)) + continue; + + dp = dsa_slave_to_port(lower); + if (!dp->lag_dev) + /* Software LAG */ + continue; + + err = dsa_slave_changeupper(lower, info); + if (notifier_to_errno(err)) + break; } return err; @@ -2078,10 +2122,26 @@ static int dsa_slave_netdevice_event(str break; } case NETDEV_CHANGEUPPER: + if (dsa_slave_dev_check(dev)) + return dsa_slave_changeupper(dev, ptr); + + if (netif_is_lag_master(dev)) + return dsa_slave_lag_changeupper(dev, ptr); + + break; + case NETDEV_CHANGELOWERSTATE: { + struct netdev_notifier_changelowerstate_info *info = ptr; + struct dsa_port *dp; + int err; + if (!dsa_slave_dev_check(dev)) - return NOTIFY_DONE; + break; - return dsa_slave_changeupper(dev, ptr); + dp = dsa_slave_to_port(dev); + + err = dsa_port_lag_change(dp, info->lower_state_info); + return notifier_from_errno(err); + } } return NOTIFY_DONE; @@ -2229,6 +2289,15 @@ static int dsa_slave_switchdev_event(str if (!fdb_info->added_by_user && !dp->ds->assisted_learning_on_cpu_port) return NOTIFY_DONE; + + /* When the bridge learns an address on an offloaded + * LAG we don't want to send traffic to the CPU, the + * other ports bridged with the LAG should be able to + * autonomously forward towards it. + */ + if (dsa_tree_offloads_netdev(dp->ds->dst, dev)) + return NOTIFY_DONE; + } if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del) --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -193,6 +193,47 @@ static int dsa_switch_fdb_del(struct dsa return ds->ops->port_fdb_del(ds, port, info->addr, info->vid); } +static int dsa_switch_lag_change(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_change) + return ds->ops->port_lag_change(ds, info->port); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_change) + return ds->ops->crosschip_lag_change(ds, info->sw_index, + info->port); + + return 0; +} + +static int dsa_switch_lag_join(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_join) + return ds->ops->port_lag_join(ds, info->port, info->lag, + info->info); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_join) + return ds->ops->crosschip_lag_join(ds, info->sw_index, + info->port, info->lag, + info->info); + + return -EOPNOTSUPP; +} + +static int dsa_switch_lag_leave(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_leave) + return ds->ops->port_lag_leave(ds, info->port, info->lag); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_leave) + return ds->ops->crosschip_lag_leave(ds, info->sw_index, + info->port, info->lag); + + return -EOPNOTSUPP; +} + static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, struct dsa_notifier_mdb_info *info) { @@ -340,6 +381,15 @@ static int dsa_switch_event(struct notif case DSA_NOTIFIER_FDB_DEL: err = dsa_switch_fdb_del(ds, info); break; + case DSA_NOTIFIER_LAG_CHANGE: + err = dsa_switch_lag_change(ds, info); + break; + case DSA_NOTIFIER_LAG_JOIN: + err = dsa_switch_lag_join(ds, info); + break; + case DSA_NOTIFIER_LAG_LEAVE: + err = dsa_switch_lag_leave(ds, info); + break; case DSA_NOTIFIER_MDB_ADD: err = dsa_switch_mdb_add(ds, info); break; --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -82,7 +82,19 @@ static struct sk_buff *dsa_rcv(struct sk source_device = dsa_header[0] & 0x1f; source_port = (dsa_header[1] >> 3) & 0x1f; - skb->dev = dsa_master_find_slave(dev, source_device, source_port); + if (trunk) { + struct dsa_port *cpu_dp = dev->dsa_ptr; + + /* The exact source port is not available in the tag, + * so we inject the frame directly on the upper + * team/bond. + */ + skb->dev = dsa_lag_dev(cpu_dp->dst, source_port); + } else { + skb->dev = dsa_master_find_slave(dev, source_device, + source_port); + } + if (!skb->dev) return NULL;