From 3015c50384743eb14d20c907a400e10225da033b Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 2 Sep 2022 11:27:37 +0200 Subject: net: dsa: microchip: fix kernel oops on ksz8 switches After driver refactoring we was running ksz9477 specific CPU port configuration on ksz8 family which ended with kernel oops. So, make sure we run this code only on ksz9477 compatible devices. Tested on KSZ8873 and KSZ9477. Fixes: da8cd08520f3 ("net: dsa: microchip: add support for common phylink mac link up") Signed-off-by: Oleksij Rempel Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'drivers/net/dsa') diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 6bd69a7e6809..872aba63e7d4 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -170,6 +170,13 @@ static const struct ksz_dev_ops ksz8_dev_ops = { .exit = ksz8_switch_exit, }; +static void ksz9477_phylink_mac_link_up(struct ksz_device *dev, int port, + unsigned int mode, + phy_interface_t interface, + struct phy_device *phydev, int speed, + int duplex, bool tx_pause, + bool rx_pause); + static const struct ksz_dev_ops ksz9477_dev_ops = { .setup = ksz9477_setup, .get_port_addr = ksz9477_get_port_addr, @@ -196,6 +203,7 @@ static const struct ksz_dev_ops ksz9477_dev_ops = { .mdb_del = ksz9477_mdb_del, .change_mtu = ksz9477_change_mtu, .max_mtu = ksz9477_max_mtu, + .phylink_mac_link_up = ksz9477_phylink_mac_link_up, .config_cpu_port = ksz9477_config_cpu_port, .enable_stp_addr = ksz9477_enable_stp_addr, .reset = ksz9477_reset_switch, @@ -230,6 +238,7 @@ static const struct ksz_dev_ops lan937x_dev_ops = { .mdb_del = ksz9477_mdb_del, .change_mtu = lan937x_change_mtu, .max_mtu = ksz9477_max_mtu, + .phylink_mac_link_up = ksz9477_phylink_mac_link_up, .config_cpu_port = lan937x_config_cpu_port, .enable_stp_addr = ksz9477_enable_stp_addr, .reset = lan937x_reset_switch, @@ -1656,13 +1665,13 @@ static void ksz_duplex_flowctrl(struct ksz_device *dev, int port, int duplex, ksz_prmw8(dev, port, regs[P_XMII_CTRL_0], mask, val); } -static void ksz_phylink_mac_link_up(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface, - struct phy_device *phydev, int speed, - int duplex, bool tx_pause, bool rx_pause) +static void ksz9477_phylink_mac_link_up(struct ksz_device *dev, int port, + unsigned int mode, + phy_interface_t interface, + struct phy_device *phydev, int speed, + int duplex, bool tx_pause, + bool rx_pause) { - struct ksz_device *dev = ds->priv; struct ksz_port *p; p = &dev->ports[port]; @@ -1676,6 +1685,15 @@ static void ksz_phylink_mac_link_up(struct dsa_switch *ds, int port, ksz_port_set_xmii_speed(dev, port, speed); ksz_duplex_flowctrl(dev, port, duplex, tx_pause, rx_pause); +} + +static void ksz_phylink_mac_link_up(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface, + struct phy_device *phydev, int speed, + int duplex, bool tx_pause, bool rx_pause) +{ + struct ksz_device *dev = ds->priv; if (dev->dev_ops->phylink_mac_link_up) dev->dev_ops->phylink_mac_link_up(dev, port, mode, interface, -- cgit From 42b998d4aa59b9dea51665e4f3be1d733c47e2bf Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sun, 4 Sep 2022 23:53:19 +0200 Subject: net: dsa: qca8k: fix NULL pointer dereference for of_device_get_match_data of_device_get_match_data is called on priv->dev before priv->dev is actually set. Move of_device_get_match_data after priv->dev is correctly set to fix this kernel panic. Fixes: 3bb0844e7bcd ("net: dsa: qca8k: cache match data to speed up access") Signed-off-by: Christian Marangi Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220904215319.13070-1-ansuelsmth@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/dsa/qca/qca8k-8xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/dsa') diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 1d3e7782a71f..c181346388a4 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -1889,9 +1889,9 @@ qca8k_sw_probe(struct mdio_device *mdiodev) if (!priv) return -ENOMEM; - priv->info = of_device_get_match_data(priv->dev); priv->bus = mdiodev->bus; priv->dev = &mdiodev->dev; + priv->info = of_device_get_match_data(priv->dev); priv->reset_gpio = devm_gpiod_get_optional(priv->dev, "reset", GPIOD_ASIS); -- cgit From 11afdc6526de0e0368c05da632a8c0d29fc60bb8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 5 Sep 2022 20:01:23 +0300 Subject: net: dsa: felix: tc-taprio intervals smaller than MTU should send at least one packet The blamed commit broke tc-taprio schedules such as this one: tc qdisc replace dev $swp1 root taprio \ num_tc 8 \ map 0 1 2 3 4 5 6 7 \ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ base-time 0 \ sched-entry S 0x7f 990000 \ sched-entry S 0x80 10000 \ flags 0x2 because the gate entry for TC 7 (S 0x80 10000 ns) now has a static guard band added earlier than its 'gate close' event, such that packet overruns won't occur in the worst case of the largest packet possible. Since guard bands are statically determined based on the per-tc QSYS_QMAXSDU_CFG_* with a fallback on the port-based QSYS_PORT_MAX_SDU, we need to discuss what happens with TC 7 depending on kernel version, since the driver, prior to commit 55a515b1f5a9 ("net: dsa: felix: drop oversized frames with tc-taprio instead of hanging the port"), did not touch QSYS_QMAXSDU_CFG_*, and therefore relied on QSYS_PORT_MAX_SDU. 1 (before vsc9959_tas_guard_bands_update): QSYS_PORT_MAX_SDU defaults to 1518, and at gigabit this introduces a static guard band (independent of packet sizes) of 12144 ns, plus QSYS::HSCH_MISC_CFG.FRM_ADJ (bit time of 20 octets => 160 ns). But this is larger than the time window itself, of 10000 ns. So, the queue system never considers a frame with TC 7 as eligible for transmission, since the gate practically never opens, and these frames are forever stuck in the TX queues and hang the port. 2 (after vsc9959_tas_guard_bands_update): Under the sole goal of enabling oversized frame dropping, we make an effort to set QSYS_QMAXSDU_CFG_7 to 1230 bytes. But QSYS_QMAXSDU_CFG_7 plays one more role, which we did not take into account: per-tc static guard band, expressed in L2 byte time (auto-adjusted for FCS and L1 overhead). There is a discrepancy between what the driver thinks (that there is no guard band, and 100% of min_gate_len[tc] is available for egress scheduling) and what the hardware actually does (crops the equivalent of QSYS_QMAXSDU_CFG_7 ns out of min_gate_len[tc]). In practice, this means that the hardware thinks it has exactly 0 ns for scheduling tc 7. In both cases, even minimum sized Ethernet frames are stuck on egress rather than being considered for scheduling on TC 7, even if they would fit given a proper configuration. Considering the current situation, with vsc9959_tas_guard_bands_update(), frames between 60 octets and 1230 octets in size are not eligible for oversized dropping (because they are smaller than QSYS_QMAXSDU_CFG_7), but won't be considered as eligible for scheduling either, because the min_gate_len[7] (10000 ns) minus the guard band determined by QSYS_QMAXSDU_CFG_7 (1230 octets * 8 ns per octet == 9840 ns) minus the guard band auto-added for L1 overhead by QSYS::HSCH_MISC_CFG.FRM_ADJ (20 octets * 8 ns per octet == 160 octets) leaves 0 ns for scheduling in the queue system proper. Investigating the hardware behavior, it becomes apparent that the queue system needs precisely 33 ns of 'gate open' time in order to consider a frame as eligible for scheduling to a tc. So the solution to this problem is to amend vsc9959_tas_guard_bands_update(), by giving the per-tc guard bands less space by exactly 33 ns, just enough for one frame to be scheduled in that interval. This allows the queue system to make forward progress for that port-tc, and prevents it from hanging. Fixes: 297c4de6f780 ("net: dsa: felix: re-enable TAS guard band mode") Reported-by: Xiaoliang Yang Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix_vsc9959.c | 35 ++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'drivers/net/dsa') diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 1cdce8a98d1d..262be2cf4e4b 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -22,6 +22,7 @@ #define VSC9959_NUM_PORTS 6 #define VSC9959_TAS_GCL_ENTRY_MAX 63 +#define VSC9959_TAS_MIN_GATE_LEN_NS 33 #define VSC9959_VCAP_POLICER_BASE 63 #define VSC9959_VCAP_POLICER_MAX 383 #define VSC9959_SWITCH_PCI_BAR 4 @@ -1478,6 +1479,23 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot) mdiobus_free(felix->imdio); } +/* The switch considers any frame (regardless of size) as eligible for + * transmission if the traffic class gate is open for at least 33 ns. + * Overruns are prevented by cropping an interval at the end of the gate time + * slot for which egress scheduling is blocked, but we need to still keep 33 ns + * available for one packet to be transmitted, otherwise the port tc will hang. + * This function returns the size of a gate interval that remains available for + * setting the guard band, after reserving the space for one egress frame. + */ +static u64 vsc9959_tas_remaining_gate_len_ps(u64 gate_len_ns) +{ + /* Gate always open */ + if (gate_len_ns == U64_MAX) + return U64_MAX; + + return (gate_len_ns - VSC9959_TAS_MIN_GATE_LEN_NS) * PSEC_PER_NSEC; +} + /* Extract shortest continuous gate open intervals in ns for each traffic class * of a cyclic tc-taprio schedule. If a gate is always open, the duration is * considered U64_MAX. If the gate is always closed, it is considered 0. @@ -1596,10 +1614,13 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) vsc9959_tas_min_gate_lengths(ocelot_port->taprio, min_gate_len); for (tc = 0; tc < OCELOT_NUM_TC; tc++) { + u64 remaining_gate_len_ps; u32 max_sdu; - if (min_gate_len[tc] == U64_MAX /* Gate always open */ || - min_gate_len[tc] * PSEC_PER_NSEC > needed_bit_time_ps) { + remaining_gate_len_ps = + vsc9959_tas_remaining_gate_len_ps(min_gate_len[tc]); + + if (remaining_gate_len_ps > needed_bit_time_ps) { /* Setting QMAXSDU_CFG to 0 disables oversized frame * dropping. */ @@ -1612,9 +1633,15 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) /* If traffic class doesn't support a full MTU sized * frame, make sure to enable oversize frame dropping * for frames larger than the smallest that would fit. + * + * However, the exact same register, QSYS_QMAXSDU_CFG_*, + * controls not only oversized frame dropping, but also + * per-tc static guard band lengths, so it reduces the + * useful gate interval length. Therefore, be careful + * to calculate a guard band (and therefore max_sdu) + * that still leaves 33 ns available in the time slot. */ - max_sdu = div_u64(min_gate_len[tc] * PSEC_PER_NSEC, - picos_per_byte); + max_sdu = div_u64(remaining_gate_len_ps, picos_per_byte); /* A TC gate may be completely closed, which is a * special case where all packets are oversized. * Any limit smaller than 64 octets accomplishes this -- cgit From 843794bbdef83955ae5b43dfafc355c3786e2145 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 5 Sep 2022 20:01:24 +0300 Subject: net: dsa: felix: disable cut-through forwarding for frames oversized for tc-taprio Experimentally, it looks like when QSYS_QMAXSDU_CFG_7 is set to 605, frames even way larger than 601 octets are transmitted even though these should be considered as oversized, according to the documentation, and dropped. Since oversized frame dropping depends on frame size, which is only known at the EOF stage, and therefore not at SOF when cut-through forwarding begins, it means that the switch cannot take QSYS_QMAXSDU_CFG_* into consideration for traffic classes that are cut-through. Since cut-through forwarding has no UAPI to control it, and the driver enables it based on the mantra "if we can, then why not", the strategy is to alter vsc9959_cut_through_fwd() to take into consideration which tc's have oversize frame dropping enabled, and disable cut-through for them. Then, from vsc9959_tas_guard_bands_update(), we re-trigger the cut-through determination process. There are 2 strategies for vsc9959_cut_through_fwd() to determine whether a tc has oversized dropping enabled or not. One is to keep a bit mask of traffic classes per port, and the other is to read back from the hardware registers (a non-zero value of QSYS_QMAXSDU_CFG_* means the feature is enabled). We choose reading back from registers, because struct ocelot_port is shared with drivers (ocelot, seville) that don't support either cut-through nor tc-taprio, and we don't have a felix specific extension of struct ocelot_port. Furthermore, reading registers from the Felix hardware is quite cheap, since they are memory-mapped. Fixes: 55a515b1f5a9 ("net: dsa: felix: drop oversized frames with tc-taprio instead of hanging the port") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix_vsc9959.c | 122 +++++++++++++++++++++------------ 1 file changed, 79 insertions(+), 43 deletions(-) (limited to 'drivers/net/dsa') diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 262be2cf4e4b..ad7c795d6612 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1557,6 +1557,65 @@ static void vsc9959_tas_min_gate_lengths(struct tc_taprio_qopt_offload *taprio, min_gate_len[tc] = 0; } +/* ocelot_write_rix is a macro that concatenates QSYS_MAXSDU_CFG_* with _RSZ, + * so we need to spell out the register access to each traffic class in helper + * functions, to simplify callers + */ +static void vsc9959_port_qmaxsdu_set(struct ocelot *ocelot, int port, int tc, + u32 max_sdu) +{ + switch (tc) { + case 0: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_0, + port); + break; + case 1: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_1, + port); + break; + case 2: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_2, + port); + break; + case 3: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_3, + port); + break; + case 4: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_4, + port); + break; + case 5: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_5, + port); + break; + case 6: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_6, + port); + break; + case 7: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_7, + port); + break; + } +} + +static u32 vsc9959_port_qmaxsdu_get(struct ocelot *ocelot, int port, int tc) +{ + switch (tc) { + case 0: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_0, port); + case 1: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_1, port); + case 2: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_2, port); + case 3: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_3, port); + case 4: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_4, port); + case 5: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_5, port); + case 6: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_6, port); + case 7: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_7, port); + default: + return 0; + } +} + /* Update QSYS_PORT_MAX_SDU to make sure the static guard bands added by the * switch (see the ALWAYS_GUARD_BAND_SCH_Q comment) are correct at all MTU * values (the default value is 1518). Also, for traffic class windows smaller @@ -1613,6 +1672,8 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) vsc9959_tas_min_gate_lengths(ocelot_port->taprio, min_gate_len); + mutex_lock(&ocelot->fwd_domain_lock); + for (tc = 0; tc < OCELOT_NUM_TC; tc++) { u64 remaining_gate_len_ps; u32 max_sdu; @@ -1664,47 +1725,14 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) max_sdu); } - /* ocelot_write_rix is a macro that concatenates - * QSYS_MAXSDU_CFG_* with _RSZ, so we need to spell out - * the writes to each traffic class - */ - switch (tc) { - case 0: - ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_0, - port); - break; - case 1: - ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_1, - port); - break; - case 2: - ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_2, - port); - break; - case 3: - ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_3, - port); - break; - case 4: - ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_4, - port); - break; - case 5: - ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_5, - port); - break; - case 6: - ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_6, - port); - break; - case 7: - ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_7, - port); - break; - } + vsc9959_port_qmaxsdu_set(ocelot, port, tc, max_sdu); } ocelot_write_rix(ocelot, maxlen, QSYS_PORT_MAX_SDU, port); + + ocelot->ops->cut_through_fwd(ocelot); + + mutex_unlock(&ocelot->fwd_domain_lock); } static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, @@ -2797,7 +2825,7 @@ static void vsc9959_cut_through_fwd(struct ocelot *ocelot) { struct felix *felix = ocelot_to_felix(ocelot); struct dsa_switch *ds = felix->ds; - int port, other_port; + int tc, port, other_port; lockdep_assert_held(&ocelot->fwd_domain_lock); @@ -2841,19 +2869,27 @@ static void vsc9959_cut_through_fwd(struct ocelot *ocelot) min_speed = other_ocelot_port->speed; } - /* Enable cut-through forwarding for all traffic classes. */ - if (ocelot_port->speed == min_speed) + /* Enable cut-through forwarding for all traffic classes that + * don't have oversized dropping enabled, since this check is + * bypassed in cut-through mode. + */ + if (ocelot_port->speed == min_speed) { val = GENMASK(7, 0); + for (tc = 0; tc < OCELOT_NUM_TC; tc++) + if (vsc9959_port_qmaxsdu_get(ocelot, port, tc)) + val &= ~BIT(tc); + } + set: tmp = ocelot_read_rix(ocelot, ANA_CUT_THRU_CFG, port); if (tmp == val) continue; dev_dbg(ocelot->dev, - "port %d fwd mask 0x%lx speed %d min_speed %d, %s cut-through forwarding\n", + "port %d fwd mask 0x%lx speed %d min_speed %d, %s cut-through forwarding on TC mask 0x%x\n", port, mask, ocelot_port->speed, min_speed, - val ? "enabling" : "disabling"); + val ? "enabling" : "disabling", val); ocelot_write_rix(ocelot, val, ANA_CUT_THRU_CFG, port); } -- cgit From a4bb481aeb9d84cb53112a478e6db4705b794c34 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 5 Sep 2022 20:01:25 +0300 Subject: net: dsa: felix: access QSYS_TAG_CONFIG under tas_lock in vsc9959_sched_speed_set The read-modify-write of QSYS_TAG_CONFIG from vsc9959_sched_speed_set() runs unlocked with respect to the other functions that access it, which are vsc9959_tas_guard_bands_update(), vsc9959_qos_port_tas_set() and vsc9959_tas_clock_adjust(). All the others are under ocelot->tas_lock, so move the vsc9959_sched_speed_set() access under that lock as well, to resolve the concurrency. Fixes: 55a515b1f5a9 ("net: dsa: felix: drop oversized frames with tc-taprio instead of hanging the port") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix_vsc9959.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/dsa') diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index ad7c795d6612..f8f19a85744c 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1759,13 +1759,13 @@ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, break; } + mutex_lock(&ocelot->tas_lock); + ocelot_rmw_rix(ocelot, QSYS_TAG_CONFIG_LINK_SPEED(tas_speed), QSYS_TAG_CONFIG_LINK_SPEED_M, QSYS_TAG_CONFIG, port); - mutex_lock(&ocelot->tas_lock); - if (ocelot_port->taprio) vsc9959_tas_guard_bands_update(ocelot, port); -- cgit