From e89db675171a7a12f19b6ec0089a9cc62807cdf1 Mon Sep 17 00:00:00 2001
From: Camelia Groza
Date: Tue, 29 Oct 2019 16:34:08 +0200
Subject: [PATCH] sdk_dpaa: ls1043a errata: update and optimize the restrictions

An skb is in no danger of triggering the errata under the following
conditions:
- the paged data doesn't cross a 4K page boundary OR the linear data
  is aligned to 256 bytes when crossing a 4K page boundary
- the linear and the paged data are 16 byte aligned
- the paged data is a multiple of 16 bytes in size

Optimize the detection for each skb that might trigger the errata. Parse
the skb at most twice and realign it at most once.

Signed-off-by: Camelia Groza
---
 drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h |   2 +-
 .../net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c  | 147 +++++++++++++++------
 2 files changed, 111 insertions(+), 38 deletions(-)

--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
@@ -662,7 +662,7 @@ static inline void _dpa_bp_free_pf(void
 #ifndef CONFIG_PPC
 extern bool dpaa_errata_a010022; /* SoC affected by A010022 errata */
 #define NONREC_MARK	0x01
-#define HAS_DMA_ISSUE(start, size) \
+#define CROSS_4K(start, size) \
 	(((uintptr_t)(start) + (size)) > \
 	 (((uintptr_t)(start) + 0x1000) & ~0xFFF))
 /* The headroom needs to accommodate our private data (64 bytes) but
--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c
+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c
@@ -771,32 +771,73 @@ int __hot skb_to_contig_fd(struct dpa_pr
 EXPORT_SYMBOL(skb_to_contig_fd);
 
 #ifndef CONFIG_PPC
-/* Verify the conditions that trigger the A010022 errata: data unaligned to
- * 16 bytes, 4K memory address crossings and S/G fragments.
+/* Verify the conditions that trigger the A010022 errata:
+ * - 4K memory address boundary crossings when the data/SG fragments aren't
+ *   aligned to 256 bytes
+ * - data and SG fragments that aren't aligned to 16 bytes
+ * - SG fragments that aren't a multiple of 16 bytes in size (except for the
+ *   last fragment)
  */
 static bool a010022_check_skb(struct sk_buff *skb, struct dpa_priv_s *priv)
 {
-	/* Check if the headroom is aligned */
-	if (((uintptr_t)skb->data - priv->tx_headroom) %
-	    priv->buf_layout[TX].data_align != 0)
-		return true;
+	skb_frag_t *frag;
+	int i, nr_frags;
 
-	/* Check for paged data in the skb. We do not support S/G fragments */
-	if (skb_is_nonlinear(skb))
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
+	/* Check if the linear data is 16 byte aligned */
+	if ((uintptr_t)skb->data % 16)
 		return true;
 
-	/* Check if the headroom crosses a boundary */
-	if (HAS_DMA_ISSUE(skb->head, skb_headroom(skb)))
+	/* Check if the needed headroom crosses a 4K address boundary without
+	 * being 256 byte aligned
+	 */
+	if (CROSS_4K(skb->data - priv->tx_headroom, priv->tx_headroom) &&
+	    (((uintptr_t)skb->data - priv->tx_headroom) % 256))
 		return true;
 
-	/* Check if the non-paged data crosses a boundary */
-	if (HAS_DMA_ISSUE(skb->data, skb_headlen(skb)))
+	/* Check if the linear data crosses a 4K address boundary without
+	 * being 256 byte aligned
+	 */
+	if (CROSS_4K(skb->data, skb_headlen(skb)) &&
+	    ((uintptr_t)skb->data % 256))
 		return true;
 
-	/* Check if the entire linear skb crosses a boundary */
-	if (HAS_DMA_ISSUE(skb->head, skb_end_offset(skb)))
+	/* When using Scatter/Gather, the linear data becomes the first
+	 * fragment in the list and must follow the same restrictions as the
+	 * other fragments.
+	 *
+	 * Check if the linear data is a multiple of 16 bytes in size.
+	 */
+	if (nr_frags && (skb_headlen(skb) % 16))
 		return true;
 
+	/* Check the SG fragments. They must follow the same rules as the
+	 * linear data, with an additional restriction: they must be a multiple
+	 * of 16 bytes in size to account for the hardware carryover effect.
+	 */
+	for (i = 0; i < nr_frags; i++) {
+		frag = &skb_shinfo(skb)->frags[i];
+
+		/* Check if the fragment is a multiple of 16 bytes in size.
+		 * The last fragment is exempt from this restriction.
+		 */
+		if ((i != (nr_frags - 1)) && (skb_frag_size(frag) % 16))
+			return true;
+
+		/* Check if the fragment is 16 byte aligned */
+		if (skb_frag_off(frag) % 16)
+			return true;
+
+		/* Check if the fragment crosses a 4K address boundary. Since
+		 * the alignment of previous fragments can influence the
+		 * current fragment, checking for the 256 byte alignment
+		 * isn't relevant.
+		 */
+		if (CROSS_4K(skb_frag_off(frag), skb_frag_size(frag)))
+			return true;
+	}
+
 	return false;
 }
 
@@ -1062,10 +1103,24 @@ int __hot dpa_tx_extended(struct sk_buff
 	struct dpa_percpu_priv_s *percpu_priv;
 	struct rtnl_link_stats64 *percpu_stats;
 	int err = 0;
-	bool nonlinear;
+	bool nonlinear, skb_changed, skb_need_wa;
 	int *countptr, offset = 0;
 	struct sk_buff *nskb;
 
+	/* Flags to help optimize the A010022 errata restriction checks.
+	 *
+	 * The first flag marks whether the skb changed between the first
+	 * A010022 check and the moment it's converted to an FD.
+	 *
+	 * The second flag marks whether the skb needs to be realigned in
+	 * order to avoid the errata.
+	 *
+	 * The flags should have minimal impact on platforms not affected by
+	 * the errata.
+	 */
+	skb_changed = false;
+	skb_need_wa = false;
+
 	priv = netdev_priv(net_dev);
 	/* Non-migratable context, safe to use raw_cpu_ptr */
 	percpu_priv = raw_cpu_ptr(priv->percpu_priv);
@@ -1075,13 +1130,8 @@ int __hot dpa_tx_extended(struct sk_buff
 	clear_fd(&fd);
 
 #ifndef CONFIG_PPC
-	if (unlikely(dpaa_errata_a010022) && a010022_check_skb(skb, priv)) {
-		nskb = a010022_realign_skb(skb, priv);
-		if (!nskb)
-			goto skb_to_fd_failed;
-		dev_kfree_skb(skb);
-		skb = nskb;
-	}
+	if (unlikely(dpaa_errata_a010022) && a010022_check_skb(skb, priv))
+		skb_need_wa = true;
 #endif
 
 	nonlinear = skb_is_nonlinear(skb);
@@ -1102,8 +1152,8 @@ int __hot dpa_tx_extended(struct sk_buff
 	 * Btw, we're using the first sgt entry to store the linear part of
 	 * the skb, so we're one extra frag short.
 	 */
-	if (nonlinear &&
-	    likely(skb_shinfo(skb)->nr_frags < DPA_SGT_MAX_ENTRIES)) {
+	if (nonlinear && !skb_need_wa &&
+	    likely(skb_shinfo(skb)->nr_frags < DPA_SGT_MAX_ENTRIES)) {
 		/* Just create a S/G fd based on the skb */
 		err = skb_to_sg_fd(priv, skb, &fd);
 		percpu_priv->tx_frag_skbuffs++;
@@ -1128,39 +1178,62 @@ int __hot dpa_tx_extended(struct sk_buff
 			dev_kfree_skb(skb);
 			skb = skb_new;
+			skb_changed = true;
 		}
 
 		/* We're going to store the skb backpointer at the beginning
 		 * of the data buffer, so we need a privately owned skb
+		 *
+		 * Under the A010022 errata, we are going to have a privately
+		 * owned skb after realigning the current one, so no point in
+		 * copying it here in that case.
 		 */
 		/* Code borrowed from skb_unshare(). */
-		if (skb_cloned(skb)) {
+		if (skb_cloned(skb) && !skb_need_wa) {
 			nskb = skb_copy(skb, GFP_ATOMIC);
 			kfree_skb(skb);
 			skb = nskb;
-#ifndef CONFIG_PPC
-			if (unlikely(dpaa_errata_a010022) &&
-			    a010022_check_skb(skb, priv)) {
-				nskb = a010022_realign_skb(skb, priv);
-				if (!nskb)
-					goto skb_to_fd_failed;
-				dev_kfree_skb(skb);
-				skb = nskb;
-			}
-#endif
+			skb_changed = true;
+			/* skb_copy() has now linearized the skbuff. */
-		} else if (unlikely(nonlinear)) {
+		} else if (unlikely(nonlinear) && !skb_need_wa) {
 			/* We are here because the egress skb contains
 			 * more fragments than we support. In this case,
 			 * we have no choice but to linearize it ourselves.
 			 */
-			err = __skb_linearize(skb);
+#ifndef CONFIG_PPC
+			/* No point in linearizing the skb now if we are going
+			 * to realign and linearize it again further down due
+			 * to the A010022 errata
+			 */
+			if (unlikely(dpaa_errata_a010022))
+				skb_need_wa = true;
+			else
+#endif
+				err = __skb_linearize(skb);
 		}
 		if (unlikely(!skb || err < 0))
 			/* Common out-of-memory error path */
 			goto enomem;
 
+#ifndef CONFIG_PPC
+		/* Verify the skb a second time if it has been updated since
+		 * the previous check
+		 */
+		if (unlikely(dpaa_errata_a010022) && skb_changed &&
+		    a010022_check_skb(skb, priv))
+			skb_need_wa = true;
+
+		if (unlikely(dpaa_errata_a010022) && skb_need_wa) {
+			nskb = a010022_realign_skb(skb, priv);
+			if (!nskb)
+				goto skb_to_fd_failed;
+			dev_kfree_skb(skb);
+			skb = nskb;
+		}
+#endif
+
 		err = skb_to_contig_fd(priv, skb, &fd, countptr, &offset);
 	}
 	if (unlikely(err < 0))
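
Reviewer note, not part of the patch: the CROSS_4K() arithmetic and the
linear-data rules are easy to sanity-check in userspace. The standalone C
sketch below mirrors them on plain addresses; the check_linear() helper and
the test values are hypothetical, only the macro body is copied from
dpaa_eth.h.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Copied from dpaa_eth.h: true when [start, start + size) extends past
 * the next 4K boundary above start.
 */
#define CROSS_4K(start, size) \
	(((uintptr_t)(start) + (size)) > \
	 (((uintptr_t)(start) + 0x1000) & ~0xFFF))

/* Hypothetical stand-in for the linear-data checks in a010022_check_skb():
 * returns 1 when a linear buffer at 'addr' of 'len' bytes would trip the
 * errata rules from the patch.
 */
static int check_linear(uintptr_t addr, size_t len)
{
	/* The linear data must be 16 byte aligned */
	if (addr % 16)
		return 1;

	/* A 4K boundary crossing is only a problem when the data isn't
	 * 256 byte aligned
	 */
	if (CROSS_4K(addr, len) && (addr % 256))
		return 1;

	return 0;
}

int main(void)
{
	/* Synthetic addresses chosen to exercise each rule */
	static const struct { uintptr_t addr; size_t len; } tests[] = {
		{ 0x1010, 0x100 }, /* 16B aligned, no 4K crossing: safe */
		{ 0x1e10, 0x300 }, /* crosses 0x2000, only 16B aligned: trips */
		{ 0x1f00, 0x300 }, /* crosses 0x2000 but 256B aligned: safe */
		{ 0x1004, 0x010 }, /* not even 16B aligned: trips */
	};
	size_t i;

	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
		printf("addr=0x%lx len=0x%zx -> %s\n",
		       (unsigned long)tests[i].addr, tests[i].len,
		       check_linear(tests[i].addr, tests[i].len) ?
		       "would trip the errata" : "safe");

	return 0;
}

The same CROSS_4K() arithmetic drives the SG fragment loop in the patch; the
extra rules there are the multiple-of-16 size requirement on all but the last
fragment, and the absence of the 256 byte exemption, since carryover from
earlier fragments can shift a fragment's effective alignment.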