mediatek/ramips: unify ethernet driver fixes and add performance optimizations

Increase DMA burst size and tx ring size and optimize tx processing

Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
Felix Fietkau 2020-08-27 06:39:48 +02:00
parent 6541028598
commit b5d425af23
16 changed files with 574 additions and 113 deletions

View File

@ -1,6 +1,6 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 8 Jun 2020 17:01:12 +0200
Subject: [PATCH] net: ethernet: mediatek: use napi_consume_skb
Subject: [PATCH] net: ethernet: mtk_eth_soc: use napi_consume_skb
Should improve performance, since it can use bulk free

View File

@ -1,6 +1,6 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 8 Jun 2020 17:02:39 +0200
Subject: [PATCH] net: ethernet: mediatek: significantly reduce mdio bus
Subject: [PATCH] net: ethernet: mtk_eth_soc: significantly reduce mdio bus
access latency
usleep_range often ends up sleeping much longer than the 10-20us provided

View File

@ -1,6 +1,16 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 26 Aug 2020 16:52:12 +0200
Subject: [PATCH] net: ethernet: mtk_eth_soc: fix rx vlan offload
The VLAN ID in the rx descriptor is only valid if the RX_DMA_VID bit is set
Fixes frames wrongly marked with VLAN tags
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1315,7 +1315,7 @@ static int mtk_poll_rx(struct napi_struc
@@ -1320,7 +1320,7 @@ static int mtk_poll_rx(struct napi_struc
skb->protocol = eth_type_trans(skb, netdev);
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&

View File

@ -1,6 +1,18 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 26 Aug 2020 16:55:54 +0200
Subject: [PATCH] net: ethernet: mtk_eth_soc: fix unnecessary tx queue
stops
When running short on descriptors, only stop the queue for the netdev that tx
was attempted for. By the time the something tries to send on the other netdev,
the ring might have some more room already
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1121,17 +1121,6 @@ static void mtk_wake_queue(struct mtk_et
@@ -1126,17 +1126,6 @@ static void mtk_wake_queue(struct mtk_et
}
}
@ -18,7 +30,7 @@
static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mtk_mac *mac = netdev_priv(dev);
@@ -1152,7 +1141,7 @@ static int mtk_start_xmit(struct sk_buff
@@ -1157,7 +1146,7 @@ static int mtk_start_xmit(struct sk_buff
tx_num = mtk_cal_txd_req(skb);
if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
@ -27,7 +39,7 @@
netif_err(eth, tx_queued, dev,
"Tx Ring full when queue awake!\n");
spin_unlock(&eth->page_lock);
@@ -1178,7 +1167,7 @@ static int mtk_start_xmit(struct sk_buff
@@ -1183,7 +1172,7 @@ static int mtk_start_xmit(struct sk_buff
goto drop;
if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))

View File

@ -0,0 +1,32 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 26 Aug 2020 16:58:55 +0200
Subject: [PATCH] net: ethernet: mtk_eth_soc: use larger burst size for
qdma tx
Improves tx performance
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2187,7 +2187,7 @@ static int mtk_start_dma(struct mtk_eth
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
mtk_w32(eth,
MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
- MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
+ MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO |
MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
MTK_RX_BT_32DWORDS,
MTK_QDMA_GLO_CFG);
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -195,7 +195,7 @@
#define MTK_RX_BT_32DWORDS (3 << 11)
#define MTK_NDP_CO_PRO BIT(10)
#define MTK_TX_WB_DDONE BIT(6)
-#define MTK_DMA_SIZE_16DWORDS (2 << 4)
+#define MTK_TX_BT_32DWORDS (3 << 4)
#define MTK_RX_DMA_BUSY BIT(3)
#define MTK_TX_DMA_BUSY BIT(1)
#define MTK_RX_DMA_EN BIT(2)

View File

@ -0,0 +1,21 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 26 Aug 2020 16:59:41 +0200
Subject: [PATCH] net: ethernet: mtk_eth_soc: increase DMA ring sizes
256 descriptors is not enough for multi-gigabit traffic under load on MT7622.
Bump it to 512 to improve performance
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -19,7 +19,7 @@
#define MTK_QDMA_PAGE_SIZE 2048
#define MTK_MAX_RX_LENGTH 1536
#define MTK_TX_DMA_BUF_LEN 0x3fff
-#define MTK_DMA_SIZE 256
+#define MTK_DMA_SIZE 512
#define MTK_NAPI_WEIGHT 64
#define MTK_MAC_COUNT 2
#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)

View File

@ -0,0 +1,281 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 26 Aug 2020 17:02:30 +0200
Subject: [PATCH] net: ethernet: mtk_eth_soc: implement dynamic interrupt
moderation
Reduces the number of interrupts under load
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -10,6 +10,7 @@ if NET_VENDOR_MEDIATEK
config NET_MEDIATEK_SOC
tristate "MediaTek SoC Gigabit Ethernet support"
select PHYLINK
+ select DIMLIB
---help---
This driver supports the gigabit ethernet MACs in the
MediaTek SoC family.
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1228,12 +1228,13 @@ static void mtk_update_rx_cpu_idx(struct
static int mtk_poll_rx(struct napi_struct *napi, int budget,
struct mtk_eth *eth)
{
+ struct dim_sample dim_sample = {};
struct mtk_rx_ring *ring;
int idx;
struct sk_buff *skb;
u8 *data, *new_data;
struct mtk_rx_dma *rxd, trxd;
- int done = 0;
+ int done = 0, bytes = 0;
while (done < budget) {
struct net_device *netdev;
@@ -1307,6 +1308,7 @@ static int mtk_poll_rx(struct napi_struc
else
skb_checksum_none_assert(skb);
skb->protocol = eth_type_trans(skb, netdev);
+ bytes += pktlen;
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
(trxd.rxd2 & RX_DMA_VTAG))
@@ -1338,6 +1340,12 @@ rx_done:
mtk_update_rx_cpu_idx(eth);
}
+ eth->rx_packets += done;
+ eth->rx_bytes += bytes;
+ dim_update_sample(eth->rx_events, eth->rx_packets, eth->rx_bytes,
+ &dim_sample);
+ net_dim(&eth->rx_dim, dim_sample);
+
return done;
}
@@ -1430,6 +1438,7 @@ static int mtk_poll_tx_pdma(struct mtk_e
static int mtk_poll_tx(struct mtk_eth *eth, int budget)
{
struct mtk_tx_ring *ring = &eth->tx_ring;
+ struct dim_sample dim_sample = {};
unsigned int done[MTK_MAX_DEVS];
unsigned int bytes[MTK_MAX_DEVS];
int total = 0, i;
@@ -1447,8 +1456,14 @@ static int mtk_poll_tx(struct mtk_eth *e
continue;
netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
total += done[i];
+ eth->tx_packets += done[i];
+ eth->tx_bytes += bytes[i];
}
+ dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes,
+ &dim_sample);
+ net_dim(&eth->tx_dim, dim_sample);
+
if (mtk_queue_stopped(eth) &&
(atomic_read(&ring->free_count) > ring->thresh))
mtk_wake_queue(eth);
@@ -2123,6 +2138,7 @@ static irqreturn_t mtk_handle_irq_rx(int
{
struct mtk_eth *eth = _eth;
+ eth->rx_events++;
if (likely(napi_schedule_prep(&eth->rx_napi))) {
__napi_schedule(&eth->rx_napi);
mtk_rx_irq_disable(eth, MTK_RX_DONE_INT);
@@ -2135,6 +2151,7 @@ static irqreturn_t mtk_handle_irq_tx(int
{
struct mtk_eth *eth = _eth;
+ eth->tx_events++;
if (likely(napi_schedule_prep(&eth->tx_napi))) {
__napi_schedule(&eth->tx_napi);
mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
@@ -2282,6 +2299,9 @@ static int mtk_stop(struct net_device *d
napi_disable(&eth->tx_napi);
napi_disable(&eth->rx_napi);
+ cancel_work_sync(&eth->rx_dim.work);
+ cancel_work_sync(&eth->tx_dim.work);
+
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
mtk_stop_dma(eth, MTK_PDMA_GLO_CFG);
@@ -2331,6 +2351,64 @@ err_disable_clks:
return ret;
}
+static void mtk_dim_rx(struct work_struct *work)
+{
+ struct dim *dim = container_of(work, struct dim, work);
+ struct mtk_eth *eth = container_of(dim, struct mtk_eth, rx_dim);
+ struct dim_cq_moder cur_profile;
+ u32 val, cur;
+
+ cur_profile = net_dim_get_rx_moderation(eth->rx_dim.mode,
+ dim->profile_ix);
+ spin_lock_bh(&eth->dim_lock);
+
+ val = mtk_r32(eth, MTK_PDMA_DELAY_INT);
+ val &= MTK_PDMA_DELAY_TX_MASK;
+ val |= MTK_PDMA_DELAY_RX_EN;
+
+ cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK);
+ val |= cur << MTK_PDMA_DELAY_RX_PTIME_SHIFT;
+
+ cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK);
+ val |= cur << MTK_PDMA_DELAY_RX_PINT_SHIFT;
+
+ mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
+ mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
+
+ spin_unlock_bh(&eth->dim_lock);
+
+ dim->state = DIM_START_MEASURE;
+}
+
+static void mtk_dim_tx(struct work_struct *work)
+{
+ struct dim *dim = container_of(work, struct dim, work);
+ struct mtk_eth *eth = container_of(dim, struct mtk_eth, tx_dim);
+ struct dim_cq_moder cur_profile;
+ u32 val, cur;
+
+ cur_profile = net_dim_get_tx_moderation(eth->tx_dim.mode,
+ dim->profile_ix);
+ spin_lock_bh(&eth->dim_lock);
+
+ val = mtk_r32(eth, MTK_PDMA_DELAY_INT);
+ val &= MTK_PDMA_DELAY_RX_MASK;
+ val |= MTK_PDMA_DELAY_TX_EN;
+
+ cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK);
+ val |= cur << MTK_PDMA_DELAY_TX_PTIME_SHIFT;
+
+ cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK);
+ val |= cur << MTK_PDMA_DELAY_TX_PINT_SHIFT;
+
+ mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
+ mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
+
+ spin_unlock_bh(&eth->dim_lock);
+
+ dim->state = DIM_START_MEASURE;
+}
+
static int mtk_hw_init(struct mtk_eth *eth)
{
int i, val, ret;
@@ -2352,9 +2430,6 @@ static int mtk_hw_init(struct mtk_eth *e
goto err_disable_pm;
}
- /* enable interrupt delay for RX */
- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
-
/* disable delay and normal interrupt */
mtk_tx_irq_disable(eth, ~0);
mtk_rx_irq_disable(eth, ~0);
@@ -2393,11 +2468,10 @@ static int mtk_hw_init(struct mtk_eth *e
/* Enable RX VLan Offloading */
mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
- /* enable interrupt delay for RX */
- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
+ mtk_dim_rx(&eth->rx_dim.work);
+ mtk_dim_tx(&eth->tx_dim.work);
/* disable delay and normal interrupt */
- mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
mtk_tx_irq_disable(eth, ~0);
mtk_rx_irq_disable(eth, ~0);
mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
@@ -2916,6 +2990,13 @@ static int mtk_probe(struct platform_dev
spin_lock_init(&eth->page_lock);
spin_lock_init(&eth->tx_irq_lock);
spin_lock_init(&eth->rx_irq_lock);
+ spin_lock_init(&eth->dim_lock);
+
+ eth->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ INIT_WORK(&eth->rx_dim.work, mtk_dim_rx);
+
+ eth->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ INIT_WORK(&eth->tx_dim.work, mtk_dim_tx);
if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -15,6 +15,7 @@
#include <linux/u64_stats_sync.h>
#include <linux/refcount.h>
#include <linux/phylink.h>
+#include <linux/dim.h>
#define MTK_QDMA_PAGE_SIZE 2048
#define MTK_MAX_RX_LENGTH 1536
@@ -129,13 +130,18 @@
/* PDMA Delay Interrupt Register */
#define MTK_PDMA_DELAY_INT 0xa0c
+#define MTK_PDMA_DELAY_RX_MASK GENMASK(15, 0)
#define MTK_PDMA_DELAY_RX_EN BIT(15)
-#define MTK_PDMA_DELAY_RX_PINT 4
#define MTK_PDMA_DELAY_RX_PINT_SHIFT 8
-#define MTK_PDMA_DELAY_RX_PTIME 4
-#define MTK_PDMA_DELAY_RX_DELAY \
- (MTK_PDMA_DELAY_RX_EN | MTK_PDMA_DELAY_RX_PTIME | \
- (MTK_PDMA_DELAY_RX_PINT << MTK_PDMA_DELAY_RX_PINT_SHIFT))
+#define MTK_PDMA_DELAY_RX_PTIME_SHIFT 0
+
+#define MTK_PDMA_DELAY_TX_MASK GENMASK(31, 16)
+#define MTK_PDMA_DELAY_TX_EN BIT(31)
+#define MTK_PDMA_DELAY_TX_PINT_SHIFT 24
+#define MTK_PDMA_DELAY_TX_PTIME_SHIFT 16
+
+#define MTK_PDMA_DELAY_PINT_MASK 0x7f
+#define MTK_PDMA_DELAY_PTIME_MASK 0xff
/* PDMA Interrupt Status Register */
#define MTK_PDMA_INT_STATUS 0xa20
@@ -217,6 +223,7 @@
/* QDMA Interrupt Status Register */
#define MTK_QDMA_INT_STATUS 0x1A18
#define MTK_RX_DONE_DLY BIT(30)
+#define MTK_TX_DONE_DLY BIT(28)
#define MTK_RX_DONE_INT3 BIT(19)
#define MTK_RX_DONE_INT2 BIT(18)
#define MTK_RX_DONE_INT1 BIT(17)
@@ -226,8 +233,7 @@
#define MTK_TX_DONE_INT1 BIT(1)
#define MTK_TX_DONE_INT0 BIT(0)
#define MTK_RX_DONE_INT MTK_RX_DONE_DLY
-#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
- MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
+#define MTK_TX_DONE_INT MTK_TX_DONE_DLY
/* QDMA Interrupt grouping registers */
#define MTK_QDMA_INT_GRP1 0x1a20
@@ -890,6 +896,18 @@ struct mtk_eth {
const struct mtk_soc_data *soc;
+ spinlock_t dim_lock;
+
+ u32 rx_events;
+ u32 rx_packets;
+ u32 rx_bytes;
+ struct dim rx_dim;
+
+ u32 tx_events;
+ u32 tx_packets;
+ u32 tx_bytes;
+ struct dim tx_dim;
+
u32 tx_int_mask_reg;
u32 tx_int_status_reg;
u32 rx_dma_l4_valid;

View File

@ -0,0 +1,34 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 26 Aug 2020 17:48:14 +0200
Subject: [PATCH] net: ethernet: mtk_eth_soc: drop descriptor cpu-own bit check
in qdma tx cleanup
mtk_poll_tx_qdma already checks the MTK_QTX_DRX_PTR register, which points
at the last completed descriptor.
To slightly improve performance, also remove the register bit which forces
the hardware to write back this bit earlier.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1368,9 +1368,6 @@ static int mtk_poll_tx_qdma(struct mtk_e
int mac = 0;
desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
- if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
- break;
-
tx_buf = mtk_desc_to_tx_buf(ring, desc);
if (tx_buf->flags & MTK_TX_FLAGS_FPORT1)
mac = 1;
@@ -2203,7 +2200,7 @@ static int mtk_start_dma(struct mtk_eth
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
mtk_w32(eth,
- MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
+ MTK_TX_DMA_EN |
MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO |
MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
MTK_RX_BT_32DWORDS,

View File

@ -0,0 +1,67 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 27 Aug 2020 06:32:03 +0200
Subject: [PATCH] net: ethernet: mtk_eth_soc: cache hardware pointer of last
freed tx descriptor
The value is only updated by the CPU, so it is cheaper to access from the ring
data structure than from a hardware register
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1358,7 +1358,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
struct mtk_tx_buf *tx_buf;
u32 cpu, dma;
- cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
+ cpu = ring->last_free_ptr;
dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
desc = mtk_qdma_phys_to_virt(ring, cpu);
@@ -1389,6 +1389,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
cpu = next_cpu;
}
+ ring->last_free_ptr = cpu;
mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
return budget;
@@ -1589,6 +1590,7 @@ static int mtk_tx_alloc(struct mtk_eth *
atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
ring->next_free = &ring->dma[0];
ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
+ ring->last_free_ptr = (u32)(ring->phys + ((MTK_DMA_SIZE - 1) * sz));
ring->thresh = MAX_SKB_FRAGS;
/* make sure that all changes to the dma ring are flushed before we
@@ -1602,9 +1604,7 @@ static int mtk_tx_alloc(struct mtk_eth *
mtk_w32(eth,
ring->phys + ((MTK_DMA_SIZE - 1) * sz),
MTK_QTX_CRX_PTR);
- mtk_w32(eth,
- ring->phys + ((MTK_DMA_SIZE - 1) * sz),
- MTK_QTX_DRX_PTR);
+ mtk_w32(eth, ring->last_free_ptr, MTK_QTX_DRX_PTR);
mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
MTK_QTX_CFG(0));
} else {
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -622,6 +622,7 @@ struct mtk_tx_buf {
* @phys: The physical addr of tx_buf
* @next_free: Pointer to the next free descriptor
* @last_free: Pointer to the last free descriptor
+ * @last_free_ptr: Hardware pointer value of the last free descriptor
* @thresh: The threshold of minimum amount of free descriptors
* @free_count: QDMA uses a linked list. Track how many free descriptors
* are present
@@ -632,6 +633,7 @@ struct mtk_tx_ring {
dma_addr_t phys;
struct mtk_tx_dma *next_free;
struct mtk_tx_dma *last_free;
+ u32 last_free_ptr;
u16 thresh;
atomic_t free_count;
int dma_size;

View File

@ -0,0 +1,44 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 27 Aug 2020 09:24:25 +0200
Subject: [PATCH] net: ethernet: mtk_eth_soc: only read the full rx
descriptor if DMA is done
Uncached memory access is expensive, and there is no need to access all
descriptor words if we can't process them anyway
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -772,13 +772,18 @@ static inline int mtk_max_buf_size(int f
return buf_size;
}
-static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
+static inline bool mtk_rx_get_desc(struct mtk_rx_dma *rxd,
struct mtk_rx_dma *dma_rxd)
{
- rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
rxd->rxd2 = READ_ONCE(dma_rxd->rxd2);
+ if (!(rxd->rxd2 & RX_DMA_DONE))
+ return false;
+
+ rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
rxd->rxd3 = READ_ONCE(dma_rxd->rxd3);
rxd->rxd4 = READ_ONCE(dma_rxd->rxd4);
+
+ return true;
}
/* the qdma core needs scratch memory to be setup */
@@ -1250,8 +1255,7 @@ static int mtk_poll_rx(struct napi_struc
rxd = &ring->dma[idx];
data = ring->data[idx];
- mtk_rx_get_desc(&trxd, rxd);
- if (!(trxd.rxd2 & RX_DMA_DONE))
+ if (!mtk_rx_get_desc(&trxd, rxd))
break;
/* find out which mac the packet come from. values start at 1 */

View File

@ -0,0 +1,44 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 27 Aug 2020 09:44:43 +0200
Subject: [PATCH] net: ethernet: mtk_eth_soc: unmap rx data before calling
build_skb
Since build_skb accesses the data area (for initializing shinfo), dma unmap
needs to happen before that call
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1293,17 +1293,18 @@ static int mtk_poll_rx(struct napi_struc
goto release_desc;
}
+ dma_unmap_single(eth->dev, trxd.rxd1,
+ ring->buf_size, DMA_FROM_DEVICE);
+
/* receive data */
skb = build_skb(data, ring->frag_size);
if (unlikely(!skb)) {
- skb_free_frag(new_data);
+ skb_free_frag(data);
netdev->stats.rx_dropped++;
- goto release_desc;
+ goto skip_rx;
}
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
- dma_unmap_single(eth->dev, trxd.rxd1,
- ring->buf_size, DMA_FROM_DEVICE);
pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
skb->dev = netdev;
skb_put(skb, pktlen);
@@ -1321,6 +1322,7 @@ static int mtk_poll_rx(struct napi_struc
skb_record_rx_queue(skb, 0);
napi_gro_receive(napi, skb);
+skip_rx:
ring->data[idx] = new_data;
rxd->rxd1 = (unsigned int)dma_addr;

View File

@ -1,6 +1,6 @@
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -14,4 +14,8 @@ config NET_MEDIATEK_SOC
@@ -15,4 +15,8 @@ config NET_MEDIATEK_SOC
This driver supports the gigabit ethernet MACs in the
MediaTek SoC family.
@ -147,7 +147,7 @@
#include "mtk_eth_soc.h"
@@ -1307,8 +1309,16 @@ static int mtk_poll_rx(struct napi_struc
@@ -1319,8 +1321,16 @@ static int mtk_poll_rx(struct napi_struc
(trxd.rxd2 & RX_DMA_VTAG))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
RX_DMA_VID(trxd.rxd3));
@ -164,9 +164,9 @@
+ }
+#endif
skip_rx:
ring->data[idx] = new_data;
rxd->rxd1 = (unsigned int)dma_addr;
@@ -2225,6 +2235,9 @@ static int mtk_open(struct net_device *d
@@ -2250,6 +2260,9 @@ static int mtk_open(struct net_device *d
mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
refcount_set(&eth->dma_refcnt, 1);
@ -176,7 +176,7 @@
}
else
refcount_inc(&eth->dma_refcnt);
@@ -2283,6 +2296,9 @@ static int mtk_stop(struct net_device *d
@@ -2311,6 +2324,9 @@ static int mtk_stop(struct net_device *d
mtk_dma_free(eth);
@ -186,7 +186,7 @@
return 0;
}
@@ -2742,6 +2758,27 @@ static int mtk_set_rxnfc(struct net_devi
@@ -2824,6 +2840,27 @@ static int mtk_set_rxnfc(struct net_devi
return ret;
}
@ -214,7 +214,7 @@
static const struct ethtool_ops mtk_ethtool_ops = {
.get_link_ksettings = mtk_get_link_ksettings,
.set_link_ksettings = mtk_set_link_ksettings,
@@ -2773,6 +2810,9 @@ static const struct net_device_ops mtk_n
@@ -2855,6 +2892,9 @@ static const struct net_device_ops mtk_n
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = mtk_poll_controller,
#endif
@ -224,7 +224,7 @@
};
static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
@@ -3108,6 +3148,7 @@ static const struct mtk_soc_data mt7622_
@@ -3197,6 +3237,7 @@ static const struct mtk_soc_data mt7622_
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7622_CLKS_BITMAP,
.required_pctl = false,
@ -234,7 +234,7 @@
static const struct mtk_soc_data mt7623_data = {
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -779,6 +779,13 @@ enum mkt_eth_capabilities {
@@ -787,6 +787,13 @@ enum mkt_eth_capabilities {
MTK_MUX_U3_GMAC2_TO_QPHY | \
MTK_MUX_GMAC12_TO_GEPHY_SGMII | MTK_QDMA)
@ -248,7 +248,7 @@
/* struct mtk_eth_data - This is the structure holding all differences
* among various plaforms
* @ana_rgc3: The offset for register ANA_RGC3 related to
@@ -796,6 +803,7 @@ struct mtk_soc_data {
@@ -804,6 +811,7 @@ struct mtk_soc_data {
u32 required_clks;
bool required_pctl;
netdev_features_t hw_features;
@ -256,7 +256,7 @@
};
/* currently no SoC has more than 2 macs */
@@ -821,6 +829,23 @@ struct mtk_sgmii {
@@ -829,6 +837,23 @@ struct mtk_sgmii {
u32 ana_rgc3;
};
@ -280,7 +280,7 @@
/* struct mtk_eth - This is the main datasructure for holding the state
* of the driver
* @dev: The device pointer
@@ -894,6 +919,16 @@ struct mtk_eth {
@@ -914,6 +939,16 @@ struct mtk_eth {
u32 tx_int_status_reg;
u32 rx_dma_l4_valid;
int ip_align;
@ -297,7 +297,7 @@
};
/* struct mtk_mac - the structure that holds the info about the MACs of the
@@ -926,6 +961,7 @@ void mtk_stats_update_mac(struct mtk_mac
@@ -946,6 +981,7 @@ void mtk_stats_update_mac(struct mtk_mac
void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg);
u32 mtk_r32(struct mtk_eth *eth, unsigned reg);
@ -305,7 +305,7 @@
int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *np,
u32 ana_rgc3);
@@ -938,4 +974,13 @@ int mtk_gmac_sgmii_path_setup(struct mtk
@@ -958,4 +994,13 @@ int mtk_gmac_sgmii_path_setup(struct mtk
int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id);
int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);

View File

@ -1,6 +1,6 @@
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2210,6 +2210,31 @@ static int mtk_start_dma(struct mtk_eth
@@ -2235,6 +2235,31 @@ static int mtk_start_dma(struct mtk_eth
return 0;
}
@ -32,7 +32,7 @@
static int mtk_open(struct net_device *dev)
{
struct mtk_mac *mac = netdev_priv(dev);
@@ -2230,6 +2255,8 @@ static int mtk_open(struct net_device *d
@@ -2255,6 +2280,8 @@ static int mtk_open(struct net_device *d
if (err)
return err;
@ -41,7 +41,7 @@
napi_enable(&eth->tx_napi);
napi_enable(&eth->rx_napi);
mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
@@ -2285,6 +2312,8 @@ static int mtk_stop(struct net_device *d
@@ -2310,6 +2337,8 @@ static int mtk_stop(struct net_device *d
if (!refcount_dec_and_test(&eth->dma_refcnt))
return 0;
@ -50,8 +50,8 @@
mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
mtk_rx_irq_disable(eth, MTK_RX_DONE_INT);
napi_disable(&eth->tx_napi);
@@ -2411,8 +2440,6 @@ static int mtk_hw_init(struct mtk_eth *e
mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
@@ -2493,8 +2522,6 @@ static int mtk_hw_init(struct mtk_eth *e
/* disable delay and normal interrupt */
mtk_tx_irq_disable(eth, ~0);
mtk_rx_irq_disable(eth, ~0);
- mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
@ -59,7 +59,7 @@
/* FE int grouping */
mtk_w32(eth, MTK_TX_DONE_INT, MTK_PDMA_INT_GRP1);
@@ -2421,19 +2448,6 @@ static int mtk_hw_init(struct mtk_eth *e
@@ -2503,19 +2530,6 @@ static int mtk_hw_init(struct mtk_eth *e
mtk_w32(eth, MTK_RX_DONE_INT, MTK_QDMA_INT_GRP2);
mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP);
@ -81,7 +81,7 @@
err_disable_pm:
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -84,6 +84,8 @@
@@ -85,6 +85,8 @@
#define MTK_GDMA_ICS_EN BIT(22)
#define MTK_GDMA_TCS_EN BIT(21)
#define MTK_GDMA_UCS_EN BIT(20)

View File

@ -1,24 +0,0 @@
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1364,10 +1364,11 @@ static int mtk_poll_tx_qdma(struct mtk_e
u32 next_cpu = desc->txd2;
int mac = 0;
- desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
break;
+ desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
+
tx_buf = mtk_desc_to_tx_buf(ring, desc);
if (tx_buf->flags & MTK_TX_FLAGS_FPORT1)
mac = 1;
@@ -2191,7 +2192,7 @@ static int mtk_start_dma(struct mtk_eth
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
mtk_w32(eth,
- MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
+ MTK_TX_DMA_EN |
MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
MTK_RX_BT_32DWORDS,

View File

@ -1,60 +0,0 @@
diff -urN a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c 2020-08-25 14:57:28.403764254 +0800
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c 2020-08-25 14:57:39.803438475 +0800
@@ -2193,7 +2193,7 @@
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
mtk_w32(eth,
MTK_TX_DMA_EN |
- MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
+ MTK_DMA_SIZE_32DWORDS | MTK_NDP_CO_PRO |
MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
MTK_RX_BT_32DWORDS,
MTK_QDMA_GLO_CFG);
@@ -2434,11 +2434,10 @@
/* Enable RX VLan Offloading */
mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
- /* enable interrupt delay for RX */
- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
+ /* enable interrupt delay for RX/TX */
+ mtk_w32(eth, 0x8f0f8f0f, MTK_PDMA_DELAY_INT);
+ mtk_w32(eth, 0x8f0f8f0f, MTK_QDMA_DELAY_INT);
- /* disable delay and normal interrupt */
- mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
mtk_tx_irq_disable(eth, ~0);
mtk_rx_irq_disable(eth, ~0);
diff -urN a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h 2020-08-25 14:57:22.939920398 +0800
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h 2020-08-25 14:57:43.359336855 +0800
@@ -19,8 +19,8 @@
#define MTK_QDMA_PAGE_SIZE 2048
#define MTK_MAX_RX_LENGTH 1536
#define MTK_TX_DMA_BUF_LEN 0x3fff
-#define MTK_DMA_SIZE 256
-#define MTK_NAPI_WEIGHT 64
+#define MTK_DMA_SIZE 2048
+#define MTK_NAPI_WEIGHT 256
#define MTK_MAC_COUNT 2
#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
#define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
@@ -198,6 +198,7 @@
#define MTK_NDP_CO_PRO BIT(10)
#define MTK_TX_WB_DDONE BIT(6)
#define MTK_DMA_SIZE_16DWORDS (2 << 4)
+#define MTK_DMA_SIZE_32DWORDS (3 << 4)
#define MTK_RX_DMA_BUSY BIT(3)
#define MTK_TX_DMA_BUSY BIT(1)
#define MTK_RX_DMA_EN BIT(2)
@@ -228,8 +229,8 @@
#define MTK_TX_DONE_INT1 BIT(1)
#define MTK_TX_DONE_INT0 BIT(0)
#define MTK_RX_DONE_INT MTK_RX_DONE_DLY
-#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
- MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
+#define MTK_TX_DONE_DLY BIT(28)
+#define MTK_TX_DONE_INT MTK_TX_DONE_DLY
/* QDMA Interrupt grouping registers */
#define MTK_QDMA_INT_GRP1 0x1a20

View File

@ -14,7 +14,7 @@ Signed-off-by: René van Dorst <opensource@vdorst.com>
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2793,6 +2793,7 @@ static const struct net_device_ops mtk_n
@@ -2856,6 +2856,7 @@ static const struct net_device_ops mtk_n
static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
{
@ -22,7 +22,7 @@ Signed-off-by: René van Dorst <opensource@vdorst.com>
const __be32 *_id = of_get_property(np, "reg", NULL);
struct phylink *phylink;
int phy_mode, id, err;
@@ -2885,6 +2886,9 @@ static int mtk_add_mac(struct mtk_eth *e
@@ -2948,6 +2949,9 @@ static int mtk_add_mac(struct mtk_eth *e
eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;