Compare commits

...

5 Commits

Author SHA1 Message Date
Zoltan HERPAI 4e9f0e5be4 sunxi: update the 6.6 DTS_DIR hack
Update and simplify the 6.1 vs. 6.6 DTS_DIR hack until 6.1 support is gone.

Signed-off-by: Zoltan HERPAI <wigyori@uid0.hu>
2024-04-26 11:07:43 +02:00
Felix Fietkau 7ebcf2fb9c netifd: add flow steering mode to the packet steering script
This allows directing processing of locally received packets to the CPUs
of the tasks receiving them.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
2024-04-26 06:31:27 +02:00
Felix Fietkau c4d394c6cc netifd: add a packet steering mode matching the old script
This spreads packet processing across all cores.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
2024-04-26 06:31:27 +02:00
Felix Fietkau b5c53848c3 kernel: improve GRO performance
For packets not belonging to a local socket, use fraglist GRO instead of
regular GRO. This makes segmenting packets very cheap and avoids the need
to selectively disable GRO.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
2024-04-26 06:31:27 +02:00
Felix Fietkau 98834a4c3f kernel: backport flow offload pppoe fix
Signed-off-by: Felix Fietkau <nbd@nbd.name>
2024-04-26 06:31:27 +02:00
24 changed files with 1511 additions and 331 deletions

View File

@@ -15,9 +15,11 @@ service_triggers() {
 reload_service() {
 	packet_steering="$(uci get "network.@globals[0].packet_steering")"
+	steering_flows="$(uci get "network.@globals[0].steering_flows")"
+	[ "$steering_flows" -gt 0 ] && opts="-l $steering_flows"
 	if [ -e "/usr/libexec/platform/packet-steering.sh" ]; then
 		/usr/libexec/platform/packet-steering.sh "$packet_steering"
 	else
-		/usr/libexec/network/packet-steering.uc "$packet_steering"
+		/usr/libexec/network/packet-steering.uc $opts "$packet_steering"
 	fi
 }
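For reference, a minimal configuration sketch for the two UCI options the script reads (values and the service name are illustrative, not part of this change; mode 2 selects the all-CPU steering mode added below, and steering_flows feeds the new -l argument):

    # steer across all CPUs and reserve 128 RFS flow-table entries per RX queue
    uci set network.@globals[0].packet_steering=2
    uci set network.@globals[0].steering_flows=128
    uci commit network
    /etc/init.d/network reload  # assumed service name; reload_service() above re-runs the steering script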

View File

@@ -9,8 +9,11 @@ let eth_bias = 2.0;
 let debug = 0, do_nothing = 0;
 let disable;
 let cpus;
+let all_cpus;
+let local_flows = 0;
-for (let arg in ARGV) {
+while (length(ARGV) > 0) {
+	let arg = shift(ARGV);
 	switch (arg) {
 	case "-d":
 		debug++;
@@ -21,6 +24,12 @@ for (let arg in ARGV) {
 	case '0':
 		disable = true;
 		break;
+	case '2':
+		all_cpus = true;
+		break;
+	case '-l':
+		local_flows = +shift(ARGV);
+		break;
 	}
 }
@@ -46,9 +55,19 @@ function set_task_cpu(pid, cpu) {
 	system(`taskset -p -c ${cpu} ${pid}`);
 }
+function cpu_mask(cpu)
+{
+	let mask;
+	if (cpu < 0)
+		mask = (1 << length(cpus)) - 1;
+	else
+		mask = (1 << int(cpu));
+	return sprintf("%x", mask);
+}
+
 function set_netdev_cpu(dev, cpu) {
 	let queues = glob(`/sys/class/net/${dev}/queues/rx-*/rps_cpus`);
-	let val = sprintf("%x", (1 << int(cpu)));
+	let val = cpu_mask(cpu);
 	if (disable)
 		val = 0;
 	for (let queue in queues) {
@@ -57,6 +76,13 @@ function set_netdev_cpu(dev, cpu) {
 		if (!do_nothing)
 			writefile(queue, `${val}`);
 	}
+
+	queues = glob(`/sys/class/net/${dev}/queues/rx-*/rps_flow_cnt`);
+	for (let queue in queues) {
+		if (debug || do_nothing)
+			warn(`echo ${local_flows} > ${queue}\n`);
+		if (!do_nothing)
+			writefile(queue, `${local_flows}`);
+	}
 }
 
 function task_device_match(name, device)
@@ -173,7 +199,12 @@ function assign_dev_cpu(dev) {
 	}
 	if (length(dev.netdev) > 0) {
-		let cpu = dev.rx_cpu = get_next_cpu(rx_weight, dev.napi_cpu);
+		let cpu;
+
+		if (all_cpus)
+			cpu = -1;
+		else
+			cpu = get_next_cpu(rx_weight, dev.napi_cpu);
+		dev.rx_cpu = cpu;
 		for (let netdev in dev.netdev)
 			set_netdev_cpu(netdev, cpu);
 	}
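As a rough sketch, these are the sysfs writes the updated script performs in all-CPU mode on a hypothetical 4-CPU system with a single RX queue (interface name and flow count are assumptions; cpu_mask(-1) evaluates to (1 << 4) - 1 = f):

    # allow RPS to run on every core for the RX queue
    echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus
    # size the per-queue RFS flow table so local flows follow the consuming task
    echo 128 > /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
    # note: RFS only engages when net.core.rps_sock_flow_entries is also non-zero,
    # which this hunk does not show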

View File

@@ -0,0 +1,85 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 11 Apr 2024 13:28:59 +0200
Subject: [PATCH] netfilter: flowtable: validate pppoe header
Ensure there is sufficient room to access the protocol field of the
PPPoE header. Validate it once before the flowtable lookup, then use a
helper function to access the protocol field.
Reported-by: syzbot+b6f07e1c07ef40199081@syzkaller.appspotmail.com
Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -318,7 +318,7 @@ int nf_flow_rule_route_ipv6(struct net *
int nf_flow_table_offload_init(void);
void nf_flow_table_offload_exit(void);
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
{
__be16 proto;
@@ -334,4 +334,14 @@ static inline __be16 nf_flow_pppoe_proto
return 0;
}
+static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
+{
+ if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+ return false;
+
+ *inner_proto = __nf_flow_pppoe_proto(skb);
+
+ return true;
+}
+
#endif /* _NF_FLOW_TABLE_H */
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, st
proto = veth->h_vlan_encapsulated_proto;
break;
case htons(ETH_P_PPP_SES):
- proto = nf_flow_pppoe_proto(skb);
+ if (!nf_flow_pppoe_proto(skb, &proto))
+ return NF_ACCEPT;
break;
default:
proto = skb->protocol;
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -246,10 +246,11 @@ static unsigned int nf_flow_xmit_xfrm(st
return NF_STOLEN;
}
-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
u32 *offset)
{
struct vlan_ethhdr *veth;
+ __be16 inner_proto;
switch (skb->protocol) {
case htons(ETH_P_8021Q):
@@ -260,7 +261,8 @@ static bool nf_flow_skb_encap_protocol(c
}
break;
case htons(ETH_P_PPP_SES):
- if (nf_flow_pppoe_proto(skb) == proto) {
+ if (nf_flow_pppoe_proto(skb, &inner_proto) &&
+ inner_proto == proto) {
*offset += PPPOE_SES_HLEN;
return true;
}
@@ -289,7 +291,7 @@ static void nf_flow_encap_pop(struct sk_
skb_reset_network_header(skb);
break;
case htons(ETH_P_PPP_SES):
- skb->protocol = nf_flow_pppoe_proto(skb);
+ skb->protocol = __nf_flow_pppoe_proto(skb);
skb_pull(skb, PPPOE_SES_HLEN);
skb_reset_network_header(skb);
break;

View File

@@ -0,0 +1,24 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 11 Apr 2024 13:29:00 +0200
Subject: [PATCH] netfilter: flowtable: incorrect pppoe tuple
pppoe traffic reaching the ingress path does not match the flowtable entry
because the pppoe header is expected to be at the network header offset.
This bug causes a mismatch in the flow table lookup, so pppoe packets
enter the classical forwarding path.
Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -156,7 +156,7 @@ static void nf_flow_tuple_encap(struct s
tuple->encap[i].proto = skb->protocol;
break;
case htons(ETH_P_PPP_SES):
- phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+ phdr = (struct pppoe_hdr *)skb_network_header(skb);
tuple->encap[i].id = ntohs(phdr->sid);
tuple->encap[i].proto = skb->protocol;
break;

View File

@@ -0,0 +1,87 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 11 Apr 2024 13:28:59 +0200
Subject: [PATCH] netfilter: flowtable: validate pppoe header
Ensure there is sufficient room to access the protocol field of the
PPPoE header. Validate it once before the flowtable lookup, then use a
helper function to access the protocol field.
Reported-by: syzbot+b6f07e1c07ef40199081@syzkaller.appspotmail.com
Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -335,7 +335,7 @@ int nf_flow_rule_route_ipv6(struct net *
int nf_flow_table_offload_init(void);
void nf_flow_table_offload_exit(void);
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
{
__be16 proto;
@@ -351,6 +351,16 @@ static inline __be16 nf_flow_pppoe_proto
return 0;
}
+static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
+{
+ if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+ return false;
+
+ *inner_proto = __nf_flow_pppoe_proto(skb);
+
+ return true;
+}
+
#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, st
proto = veth->h_vlan_encapsulated_proto;
break;
case htons(ETH_P_PPP_SES):
- proto = nf_flow_pppoe_proto(skb);
+ if (!nf_flow_pppoe_proto(skb, &proto))
+ return NF_ACCEPT;
break;
default:
proto = skb->protocol;
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -267,10 +267,11 @@ static unsigned int nf_flow_xmit_xfrm(st
return NF_STOLEN;
}
-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
u32 *offset)
{
struct vlan_ethhdr *veth;
+ __be16 inner_proto;
switch (skb->protocol) {
case htons(ETH_P_8021Q):
@@ -281,7 +282,8 @@ static bool nf_flow_skb_encap_protocol(c
}
break;
case htons(ETH_P_PPP_SES):
- if (nf_flow_pppoe_proto(skb) == proto) {
+ if (nf_flow_pppoe_proto(skb, &inner_proto) &&
+ inner_proto == proto) {
*offset += PPPOE_SES_HLEN;
return true;
}
@@ -310,7 +312,7 @@ static void nf_flow_encap_pop(struct sk_
skb_reset_network_header(skb);
break;
case htons(ETH_P_PPP_SES):
- skb->protocol = nf_flow_pppoe_proto(skb);
+ skb->protocol = __nf_flow_pppoe_proto(skb);
skb_pull(skb, PPPOE_SES_HLEN);
skb_reset_network_header(skb);
break;

View File

@@ -0,0 +1,24 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 11 Apr 2024 13:29:00 +0200
Subject: [PATCH] netfilter: flowtable: incorrect pppoe tuple
pppoe traffic reaching the ingress path does not match the flowtable entry
because the pppoe header is expected to be at the network header offset.
This bug causes a mismatch in the flow table lookup, so pppoe packets
enter the classical forwarding path.
Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -156,7 +156,7 @@ static void nf_flow_tuple_encap(struct s
tuple->encap[i].proto = skb->protocol;
break;
case htons(ETH_P_PPP_SES):
- phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+ phdr = (struct pppoe_hdr *)skb_network_header(skb);
tuple->encap[i].id = ntohs(phdr->sid);
tuple->encap[i].proto = skb->protocol;
break;

View File

@@ -0,0 +1,87 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 11 Apr 2024 13:28:59 +0200
Subject: [PATCH] netfilter: flowtable: validate pppoe header
Ensure there is sufficient room to access the protocol field of the
PPPoE header. Validate it once before the flowtable lookup, then use a
helper function to access the protocol field.
Reported-by: syzbot+b6f07e1c07ef40199081@syzkaller.appspotmail.com
Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -335,7 +335,7 @@ int nf_flow_rule_route_ipv6(struct net *
int nf_flow_table_offload_init(void);
void nf_flow_table_offload_exit(void);
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
{
__be16 proto;
@@ -351,6 +351,16 @@ static inline __be16 nf_flow_pppoe_proto
return 0;
}
+static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
+{
+ if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+ return false;
+
+ *inner_proto = __nf_flow_pppoe_proto(skb);
+
+ return true;
+}
+
#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, st
proto = veth->h_vlan_encapsulated_proto;
break;
case htons(ETH_P_PPP_SES):
- proto = nf_flow_pppoe_proto(skb);
+ if (!nf_flow_pppoe_proto(skb, &proto))
+ return NF_ACCEPT;
break;
default:
proto = skb->protocol;
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -273,10 +273,11 @@ static unsigned int nf_flow_xmit_xfrm(st
return NF_STOLEN;
}
-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
u32 *offset)
{
struct vlan_ethhdr *veth;
+ __be16 inner_proto;
switch (skb->protocol) {
case htons(ETH_P_8021Q):
@@ -287,7 +288,8 @@ static bool nf_flow_skb_encap_protocol(c
}
break;
case htons(ETH_P_PPP_SES):
- if (nf_flow_pppoe_proto(skb) == proto) {
+ if (nf_flow_pppoe_proto(skb, &inner_proto) &&
+ inner_proto == proto) {
*offset += PPPOE_SES_HLEN;
return true;
}
@@ -316,7 +318,7 @@ static void nf_flow_encap_pop(struct sk_
skb_reset_network_header(skb);
break;
case htons(ETH_P_PPP_SES):
- skb->protocol = nf_flow_pppoe_proto(skb);
+ skb->protocol = __nf_flow_pppoe_proto(skb);
skb_pull(skb, PPPOE_SES_HLEN);
skb_reset_network_header(skb);
break;

View File

@@ -0,0 +1,24 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 11 Apr 2024 13:29:00 +0200
Subject: [PATCH] netfilter: flowtable: incorrect pppoe tuple
pppoe traffic reaching the ingress path does not match the flowtable entry
because the pppoe header is expected to be at the network header offset.
This bug causes a mismatch in the flow table lookup, so pppoe packets
enter the classical forwarding path.
Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -157,7 +157,7 @@ static void nf_flow_tuple_encap(struct s
tuple->encap[i].proto = skb->protocol;
break;
case htons(ETH_P_PPP_SES):
- phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+ phdr = (struct pppoe_hdr *)skb_network_header(skb);
tuple->encap[i].id = ntohs(phdr->sid);
tuple->encap[i].proto = skb->protocol;
break;

View File

@@ -98,7 +98,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
  obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
 --- /dev/null
 +++ b/net/netfilter/xt_FLOWOFFLOAD.c
-@@ -0,0 +1,701 @@
+@@ -0,0 +1,702 @@
 +/*
 + * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
 + *
@@ -163,7 +163,8 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 +		proto = veth->h_vlan_encapsulated_proto;
 +		break;
 +	case htons(ETH_P_PPP_SES):
-+		proto = nf_flow_pppoe_proto(skb);
++		if (!nf_flow_pppoe_proto(skb, &proto))
++			return NF_ACCEPT;
 +		break;
 +	default:
 +		proto = skb->protocol;

View File

@@ -0,0 +1,24 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 23 Apr 2024 12:35:21 +0200
Subject: [PATCH] net: enable fraglist GRO by default
This can significantly improve performance for packet forwarding/bridging.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -242,10 +242,10 @@ static inline int find_next_netdev_featu
#define NETIF_F_UPPER_DISABLES NETIF_F_LRO
/* changeable features with no special hardware requirements */
-#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO)
+#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO | NETIF_F_GRO_FRAGLIST)
/* Changeable features with no special hardware requirements that defaults to off. */
-#define NETIF_F_SOFT_FEATURES_OFF (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD)
+#define NETIF_F_SOFT_FEATURES_OFF (NETIF_F_GRO_UDP_FWD)
#define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \
NETIF_F_HW_VLAN_CTAG_RX | \
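NETIF_F_GRO_FRAGLIST stays in the changeable-feature set, so the new default can still be overridden per device; a quick sanity check with ethtool (interface name assumed):

    ethtool -k eth0 | grep rx-gro-list  # should now report 'on' by default
    ethtool -K eth0 rx-gro-list off     # opt a single device back out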

View File

@@ -44,7 +44,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
  obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
 --- /dev/null
 +++ b/net/netfilter/xt_FLOWOFFLOAD.c
-@@ -0,0 +1,702 @@
+@@ -0,0 +1,703 @@
 +/*
 + * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
 + *
@@ -109,7 +109,8 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 +		proto = veth->h_vlan_encapsulated_proto;
 +		break;
 +	case htons(ETH_P_PPP_SES):
-+		proto = nf_flow_pppoe_proto(skb);
++		if (!nf_flow_pppoe_proto(skb, &proto))
++			return NF_ACCEPT;
 +		break;
 +	default:
 +		proto = skb->protocol;

View File

@@ -47,7 +47,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 	const struct header_ops *header_ops;
 	unsigned char operstate;
-@@ -2206,6 +2213,10 @@ struct net_device {
+@@ -2204,6 +2211,10 @@ struct net_device {
 	struct mctp_dev __rcu *mctp_ptr;
 #endif
@@ -60,7 +60,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
  */
 --- a/include/linux/skbuff.h
 +++ b/include/linux/skbuff.h
-@@ -3046,6 +3046,10 @@ static inline int pskb_trim(struct sk_bu
+@@ -3045,6 +3045,10 @@ static inline int pskb_trim(struct sk_bu
 	return (len < skb->len) ? __pskb_trim(skb, len) : 0;
 }
@@ -71,7 +71,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 /**
  * pskb_trim_unique - remove end from a paged unique (not cloned) buffer
  * @skb: buffer to alter
-@@ -3195,16 +3199,6 @@ static inline struct sk_buff *dev_alloc_
+@@ -3194,16 +3198,6 @@ static inline struct sk_buff *dev_alloc_
 }
@@ -152,7 +152,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 {
 --- a/net/ethernet/eth.c
 +++ b/net/ethernet/eth.c
-@@ -171,6 +171,12 @@ __be16 eth_type_trans(struct sk_buff *sk
+@@ -159,6 +159,12 @@ __be16 eth_type_trans(struct sk_buff *sk
 	const struct ethhdr *eth;
 	skb->dev = dev;

View File

@@ -0,0 +1,24 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 23 Apr 2024 12:35:21 +0200
Subject: [PATCH] net: enable fraglist GRO by default
This can significantly improve performance for packet forwarding/bridging.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -242,10 +242,10 @@ static inline int find_next_netdev_featu
#define NETIF_F_UPPER_DISABLES NETIF_F_LRO
/* changeable features with no special hardware requirements */
-#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO)
+#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO | NETIF_F_GRO_FRAGLIST)
/* Changeable features with no special hardware requirements that defaults to off. */
-#define NETIF_F_SOFT_FEATURES_OFF (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD)
+#define NETIF_F_SOFT_FEATURES_OFF (NETIF_F_GRO_UDP_FWD)
#define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \
NETIF_F_HW_VLAN_CTAG_RX | \

View File

@@ -44,7 +44,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
  obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
 --- /dev/null
 +++ b/net/netfilter/xt_FLOWOFFLOAD.c
-@@ -0,0 +1,702 @@
+@@ -0,0 +1,703 @@
 +/*
 + * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
 + *
@@ -109,7 +109,8 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 +		proto = veth->h_vlan_encapsulated_proto;
 +		break;
 +	case htons(ETH_P_PPP_SES):
-+		proto = nf_flow_pppoe_proto(skb);
++		if (!nf_flow_pppoe_proto(skb, &proto))
++			return NF_ACCEPT;
 +		break;
 +	default:
 +		proto = skb->protocol;

View File

@@ -47,7 +47,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 	const struct header_ops *header_ops;
 	unsigned char operstate;
-@@ -2259,6 +2266,10 @@ struct net_device {
+@@ -2257,6 +2264,10 @@ struct net_device {
 	struct mctp_dev __rcu *mctp_ptr;
 #endif
@@ -60,7 +60,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
  */
 --- a/include/linux/skbuff.h
 +++ b/include/linux/skbuff.h
-@@ -3081,6 +3081,10 @@ static inline int pskb_trim(struct sk_bu
+@@ -3080,6 +3080,10 @@ static inline int pskb_trim(struct sk_bu
 	return (len < skb->len) ? __pskb_trim(skb, len) : 0;
 }
@@ -71,7 +71,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 /**
  * pskb_trim_unique - remove end from a paged unique (not cloned) buffer
  * @skb: buffer to alter
-@@ -3246,16 +3250,6 @@ static inline struct sk_buff *dev_alloc_
+@@ -3245,16 +3249,6 @@ static inline struct sk_buff *dev_alloc_
 }
@@ -152,7 +152,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 {
 --- a/net/ethernet/eth.c
 +++ b/net/ethernet/eth.c
-@@ -171,6 +171,12 @@ __be16 eth_type_trans(struct sk_buff *sk
+@@ -159,6 +159,12 @@ __be16 eth_type_trans(struct sk_buff *sk
 	const struct ethhdr *eth;
 	skb->dev = dev;

View File

@@ -1,151 +0,0 @@
From: Felix Fietkau <nbd@nbd.name>
Subject: net: replace GRO optimization patch with a new one that supports VLANs/bridges with different MAC addresses
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
include/linux/netdevice.h | 2 ++
include/linux/skbuff.h | 3 ++-
net/core/dev.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++
net/ethernet/eth.c | 18 +++++++++++++++++-
4 files changed, 69 insertions(+), 2 deletions(-)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2157,6 +2157,8 @@ struct net_device {
struct netdev_hw_addr_list mc;
struct netdev_hw_addr_list dev_addrs;
+ unsigned char local_addr_mask[MAX_ADDR_LEN];
+
#ifdef CONFIG_SYSFS
struct kset *queues_kset;
#endif
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -967,6 +967,7 @@ struct sk_buff {
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
+ __u8 gro_skip:1;
__u8 ipvs_property:1;
__u8 inner_protocol_type:1;
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -492,6 +492,9 @@ static enum gro_result dev_gro_receive(s
int same_flow;
int grow;
+ if (skb->gro_skip)
+ goto normal;
+
if (netif_elide_gro(skb->dev))
goto normal;
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7628,6 +7628,48 @@ static void __netdev_adjacent_dev_unlink
&upper_dev->adj_list.lower);
}
+static void __netdev_addr_mask(unsigned char *mask, const unsigned char *addr,
+ struct net_device *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->addr_len; i++)
+ mask[i] |= addr[i] ^ dev->dev_addr[i];
+}
+
+static void __netdev_upper_mask(unsigned char *mask, struct net_device *dev,
+ struct net_device *lower)
+{
+ struct net_device *cur;
+ struct list_head *iter;
+
+ netdev_for_each_upper_dev_rcu(dev, cur, iter) {
+ __netdev_addr_mask(mask, cur->dev_addr, lower);
+ __netdev_upper_mask(mask, cur, lower);
+ }
+}
+
+static void __netdev_update_addr_mask(struct net_device *dev)
+{
+ unsigned char mask[MAX_ADDR_LEN];
+ struct net_device *cur;
+ struct list_head *iter;
+
+ memset(mask, 0, sizeof(mask));
+ __netdev_upper_mask(mask, dev, dev);
+ memcpy(dev->local_addr_mask, mask, dev->addr_len);
+
+ netdev_for_each_lower_dev(dev, cur, iter)
+ __netdev_update_addr_mask(cur);
+}
+
+static void netdev_update_addr_mask(struct net_device *dev)
+{
+ rcu_read_lock();
+ __netdev_update_addr_mask(dev);
+ rcu_read_unlock();
+}
+
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
void *upper_priv, void *upper_info,
@@ -7679,6 +7721,7 @@ static int __netdev_upper_dev_link(struc
if (ret)
return ret;
+ netdev_update_addr_mask(dev);
ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
@@ -7775,6 +7818,7 @@ static void __netdev_upper_dev_unlink(st
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
+ netdev_update_addr_mask(dev);
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
@@ -8827,6 +8871,7 @@ int dev_set_mac_address(struct net_devic
if (err)
return err;
dev->addr_assign_type = NET_ADDR_SET;
+ netdev_update_addr_mask(dev);
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
add_device_randomness(dev->dev_addr, dev->addr_len);
return 0;
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -143,6 +143,18 @@ u32 eth_get_headlen(const struct net_dev
}
EXPORT_SYMBOL(eth_get_headlen);
+static inline bool
+eth_check_local_mask(const void *addr1, const void *addr2, const void *mask)
+{
+ const u16 *a1 = addr1;
+ const u16 *a2 = addr2;
+ const u16 *m = mask;
+
+ return (((a1[0] ^ a2[0]) & ~m[0]) |
+ ((a1[1] ^ a2[1]) & ~m[1]) |
+ ((a1[2] ^ a2[2]) & ~m[2]));
+}
+
/**
* eth_type_trans - determine the packet's protocol ID.
* @skb: received socket data
@@ -174,6 +186,10 @@ __be16 eth_type_trans(struct sk_buff *sk
} else {
skb->pkt_type = PACKET_OTHERHOST;
}
+
+ if (eth_check_local_mask(eth->h_dest, dev->dev_addr,
+ dev->local_addr_mask))
+ skb->gro_skip = 1;
}
/*

View File

@@ -0,0 +1,559 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 23 Apr 2024 11:23:03 +0200
Subject: [PATCH] net: add TCP fraglist GRO support
When forwarding TCP after GRO, software segmentation is very expensive,
especially when the checksum needs to be recalculated.
One case where that's currently unavoidable is when routing packets over
PPPoE. Performance improves significantly when using fraglist GRO
implemented in the same way as for UDP.
Here's a measurement of running 2 TCP streams through a MediaTek MT7622
device (2-core Cortex-A53), which runs NAT with flow offload enabled from
one ethernet port to PPPoE on another ethernet port + cake qdisc set to
1Gbps.
rx-gro-list off: 630 Mbit/s, CPU 35% idle
rx-gro-list on: 770 Mbit/s, CPU 40% idle
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -424,6 +424,7 @@ static inline __wsum ip6_gro_compute_pse
}
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
static inline void gro_normal_list(struct napi_struct *napi)
@@ -446,5 +447,48 @@ static inline void gro_normal_one(struct
gro_normal_list(napi);
}
+/* This function is the alternative of 'inet_iif' and 'inet_sdif'
+ * functions in case we can not rely on fields of IPCB.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+ *iif = inet_iif(skb) ?: skb->dev->ifindex;
+ *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(skb->dev)) {
+ struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+ *sdif = *iif;
+ *iif = master ? master->ifindex : 0;
+ }
+#endif
+}
+
+/* This function is the alternative of 'inet6_iif' and 'inet6_sdif'
+ * functions in case we can not rely on fields of IP6CB.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+ /* using skb->dev->ifindex because skb_dst(skb) is not initialized */
+ *iif = skb->dev->ifindex;
+ *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(skb->dev)) {
+ struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+ *sdif = *iif;
+ *iif = master ? master->ifindex : 0;
+ }
+#endif
+}
+
#endif /* _NET_IPV6_GRO_H */
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2057,7 +2057,10 @@ void tcp_v4_destroy_sock(struct sock *sk
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features);
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb);
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th);
INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -290,6 +290,33 @@ done:
return 0;
}
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+{
+ if (unlikely(p->len + skb->len >= 65536))
+ return -E2BIG;
+
+ if (NAPI_GRO_CB(p)->last == p)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ NAPI_GRO_CB(p)->last->next = skb;
+
+ skb_pull(skb, skb_gro_offset(skb));
+
+ NAPI_GRO_CB(p)->last = skb;
+ NAPI_GRO_CB(p)->count++;
+ p->data_len += skb->len;
+
+ /* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
+ skb->sk = NULL;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+
+ NAPI_GRO_CB(skb)->same_flow = 1;
+
+ return 0;
+}
+
static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -27,6 +27,68 @@ static void tcp_gso_tstamp(struct sk_buf
}
}
+static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
+ __be32 *oldip, __be32 *newip,
+ __be16 *oldport, __be16 *newport)
+{
+ struct tcphdr *th;
+ struct iphdr *iph;
+
+ if (*oldip == *newip && *oldport == *newport)
+ return;
+
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+
+ inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true);
+ inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false);
+ *oldport = *newport;
+
+ csum_replace4(&iph->check, *oldip, *newip);
+ *oldip = *newip;
+}
+
+static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+ struct sk_buff *seg;
+ struct tcphdr *th, *th2;
+ struct iphdr *iph, *iph2;
+
+ seg = segs;
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+ th2 = tcp_hdr(seg->next);
+ iph2 = ip_hdr(seg->next);
+
+ if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) &&
+ iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
+ return segs;
+
+ while ((seg = seg->next)) {
+ th2 = tcp_hdr(seg);
+ iph2 = ip_hdr(seg);
+
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->saddr, &iph->saddr,
+ &th2->source, &th->source);
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->daddr, &iph->daddr,
+ &th2->dest, &th->dest);
+ }
+
+ return segs;
+}
+
+static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ return __tcpv4_gso_segment_list_csum(skb);
+}
+
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -36,6 +98,9 @@ static struct sk_buff *tcp4_gso_segment(
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __tcp4_gso_segment_list(skb, features);
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
@@ -177,61 +242,76 @@ out:
return segs;
}
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
{
- struct sk_buff *pp = NULL;
+ struct tcphdr *th2;
struct sk_buff *p;
+
+ list_for_each_entry(p, head, list) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ th2 = tcp_hdr(p);
+ if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ return p;
+ }
+
+ return NULL;
+}
+
+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
+{
+ unsigned int thlen, hlen, off;
struct tcphdr *th;
- struct tcphdr *th2;
- unsigned int len;
- unsigned int thlen;
- __be32 flags;
- unsigned int mss = 1;
- unsigned int hlen;
- unsigned int off;
- int flush = 1;
- int i;
off = skb_gro_offset(skb);
hlen = off + sizeof(*th);
th = skb_gro_header(skb, hlen, off);
if (unlikely(!th))
- goto out;
+ return NULL;
thlen = th->doff * 4;
if (thlen < sizeof(*th))
- goto out;
+ return NULL;
hlen = off + thlen;
if (skb_gro_header_hard(skb, hlen)) {
th = skb_gro_header_slow(skb, hlen, off);
if (unlikely(!th))
- goto out;
+ return NULL;
}
skb_gro_pull(skb, thlen);
- len = skb_gro_len(skb);
- flags = tcp_flag_word(th);
-
- list_for_each_entry(p, head, list) {
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
+ return th;
+}
- th2 = tcp_hdr(p);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ unsigned int thlen = th->doff * 4;
+ struct sk_buff *pp = NULL;
+ struct sk_buff *p;
+ struct tcphdr *th2;
+ unsigned int len;
+ __be32 flags;
+ unsigned int mss = 1;
+ int flush = 1;
+ int i;
- if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
+ len = skb_gro_len(skb);
+ flags = tcp_flag_word(th);
- goto found;
- }
- p = NULL;
- goto out_check_final;
+ p = tcp_gro_lookup(head, th);
+ if (!p)
+ goto out_check_final;
-found:
/* Include the IP ID check below from the inner most IP hdr */
+ th2 = tcp_hdr(p);
flush = NAPI_GRO_CB(p)->flush;
flush |= (__force int)(flags & TCP_FLAG_CWR);
flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
@@ -268,6 +348,19 @@ found:
flush |= p->decrypted ^ skb->decrypted;
#endif
+ if (NAPI_GRO_CB(p)->is_flist) {
+ flush |= (__force int)(flags ^ tcp_flag_word(th2));
+ flush |= skb->ip_summed != p->ip_summed;
+ flush |= skb->csum_level != p->csum_level;
+ flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
+ flush |= NAPI_GRO_CB(p)->count >= 64;
+
+ if (flush || skb_gro_receive_list(p, skb))
+ mss = 1;
+
+ goto out_check_final;
+ }
+
if (flush || skb_gro_receive(p, skb)) {
mss = 1;
goto out_check_final;
@@ -289,7 +382,6 @@ out_check_final:
if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
pp = p;
-out:
NAPI_GRO_CB(skb)->flush |= (flush != 0);
return pp;
@@ -315,18 +407,56 @@ int tcp_gro_complete(struct sk_buff *skb
}
EXPORT_SYMBOL(tcp_gro_complete);
+static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ const struct iphdr *iph = skb_gro_network_header(skb);
+ struct net *net = dev_net(skb->dev);
+ struct sk_buff *p;
+ struct sock *sk;
+ int iif, sdif;
+
+ if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
+ sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ iph->saddr, th->source,
+ iph->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- inet_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ inet_gro_compute_pseudo))
+ goto flush;
+
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
- return tcp_gro_receive(head, skb);
+ tcp4_check_fraglist_gro(head, skb, th);
+
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
@@ -334,6 +464,15 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_com
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -425,33 +425,6 @@ out:
return segs;
}
-static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
-{
- if (unlikely(p->len + skb->len >= 65536))
- return -E2BIG;
-
- if (NAPI_GRO_CB(p)->last == p)
- skb_shinfo(p)->frag_list = skb;
- else
- NAPI_GRO_CB(p)->last->next = skb;
-
- skb_pull(skb, skb_gro_offset(skb));
-
- NAPI_GRO_CB(p)->last = skb;
- NAPI_GRO_CB(p)->count++;
- p->data_len += skb->len;
-
- /* sk ownership - if any - completely transferred to the aggregated packet */
- skb->destructor = NULL;
- skb->sk = NULL;
- p->truesize += skb->truesize;
- p->len += skb->len;
-
- NAPI_GRO_CB(skb)->same_flow = 1;
-
- return 0;
-}
-
#define UDP_GRO_CNT_MAX 64
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -7,24 +7,65 @@
*/
#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
+#include <net/inet6_hashtables.h>
#include <net/gro.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
+static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct ipv6hdr *hdr = skb_gro_network_header(skb);
+ struct net *net = dev_net(skb->dev);
+ struct sk_buff *p;
+ struct sock *sk;
+ int iif, sdif;
+
+ if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ &hdr->saddr, th->source,
+ &hdr->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- ip6_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ ip6_gro_compute_pseudo))
+ goto flush;
- return tcp_gro_receive(head, skb);
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
+
+ tcp6_check_fraglist_gro(head, skb, th);
+
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
@@ -32,6 +73,15 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_com
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
@@ -50,6 +100,9 @@ static struct sk_buff *tcp6_gso_segment(
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return skb_segment_list(skb, features, skb_mac_header_len(skb));
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
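The measurement quoted in the commit message can be approximated by running two parallel TCP streams through the router and toggling the feature between runs (tool choice and flags are assumptions, not taken from the commit):

    ethtool -K eth0 rx-gro-list on  # use 'off' for the baseline run
    iperf3 -c <server> -P 2 -t 30   # two TCP streams, as in the measurement above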

View File

@@ -17,7 +17,7 @@ Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
 --- a/include/linux/netdevice.h
 +++ b/include/linux/netdevice.h
-@@ -2192,7 +2192,7 @@ struct net_device {
+@@ -2190,7 +2190,7 @@ struct net_device {
 #if IS_ENABLED(CONFIG_AX25)
 	void *ax25_ptr;
 #endif

View File

@@ -20,7 +20,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 /**
  * napi_disable - prevent NAPI from scheduling
-@@ -3152,6 +3153,7 @@ struct softnet_data {
+@@ -3150,6 +3151,7 @@ struct softnet_data {
 	unsigned int processed;
 	unsigned int time_squeeze;
 	unsigned int received_rps;
@@ -157,7 +157,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 		int (*poll)(struct napi_struct *, int), int weight)
 {
-@@ -11171,6 +11242,9 @@ static int dev_cpu_dead(unsigned int old
+@@ -11126,6 +11197,9 @@ static int dev_cpu_dead(unsigned int old
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
@@ -167,7 +167,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 #ifdef CONFIG_RPS
 	remsd = oldsd->rps_ipi_list;
 	oldsd->rps_ipi_list = NULL;
-@@ -11483,6 +11557,7 @@ static int __init net_dev_init(void)
+@@ -11438,6 +11512,7 @@ static int __init net_dev_init(void)
 	INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
 	spin_lock_init(&sd->defer_lock);

View File

@@ -1,151 +0,0 @@
From: Felix Fietkau <nbd@nbd.name>
Subject: net: replace GRO optimization patch with a new one that supports VLANs/bridges with different MAC addresses
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
include/linux/netdevice.h | 2 ++
include/linux/skbuff.h | 3 ++-
net/core/dev.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++
net/ethernet/eth.c | 18 +++++++++++++++++-
4 files changed, 69 insertions(+), 2 deletions(-)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2210,6 +2210,8 @@ struct net_device {
struct netdev_hw_addr_list mc;
struct netdev_hw_addr_list dev_addrs;
+ unsigned char local_addr_mask[MAX_ADDR_LEN];
+
#ifdef CONFIG_SYSFS
struct kset *queues_kset;
#endif
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -959,6 +959,7 @@ struct sk_buff {
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
+ __u8 gro_skip:1;
#if IS_ENABLED(CONFIG_IP_VS)
__u8 ipvs_property:1;
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -446,6 +446,9 @@ static enum gro_result dev_gro_receive(s
enum gro_result ret;
int same_flow;
+ if (skb->gro_skip)
+ goto normal;
+
if (netif_elide_gro(skb->dev))
goto normal;
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7689,6 +7689,48 @@ static void __netdev_adjacent_dev_unlink
&upper_dev->adj_list.lower);
}
+static void __netdev_addr_mask(unsigned char *mask, const unsigned char *addr,
+ struct net_device *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->addr_len; i++)
+ mask[i] |= addr[i] ^ dev->dev_addr[i];
+}
+
+static void __netdev_upper_mask(unsigned char *mask, struct net_device *dev,
+ struct net_device *lower)
+{
+ struct net_device *cur;
+ struct list_head *iter;
+
+ netdev_for_each_upper_dev_rcu(dev, cur, iter) {
+ __netdev_addr_mask(mask, cur->dev_addr, lower);
+ __netdev_upper_mask(mask, cur, lower);
+ }
+}
+
+static void __netdev_update_addr_mask(struct net_device *dev)
+{
+ unsigned char mask[MAX_ADDR_LEN];
+ struct net_device *cur;
+ struct list_head *iter;
+
+ memset(mask, 0, sizeof(mask));
+ __netdev_upper_mask(mask, dev, dev);
+ memcpy(dev->local_addr_mask, mask, dev->addr_len);
+
+ netdev_for_each_lower_dev(dev, cur, iter)
+ __netdev_update_addr_mask(cur);
+}
+
+static void netdev_update_addr_mask(struct net_device *dev)
+{
+ rcu_read_lock();
+ __netdev_update_addr_mask(dev);
+ rcu_read_unlock();
+}
+
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
void *upper_priv, void *upper_info,
@@ -7740,6 +7782,7 @@ static int __netdev_upper_dev_link(struc
if (ret)
return ret;
+ netdev_update_addr_mask(dev);
ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
@@ -7836,6 +7879,7 @@ static void __netdev_upper_dev_unlink(st
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
+ netdev_update_addr_mask(dev);
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
@@ -8892,6 +8936,7 @@ int dev_set_mac_address(struct net_devic
return err;
}
dev->addr_assign_type = NET_ADDR_SET;
+ netdev_update_addr_mask(dev);
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
add_device_randomness(dev->dev_addr, dev->addr_len);
return 0;
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -143,6 +143,18 @@ u32 eth_get_headlen(const struct net_dev
}
EXPORT_SYMBOL(eth_get_headlen);
+static inline bool
+eth_check_local_mask(const void *addr1, const void *addr2, const void *mask)
+{
+ const u16 *a1 = addr1;
+ const u16 *a2 = addr2;
+ const u16 *m = mask;
+
+ return (((a1[0] ^ a2[0]) & ~m[0]) |
+ ((a1[1] ^ a2[1]) & ~m[1]) |
+ ((a1[2] ^ a2[2]) & ~m[2]));
+}
+
/**
* eth_type_trans - determine the packet's protocol ID.
* @skb: received socket data
@@ -174,6 +186,10 @@ __be16 eth_type_trans(struct sk_buff *sk
} else {
skb->pkt_type = PACKET_OTHERHOST;
}
+
+ if (eth_check_local_mask(eth->h_dest, dev->dev_addr,
+ dev->local_addr_mask))
+ skb->gro_skip = 1;
}
/*

View File

@@ -0,0 +1,510 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 23 Apr 2024 11:23:03 +0200
Subject: [PATCH] net: add TCP fraglist GRO support
When forwarding TCP after GRO, software segmentation is very expensive,
especially when the checksum needs to be recalculated.
One case where that's currently unavoidable is when routing packets over
PPPoE. Performance improves significantly when using fraglist GRO
implemented in the same way as for UDP.
Here's a measurement of running 2 TCP streams through a MediaTek MT7622
device (2-core Cortex-A53), which runs NAT with flow offload enabled from
one ethernet port to PPPoE on another ethernet port + cake qdisc set to
1Gbps.
rx-gro-list off: 630 Mbit/s, CPU 35% idle
rx-gro-list on: 770 Mbit/s, CPU 40% idle
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -430,6 +430,7 @@ static inline __wsum ip6_gro_compute_pse
}
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
static inline void gro_normal_list(struct napi_struct *napi)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2082,7 +2082,10 @@ void tcp_v4_destroy_sock(struct sock *sk
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features);
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb);
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th);
INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -233,6 +233,33 @@ done:
return 0;
}
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+{
+ if (unlikely(p->len + skb->len >= 65536))
+ return -E2BIG;
+
+ if (NAPI_GRO_CB(p)->last == p)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ NAPI_GRO_CB(p)->last->next = skb;
+
+ skb_pull(skb, skb_gro_offset(skb));
+
+ NAPI_GRO_CB(p)->last = skb;
+ NAPI_GRO_CB(p)->count++;
+ p->data_len += skb->len;
+
+ /* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
+ skb->sk = NULL;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+
+ NAPI_GRO_CB(skb)->same_flow = 1;
+
+ return 0;
+}
+
static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buf
}
}
+static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
+ __be32 *oldip, __be32 *newip,
+ __be16 *oldport, __be16 *newport)
+{
+ struct tcphdr *th;
+ struct iphdr *iph;
+
+ if (*oldip == *newip && *oldport == *newport)
+ return;
+
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+
+ inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true);
+ inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false);
+ *oldport = *newport;
+
+ csum_replace4(&iph->check, *oldip, *newip);
+ *oldip = *newip;
+}
+
+static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+ struct sk_buff *seg;
+ struct tcphdr *th, *th2;
+ struct iphdr *iph, *iph2;
+
+ seg = segs;
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+ th2 = tcp_hdr(seg->next);
+ iph2 = ip_hdr(seg->next);
+
+ if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) &&
+ iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
+ return segs;
+
+ while ((seg = seg->next)) {
+ th2 = tcp_hdr(seg);
+ iph2 = ip_hdr(seg);
+
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->saddr, &iph->saddr,
+ &th2->source, &th->source);
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->daddr, &iph->daddr,
+ &th2->dest, &th->dest);
+ }
+
+ return segs;
+}
+
+static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ return __tcpv4_gso_segment_list_csum(skb);
+}
+
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -37,6 +99,9 @@ static struct sk_buff *tcp4_gso_segment(
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __tcp4_gso_segment_list(skb, features);
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
@@ -178,61 +243,76 @@ out:
return segs;
}
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
{
- struct sk_buff *pp = NULL;
+ struct tcphdr *th2;
struct sk_buff *p;
+
+ list_for_each_entry(p, head, list) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ th2 = tcp_hdr(p);
+ if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ return p;
+ }
+
+ return NULL;
+}
+
+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
+{
+ unsigned int thlen, hlen, off;
struct tcphdr *th;
- struct tcphdr *th2;
- unsigned int len;
- unsigned int thlen;
- __be32 flags;
- unsigned int mss = 1;
- unsigned int hlen;
- unsigned int off;
- int flush = 1;
- int i;
off = skb_gro_offset(skb);
hlen = off + sizeof(*th);
th = skb_gro_header(skb, hlen, off);
if (unlikely(!th))
- goto out;
+ return NULL;
thlen = th->doff * 4;
if (thlen < sizeof(*th))
- goto out;
+ return NULL;
hlen = off + thlen;
if (skb_gro_header_hard(skb, hlen)) {
th = skb_gro_header_slow(skb, hlen, off);
if (unlikely(!th))
- goto out;
+ return NULL;
}
skb_gro_pull(skb, thlen);
- len = skb_gro_len(skb);
- flags = tcp_flag_word(th);
-
- list_for_each_entry(p, head, list) {
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
+ return th;
+}
- th2 = tcp_hdr(p);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ unsigned int thlen = th->doff * 4;
+ struct sk_buff *pp = NULL;
+ struct sk_buff *p;
+ struct tcphdr *th2;
+ unsigned int len;
+ __be32 flags;
+ unsigned int mss = 1;
+ int flush = 1;
+ int i;
- if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
+ len = skb_gro_len(skb);
+ flags = tcp_flag_word(th);
- goto found;
- }
- p = NULL;
- goto out_check_final;
+ p = tcp_gro_lookup(head, th);
+ if (!p)
+ goto out_check_final;
-found:
/* Include the IP ID check below from the inner most IP hdr */
+ th2 = tcp_hdr(p);
flush = NAPI_GRO_CB(p)->flush;
flush |= (__force int)(flags & TCP_FLAG_CWR);
flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
@@ -269,6 +349,19 @@ found:
flush |= p->decrypted ^ skb->decrypted;
#endif
+ if (NAPI_GRO_CB(p)->is_flist) {
+ flush |= (__force int)(flags ^ tcp_flag_word(th2));
+ flush |= skb->ip_summed != p->ip_summed;
+ flush |= skb->csum_level != p->csum_level;
+ flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
+ flush |= NAPI_GRO_CB(p)->count >= 64;
+
+ if (flush || skb_gro_receive_list(p, skb))
+ mss = 1;
+
+ goto out_check_final;
+ }
+
if (flush || skb_gro_receive(p, skb)) {
mss = 1;
goto out_check_final;
@@ -290,7 +383,6 @@ out_check_final:
if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
pp = p;
-out:
NAPI_GRO_CB(skb)->flush |= (flush != 0);
return pp;
@@ -314,18 +406,56 @@ void tcp_gro_complete(struct sk_buff *sk
}
EXPORT_SYMBOL(tcp_gro_complete);
+static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ const struct iphdr *iph = skb_gro_network_header(skb);
+ struct net *net = dev_net(skb->dev);
+ struct sk_buff *p;
+ struct sock *sk;
+ int iif, sdif;
+
+ if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
+ sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ iph->saddr, th->source,
+ iph->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- inet_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ inet_gro_compute_pseudo))
+ goto flush;
+
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
- return tcp_gro_receive(head, skb);
+ tcp4_check_fraglist_gro(head, skb, th);
+
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
@@ -333,6 +463,15 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_com
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -433,33 +433,6 @@ out:
return segs;
}
-static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
-{
- if (unlikely(p->len + skb->len >= 65536))
- return -E2BIG;
-
- if (NAPI_GRO_CB(p)->last == p)
- skb_shinfo(p)->frag_list = skb;
- else
- NAPI_GRO_CB(p)->last->next = skb;
-
- skb_pull(skb, skb_gro_offset(skb));
-
- NAPI_GRO_CB(p)->last = skb;
- NAPI_GRO_CB(p)->count++;
- p->data_len += skb->len;
-
- /* sk ownership - if any - completely transferred to the aggregated packet */
- skb->destructor = NULL;
- skb->sk = NULL;
- p->truesize += skb->truesize;
- p->len += skb->len;
-
- NAPI_GRO_CB(skb)->same_flow = 1;
-
- return 0;
-}
-
#define UDP_GRO_CNT_MAX 64
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -7,24 +7,65 @@
*/
#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
+#include <net/inet6_hashtables.h>
#include <net/gro.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
+static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct ipv6hdr *hdr = skb_gro_network_header(skb);
+ struct net *net = dev_net(skb->dev);
+ struct sk_buff *p;
+ struct sock *sk;
+ int iif, sdif;
+
+ if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ &hdr->saddr, th->source,
+ &hdr->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- ip6_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ ip6_gro_compute_pseudo))
+ goto flush;
- return tcp_gro_receive(head, skb);
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
+
+ tcp6_check_fraglist_gro(head, skb, th);
+
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
@@ -32,6 +73,15 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_com
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
@@ -51,6 +101,9 @@ static struct sk_buff *tcp6_gso_segment(
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return skb_segment_list(skb, features, skb_mac_header_len(skb));
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);

View File

@@ -17,7 +17,7 @@ Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
 --- a/include/linux/netdevice.h
 +++ b/include/linux/netdevice.h
-@@ -2245,7 +2245,7 @@ struct net_device {
+@@ -2243,7 +2243,7 @@ struct net_device {
 #if IS_ENABLED(CONFIG_AX25)
 	void *ax25_ptr;
 #endif

View File

@@ -20,7 +20,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 /**
  * napi_disable - prevent NAPI from scheduling
-@@ -3238,6 +3239,7 @@ struct softnet_data {
+@@ -3236,6 +3237,7 @@ struct softnet_data {
 	/* stats */
 	unsigned int processed;
 	unsigned int time_squeeze;
@@ -157,7 +157,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 		int (*poll)(struct napi_struct *, int), int weight)
 {
-@@ -11351,6 +11422,9 @@ static int dev_cpu_dead(unsigned int old
+@@ -11306,6 +11377,9 @@ static int dev_cpu_dead(unsigned int old
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
@@ -167,7 +167,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 #ifdef CONFIG_RPS
 	remsd = oldsd->rps_ipi_list;
 	oldsd->rps_ipi_list = NULL;
-@@ -11666,6 +11740,7 @@ static int __init net_dev_init(void)
+@@ -11621,6 +11695,7 @@ static int __init net_dev_init(void)
 	INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
 	spin_lock_init(&sd->defer_lock);

View File

@@ -34,9 +34,7 @@ define Device/Default
   KERNEL := kernel-bin | uImage none
   IMAGES := sdcard.img.gz
   IMAGE/sdcard.img.gz := sunxi-sdcard | append-metadata | gzip
-ifneq ($(LINUX_6_1),)
-SUNXI_DTS_DIR :=
-else
+ifdef CONFIG_LINUX_6_6
 SUNXI_DTS_DIR :=allwinner/
 endif
 SUNXI_DTS = $$(SUNXI_DTS_DIR)$$(SOC)-$(lastword $(subst _, ,$(1)))