kernel: update flow offload patches to upstream version

Move patches to backport-5.10, since the series was accepted upstream

Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
Felix Fietkau 2021-04-10 13:20:04 +02:00
parent 012a9aa00b
commit f07fe36f22
51 changed files with 1845 additions and 525 deletions

View File

@ -0,0 +1,98 @@
From: Oz Shlomo <ozsh@nvidia.com>
Date: Tue, 23 Mar 2021 00:56:19 +0100
Subject: [PATCH] netfilter: flowtable: separate replace, destroy and
stats to different workqueues
Currently the flow table offload replace, destroy and stats work items are
executed on a single workqueue. As such, DESTROY and STATS commands may
be backlogged after a burst of REPLACE work items. This scenario can bloat
up memory and may cause active connections to age.
Instantiate add, del and stats workqueues to avoid backlogs of non-dependent
actions. Provide sysfs control over the workqueue attributes, allowing
userspace applications to control the workqueue cpumask.
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -13,7 +13,9 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-static struct workqueue_struct *nf_flow_offload_wq;
+static struct workqueue_struct *nf_flow_offload_add_wq;
+static struct workqueue_struct *nf_flow_offload_del_wq;
+static struct workqueue_struct *nf_flow_offload_stats_wq;
struct flow_offload_work {
struct list_head list;
@@ -826,7 +828,12 @@ static void flow_offload_work_handler(st
static void flow_offload_queue_work(struct flow_offload_work *offload)
{
- queue_work(nf_flow_offload_wq, &offload->work);
+ if (offload->cmd == FLOW_CLS_REPLACE)
+ queue_work(nf_flow_offload_add_wq, &offload->work);
+ else if (offload->cmd == FLOW_CLS_DESTROY)
+ queue_work(nf_flow_offload_del_wq, &offload->work);
+ else
+ queue_work(nf_flow_offload_stats_wq, &offload->work);
}
static struct flow_offload_work *
@@ -898,8 +905,11 @@ void nf_flow_offload_stats(struct nf_flo
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
- if (nf_flowtable_hw_offload(flowtable))
- flush_workqueue(nf_flow_offload_wq);
+ if (nf_flowtable_hw_offload(flowtable)) {
+ flush_workqueue(nf_flow_offload_add_wq);
+ flush_workqueue(nf_flow_offload_del_wq);
+ flush_workqueue(nf_flow_offload_stats_wq);
+ }
}
static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
@@ -1011,15 +1021,33 @@ EXPORT_SYMBOL_GPL(nf_flow_table_offload_
int nf_flow_table_offload_init(void)
{
- nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload",
- WQ_UNBOUND, 0);
- if (!nf_flow_offload_wq)
+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!nf_flow_offload_add_wq)
return -ENOMEM;
+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!nf_flow_offload_del_wq)
+ goto err_del_wq;
+
+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!nf_flow_offload_stats_wq)
+ goto err_stats_wq;
+
return 0;
+
+err_stats_wq:
+ destroy_workqueue(nf_flow_offload_del_wq);
+err_del_wq:
+ destroy_workqueue(nf_flow_offload_add_wq);
+ return -ENOMEM;
}
void nf_flow_table_offload_exit(void)
{
- destroy_workqueue(nf_flow_offload_wq);
+ destroy_workqueue(nf_flow_offload_add_wq);
+ destroy_workqueue(nf_flow_offload_del_wq);
+ destroy_workqueue(nf_flow_offload_stats_wq);
}

View File

@ -0,0 +1,44 @@
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 23 Mar 2021 00:56:20 +0100
Subject: [PATCH] netfilter: Fix fall-through warnings for Clang
In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple break statements instead of just
letting the code fall through to the next case.
Link: https://github.com/KSPP/linux/issues/115
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -397,6 +397,7 @@ dccp_new(struct nf_conn *ct, const struc
msg = "not picking up existing connection ";
goto out_invalid;
}
+ break;
case CT_DCCP_REQUEST:
break;
case CT_DCCP_INVALID:
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8364,6 +8364,7 @@ static int nf_tables_check_loops(const s
data->verdict.chain);
if (err < 0)
return err;
+ break;
default:
break;
}
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -528,6 +528,7 @@ static void __nft_ct_set_destroy(const s
case NFT_CT_ZONE:
if (--nft_ct_pcpu_template_refcnt == 0)
nft_ct_tmpl_put_pcpu();
+ break;
#endif
default:
break;

View File

@ -0,0 +1,22 @@
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 23 Mar 2021 00:56:21 +0100
Subject: [PATCH] netfilter: conntrack: Remove unused variable
declaration
commit e97c3e278e95 ("tproxy: split off ipv6 defragmentation to a separate
module") left behind this.
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -4,7 +4,4 @@
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
-#include <linux/sysctl.h>
-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
-
#endif /* _NF_CONNTRACK_IPV6_H*/

View File

@ -0,0 +1,291 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 23 Mar 2021 00:56:22 +0100
Subject: [PATCH] netfilter: flowtable: consolidate
skb_try_make_writable() call
Fetch the layer 4 header size to be mangled by NAT when building the
tuple, then use it to make writable the network and the transport
headers. After this update, the NAT routines now assume that the skbuff
area is writable. Do the pointer refetch only after the single
skb_try_make_writable() call.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -395,9 +395,6 @@ static int nf_flow_nat_port_tcp(struct s
{
struct tcphdr *tcph;
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
- return -1;
-
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
@@ -409,9 +406,6 @@ static int nf_flow_nat_port_udp(struct s
{
struct udphdr *udph;
- if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
- return -1;
-
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace2(&udph->check, skb, port,
@@ -447,9 +441,6 @@ int nf_flow_snat_port(const struct flow_
struct flow_ports *hdr;
__be16 port, new_port;
- if (skb_try_make_writable(skb, thoff + sizeof(*hdr)))
- return -1;
-
hdr = (void *)(skb_network_header(skb) + thoff);
switch (dir) {
@@ -478,9 +469,6 @@ int nf_flow_dnat_port(const struct flow_
struct flow_ports *hdr;
__be16 port, new_port;
- if (skb_try_make_writable(skb, thoff + sizeof(*hdr)))
- return -1;
-
hdr = (void *)(skb_network_header(skb) + thoff);
switch (dir) {
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -39,9 +39,6 @@ static int nf_flow_nat_ip_tcp(struct sk_
{
struct tcphdr *tcph;
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
- return -1;
-
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
@@ -53,9 +50,6 @@ static int nf_flow_nat_ip_udp(struct sk_
{
struct udphdr *udph;
- if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
- return -1;
-
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&udph->check, skb, addr,
@@ -136,19 +130,17 @@ static int nf_flow_dnat_ip(const struct
}
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- unsigned int thoff, enum flow_offload_tuple_dir dir)
+ unsigned int thoff, enum flow_offload_tuple_dir dir,
+ struct iphdr *iph)
{
- struct iphdr *iph = ip_hdr(skb);
-
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
return -1;
- iph = ip_hdr(skb);
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
(nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
return -1;
return 0;
@@ -160,10 +152,10 @@ static bool ip_has_options(unsigned int
}
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple)
+ struct flow_offload_tuple *tuple, u32 *hdrsize)
{
- unsigned int thoff, hdrsize;
struct flow_ports *ports;
+ unsigned int thoff;
struct iphdr *iph;
if (!pskb_may_pull(skb, sizeof(*iph)))
@@ -178,10 +170,10 @@ static int nf_flow_tuple_ip(struct sk_bu
switch (iph->protocol) {
case IPPROTO_TCP:
- hdrsize = sizeof(struct tcphdr);
+ *hdrsize = sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
- hdrsize = sizeof(struct udphdr);
+ *hdrsize = sizeof(struct udphdr);
break;
default:
return -1;
@@ -191,7 +183,7 @@ static int nf_flow_tuple_ip(struct sk_bu
return -1;
thoff = iph->ihl * 4;
- if (!pskb_may_pull(skb, thoff + hdrsize))
+ if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;
iph = ip_hdr(skb);
@@ -252,11 +244,12 @@ nf_flow_offload_ip_hook(void *priv, stru
unsigned int thoff;
struct iphdr *iph;
__be32 nexthop;
+ u32 hdrsize;
if (skb->protocol != htons(ETH_P_IP))
return NF_ACCEPT;
- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -271,11 +264,13 @@ nf_flow_offload_ip_hook(void *priv, stru
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
- if (skb_try_make_writable(skb, sizeof(*iph)))
+ iph = ip_hdr(skb);
+ thoff = iph->ihl * 4;
+ if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
- thoff = ip_hdr(skb)->ihl * 4;
- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
+ iph = ip_hdr(skb);
+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
flow_offload_refresh(flow_table, flow);
@@ -285,10 +280,9 @@ nf_flow_offload_ip_hook(void *priv, stru
return NF_ACCEPT;
}
- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
+ if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
return NF_DROP;
- iph = ip_hdr(skb);
ip_decrease_ttl(iph);
skb->tstamp = 0;
@@ -317,9 +311,6 @@ static int nf_flow_nat_ipv6_tcp(struct s
{
struct tcphdr *tcph;
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
- return -1;
-
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
new_addr->s6_addr32, true);
@@ -333,9 +324,6 @@ static int nf_flow_nat_ipv6_udp(struct s
{
struct udphdr *udph;
- if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
- return -1;
-
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
@@ -417,31 +405,30 @@ static int nf_flow_dnat_ipv6(const struc
static int nf_flow_nat_ipv6(const struct flow_offload *flow,
struct sk_buff *skb,
- enum flow_offload_tuple_dir dir)
+ enum flow_offload_tuple_dir dir,
+ struct ipv6hdr *ip6h)
{
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
unsigned int thoff = sizeof(*ip6h);
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
(nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
return -1;
- ip6h = ipv6_hdr(skb);
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
(nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
return -1;
return 0;
}
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple)
+ struct flow_offload_tuple *tuple, u32 *hdrsize)
{
- unsigned int thoff, hdrsize;
struct flow_ports *ports;
struct ipv6hdr *ip6h;
+ unsigned int thoff;
if (!pskb_may_pull(skb, sizeof(*ip6h)))
return -1;
@@ -450,10 +437,10 @@ static int nf_flow_tuple_ipv6(struct sk_
switch (ip6h->nexthdr) {
case IPPROTO_TCP:
- hdrsize = sizeof(struct tcphdr);
+ *hdrsize = sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
- hdrsize = sizeof(struct udphdr);
+ *hdrsize = sizeof(struct udphdr);
break;
default:
return -1;
@@ -463,7 +450,7 @@ static int nf_flow_tuple_ipv6(struct sk_
return -1;
thoff = sizeof(*ip6h);
- if (!pskb_may_pull(skb, thoff + hdrsize))
+ if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;
ip6h = ipv6_hdr(skb);
@@ -493,11 +480,12 @@ nf_flow_offload_ipv6_hook(void *priv, st
struct net_device *outdev;
struct ipv6hdr *ip6h;
struct rt6_info *rt;
+ u32 hdrsize;
if (skb->protocol != htons(ETH_P_IPV6))
return NF_ACCEPT;
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -523,13 +511,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
return NF_ACCEPT;
}
- if (skb_try_make_writable(skb, sizeof(*ip6h)))
+ if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
return NF_DROP;
- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
+ ip6h = ipv6_hdr(skb);
+ if (nf_flow_nat_ipv6(flow, skb, dir, ip6h) < 0)
return NF_DROP;
- ip6h = ipv6_hdr(skb);
ip6h->hop_limit--;
skb->tstamp = 0;

View File

@ -0,0 +1,35 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 23 Mar 2021 00:56:23 +0100
Subject: [PATCH] netfilter: flowtable: move skb_try_make_writable()
before NAT in IPv4
For consistency with the IPv6 flowtable datapath and to make sure the
skbuff is writable right before the NAT header updates.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -266,10 +266,6 @@ nf_flow_offload_ip_hook(void *priv, stru
iph = ip_hdr(skb);
thoff = iph->ihl * 4;
- if (skb_try_make_writable(skb, thoff + hdrsize))
- return NF_DROP;
-
- iph = ip_hdr(skb);
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
@@ -280,6 +276,10 @@ nf_flow_offload_ip_hook(void *priv, stru
return NF_ACCEPT;
}
+ if (skb_try_make_writable(skb, thoff + hdrsize))
+ return NF_DROP;
+
+ iph = ip_hdr(skb);
if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
return NF_DROP;

View File

@ -0,0 +1,82 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 23 Mar 2021 00:56:24 +0100
Subject: [PATCH] netfilter: flowtable: move FLOW_OFFLOAD_DIR_MAX away
from enumeration
This allows removing the default case, which should never happen and was
added only to avoid gcc warnings about the unhandled FLOW_OFFLOAD_DIR_MAX
enumeration case.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -86,8 +86,8 @@ static inline bool nf_flowtable_hw_offlo
enum flow_offload_tuple_dir {
FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
};
+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
struct flow_offload_tuple {
union {
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -454,8 +454,6 @@ int nf_flow_snat_port(const struct flow_
new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
hdr->dest = new_port;
break;
- default:
- return -1;
}
return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
@@ -482,8 +480,6 @@ int nf_flow_dnat_port(const struct flow_
new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
hdr->source = new_port;
break;
- default:
- return -1;
}
return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -96,8 +96,6 @@ static int nf_flow_snat_ip(const struct
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
iph->daddr = new_addr;
break;
- default:
- return -1;
}
csum_replace4(&iph->check, addr, new_addr);
@@ -121,8 +119,6 @@ static int nf_flow_dnat_ip(const struct
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
iph->saddr = new_addr;
break;
- default:
- return -1;
}
csum_replace4(&iph->check, addr, new_addr);
@@ -371,8 +367,6 @@ static int nf_flow_snat_ipv6(const struc
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
ip6h->daddr = new_addr;
break;
- default:
- return -1;
}
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
@@ -396,8 +390,6 @@ static int nf_flow_dnat_ipv6(const struc
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
ip6h->saddr = new_addr;
break;
- default:
- return -1;
}
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);

View File

@ -0,0 +1,394 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 23 Mar 2021 00:56:25 +0100
Subject: [PATCH] netfilter: flowtable: fast NAT functions never fail
Simplify existing fast NAT routines by returning void. After the
skb_try_make_writable() call consolidation, these routines cannot ever
fail.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -229,12 +229,12 @@ void nf_flow_table_free(struct nf_flowta
void flow_offload_teardown(struct flow_offload *flow);
-int nf_flow_snat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir);
-int nf_flow_dnat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
struct flow_ports {
__be16 source, dest;
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -389,20 +389,17 @@ static void nf_flow_offload_work_gc(stru
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
-
-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
- __be16 port, __be16 new_port)
+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
{
struct tcphdr *tcph;
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
-
- return 0;
}
-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
- __be16 port, __be16 new_port)
+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
{
struct udphdr *udph;
@@ -413,30 +410,24 @@ static int nf_flow_nat_port_udp(struct s
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
-
- return 0;
}
-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
- u8 protocol, __be16 port, __be16 new_port)
+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, __be16 port, __be16 new_port)
{
switch (protocol) {
case IPPROTO_TCP:
- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
- return NF_DROP;
+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
break;
case IPPROTO_UDP:
- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
- return NF_DROP;
+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
break;
}
-
- return 0;
}
-int nf_flow_snat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir)
+void nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
{
struct flow_ports *hdr;
__be16 port, new_port;
@@ -456,13 +447,13 @@ int nf_flow_snat_port(const struct flow_
break;
}
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);
-int nf_flow_dnat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir)
+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
+ unsigned int thoff, u8 protocol,
+ enum flow_offload_tuple_dir dir)
{
struct flow_ports *hdr;
__be16 port, new_port;
@@ -482,7 +473,7 @@ int nf_flow_dnat_port(const struct flow_
break;
}
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -34,19 +34,17 @@ static int nf_flow_state_check(struct fl
return 0;
}
-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
- __be32 addr, __be32 new_addr)
+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
{
struct tcphdr *tcph;
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
-
- return 0;
}
-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
- __be32 addr, __be32 new_addr)
+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
{
struct udphdr *udph;
@@ -57,31 +55,25 @@ static int nf_flow_nat_ip_udp(struct sk_
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
-
- return 0;
}
-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
- unsigned int thoff, __be32 addr,
- __be32 new_addr)
+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, __be32 addr,
+ __be32 new_addr)
{
switch (iph->protocol) {
case IPPROTO_TCP:
- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
break;
case IPPROTO_UDP:
- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
break;
}
-
- return 0;
}
-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- struct iphdr *iph, unsigned int thoff,
- enum flow_offload_tuple_dir dir)
+static void nf_flow_snat_ip(const struct flow_offload *flow,
+ struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
{
__be32 addr, new_addr;
@@ -99,12 +91,12 @@ static int nf_flow_snat_ip(const struct
}
csum_replace4(&iph->check, addr, new_addr);
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- struct iphdr *iph, unsigned int thoff,
- enum flow_offload_tuple_dir dir)
+static void nf_flow_dnat_ip(const struct flow_offload *flow,
+ struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
{
__be32 addr, new_addr;
@@ -122,24 +114,21 @@ static int nf_flow_dnat_ip(const struct
}
csum_replace4(&iph->check, addr, new_addr);
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
unsigned int thoff, enum flow_offload_tuple_dir dir,
struct iphdr *iph)
{
- if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
- return -1;
-
- if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
- return -1;
-
- return 0;
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
+ }
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
+ }
}
static bool ip_has_options(unsigned int thoff)
@@ -276,8 +265,7 @@ nf_flow_offload_ip_hook(void *priv, stru
return NF_DROP;
iph = ip_hdr(skb);
- if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
- return NF_DROP;
+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
ip_decrease_ttl(iph);
skb->tstamp = 0;
@@ -301,22 +289,21 @@ nf_flow_offload_ip_hook(void *priv, stru
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
- struct in6_addr *addr,
- struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr,
+ struct ipv6hdr *ip6h)
{
struct tcphdr *tcph;
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
new_addr->s6_addr32, true);
-
- return 0;
}
-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
- struct in6_addr *addr,
- struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
{
struct udphdr *udph;
@@ -327,32 +314,26 @@ static int nf_flow_nat_ipv6_udp(struct s
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
-
- return 0;
}
-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
- unsigned int thoff, struct in6_addr *addr,
- struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff, struct in6_addr *addr,
+ struct in6_addr *new_addr)
{
switch (ip6h->nexthdr) {
case IPPROTO_TCP:
- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
break;
case IPPROTO_UDP:
- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
break;
}
-
- return 0;
}
-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
- struct sk_buff *skb, struct ipv6hdr *ip6h,
- unsigned int thoff,
- enum flow_offload_tuple_dir dir)
+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
{
struct in6_addr addr, new_addr;
@@ -369,13 +350,13 @@ static int nf_flow_snat_ipv6(const struc
break;
}
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}
-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
- struct sk_buff *skb, struct ipv6hdr *ip6h,
- unsigned int thoff,
- enum flow_offload_tuple_dir dir)
+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
{
struct in6_addr addr, new_addr;
@@ -392,27 +373,24 @@ static int nf_flow_dnat_ipv6(const struc
break;
}
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}
-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
- struct sk_buff *skb,
- enum flow_offload_tuple_dir dir,
- struct ipv6hdr *ip6h)
+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir,
+ struct ipv6hdr *ip6h)
{
unsigned int thoff = sizeof(*ip6h);
- if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
- return -1;
-
- if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
- return -1;
-
- return 0;
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
+ }
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
+ }
}
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
@@ -507,8 +485,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
return NF_DROP;
ip6h = ipv6_hdr(skb);
- if (nf_flow_nat_ipv6(flow, skb, dir, ip6h) < 0)
- return NF_DROP;
+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
ip6h->hop_limit--;
skb->tstamp = 0;

View File

@ -0,0 +1,46 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 23 Mar 2021 00:56:26 +0100
Subject: [PATCH] netfilter: flowtable: call dst_check() to fall back to
classic forwarding
In case the route is stale, pass up the packet to the classic forwarding
path for re-evaluation and schedule this flow entry for removal.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -197,14 +197,6 @@ static bool nf_flow_exceeds_mtu(const st
return true;
}
-static int nf_flow_offload_dst_check(struct dst_entry *dst)
-{
- if (unlikely(dst_xfrm(dst)))
- return dst_check(dst, 0) ? 0 : -1;
-
- return 0;
-}
-
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
const struct nf_hook_state *state,
struct dst_entry *dst)
@@ -256,7 +248,7 @@ nf_flow_offload_ip_hook(void *priv, stru
flow_offload_refresh(flow_table, flow);
- if (nf_flow_offload_dst_check(&rt->dst)) {
+ if (!dst_check(&rt->dst, 0)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
}
@@ -476,7 +468,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
flow_offload_refresh(flow_table, flow);
- if (nf_flow_offload_dst_check(&rt->dst)) {
+ if (!dst_check(&rt->dst, 0)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
}

View File

@ -0,0 +1,49 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 23 Mar 2021 00:56:27 +0100
Subject: [PATCH] netfilter: flowtable: refresh timeout after dst and
writable checks
Refresh the timeout (and retry hardware offload) once the skbuff dst
is confirmed to be current and after the skbuff is made writable.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -246,8 +246,6 @@ nf_flow_offload_ip_hook(void *priv, stru
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
- flow_offload_refresh(flow_table, flow);
-
if (!dst_check(&rt->dst, 0)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
@@ -256,6 +254,8 @@ nf_flow_offload_ip_hook(void *priv, stru
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
+ flow_offload_refresh(flow_table, flow);
+
iph = ip_hdr(skb);
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
@@ -466,8 +466,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
sizeof(*ip6h)))
return NF_ACCEPT;
- flow_offload_refresh(flow_table, flow);
-
if (!dst_check(&rt->dst, 0)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
@@ -476,6 +474,8 @@ nf_flow_offload_ipv6_hook(void *priv, st
if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
return NF_DROP;
+ flow_offload_refresh(flow_table, flow);
+
ip6h = ipv6_hdr(skb);
nf_flow_nat_ipv6(flow, skb, dir, ip6h);

View File

@ -0,0 +1,103 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 23 Mar 2021 00:56:28 +0100
Subject: [PATCH] netfilter: nftables: update table flags from the commit
phase
Do not update table flags from the preparation phase. Store the flags
update into the transaction, then update the flags from the commit
phase.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1470,13 +1470,16 @@ struct nft_trans_chain {
struct nft_trans_table {
bool update;
- bool enable;
+ u8 state;
+ u32 flags;
};
#define nft_trans_table_update(trans) \
(((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_enable(trans) \
- (((struct nft_trans_table *)trans->data)->enable)
+#define nft_trans_table_state(trans) \
+ (((struct nft_trans_table *)trans->data)->state)
+#define nft_trans_table_flags(trans) \
+ (((struct nft_trans_table *)trans->data)->flags)
struct nft_trans_elem {
struct nft_set *set;
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -891,6 +891,12 @@ static void nf_tables_table_disable(stru
nft_table_disable(net, table, 0);
}
+enum {
+ NFT_TABLE_STATE_UNCHANGED = 0,
+ NFT_TABLE_STATE_DORMANT,
+ NFT_TABLE_STATE_WAKEUP
+};
+
static int nf_tables_updtable(struct nft_ctx *ctx)
{
struct nft_trans *trans;
@@ -914,19 +920,17 @@ static int nf_tables_updtable(struct nft
if ((flags & NFT_TABLE_F_DORMANT) &&
!(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
- nft_trans_table_enable(trans) = false;
+ nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
- ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
ret = nf_tables_table_enable(ctx->net, ctx->table);
if (ret >= 0)
- nft_trans_table_enable(trans) = true;
- else
- ctx->table->flags |= NFT_TABLE_F_DORMANT;
+ nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
}
if (ret < 0)
goto err;
+ nft_trans_table_flags(trans) = flags;
nft_trans_table_update(trans) = true;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
@@ -7873,11 +7877,10 @@ static int nf_tables_commit(struct net *
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (!nft_trans_table_enable(trans)) {
- nf_tables_table_disable(net,
- trans->ctx.table);
- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
- }
+ if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
+ nf_tables_table_disable(net, trans->ctx.table);
+
+ trans->ctx.table->flags = nft_trans_table_flags(trans);
} else {
nft_clear(net, trans->ctx.table);
}
@@ -8090,11 +8093,9 @@ static int __nf_tables_abort(struct net
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (nft_trans_table_enable(trans)) {
- nf_tables_table_disable(net,
- trans->ctx.table);
- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
- }
+ if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
+ nf_tables_table_disable(net, trans->ctx.table);
+
nft_trans_destroy(trans);
} else {
list_del_rcu(&trans->ctx.table->list);

View File

@ -1,5 +1,5 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 4 Mar 2021 23:18:11 +0100
Date: Wed, 24 Mar 2021 02:30:32 +0100
Subject: [PATCH] net: resolve forwarding path from virtual netdevice and
HW destination address

View File

@ -1,5 +1,5 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 20 Nov 2020 13:49:16 +0100
Date: Wed, 24 Mar 2021 02:30:33 +0100
Subject: [PATCH] net: 8021q: resolve forwarding path for vlan devices
Add .ndo_fill_forward_path for vlan devices.

View File

@ -1,6 +1,6 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 20 Nov 2020 13:49:17 +0100
Subject: [PATCH] bridge: resolve forwarding path for bridge devices
Date: Wed, 24 Mar 2021 02:30:34 +0100
Subject: [PATCH] net: bridge: resolve forwarding path for bridge devices
Add .ndo_fill_forward_path for bridge devices.

View File

@ -1,7 +1,7 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 7 Dec 2020 20:31:45 +0100
Subject: [PATCH] net: bridge: resolve VLAN tag actions in forwarding
path for bridge devices
Date: Wed, 24 Mar 2021 02:30:35 +0100
Subject: [PATCH] net: bridge: resolve forwarding path for VLAN tag
actions in bridge devices
Depending on the VLAN settings of the bridge and the port, the bridge can
either add or remove a tag. When vlan filtering is enabled, the fdb lookup

View File

@ -1,9 +1,13 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 2 Mar 2021 21:45:16 +0100
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 24 Mar 2021 02:30:36 +0100
Subject: [PATCH] net: ppp: resolve forwarding path for bridge pppoe
devices
Pass on the PPPoE session ID and the real device.
Pass on the PPPoE session ID, destination hardware address and the real
device.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/drivers/net/ppp/ppp_generic.c
@ -15,7 +19,7 @@ Pass on the PPPoE session ID and the real device.
+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
+ struct net_device_path *path)
+{
+ struct ppp *ppp = netdev_priv(path->dev);
+ struct ppp *ppp = netdev_priv(ctx->dev);
+ struct ppp_channel *chan;
+ struct channel *pch;
+
@ -45,7 +49,7 @@ Pass on the PPPoE session ID and the real device.
static struct device_type ppp_type = {
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -972,8 +972,30 @@ static int pppoe_xmit(struct ppp_channel
@@ -972,8 +972,31 @@ static int pppoe_xmit(struct ppp_channel
return __pppoe_xmit(sk, skb);
}
@ -64,6 +68,7 @@ Pass on the PPPoE session ID and the real device.
+ path->type = DEV_PATH_PPPOE;
+ path->encap.proto = htons(ETH_P_PPP_SES);
+ path->encap.id = be16_to_cpu(po->num);
+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
+ path->dev = ctx->dev;
+ ctx->dev = dev;
+
@ -86,6 +91,14 @@ Pass on the PPPoE session ID and the real device.
};
struct net_device_path {
@@ -840,6 +841,7 @@ struct net_device_path {
struct {
u16 id;
__be16 proto;
+ u8 h_dest[ETH_ALEN];
} encap;
struct {
enum {
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -28,6 +28,9 @@ struct ppp_channel_ops {

View File

@ -1,8 +1,11 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 4 Mar 2021 23:19:06 +0100
Date: Wed, 24 Mar 2021 02:30:37 +0100
Subject: [PATCH] net: dsa: resolve forwarding path for dsa slave ports
Add .ndo_fill_forward_path for dsa slave port devices
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/linux/netdevice.h
@ -15,7 +18,7 @@ Add .ndo_fill_forward_path for dsa slave port devices
};
struct net_device_path {
@@ -851,6 +852,10 @@ struct net_device_path {
@@ -852,6 +853,10 @@ struct net_device_path {
u16 vlan_id;
__be16 vlan_proto;
} bridge;

View File

@ -1,5 +1,5 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 20 Nov 2020 13:49:14 +0100
Date: Wed, 24 Mar 2021 02:30:38 +0100
Subject: [PATCH] netfilter: flowtable: add xmit path types
Add the xmit_type field that defines the two supported xmit paths in the
@ -12,8 +12,8 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -89,6 +89,11 @@ enum flow_offload_tuple_dir {
FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
};
#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
+enum flow_offload_xmit_type {
+ FLOW_OFFLOAD_XMIT_NEIGH = 0,
@ -55,30 +55,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
return 0;
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -220,10 +220,20 @@ static bool nf_flow_exceeds_mtu(const st
return true;
}
-static int nf_flow_offload_dst_check(struct dst_entry *dst)
+static inline struct dst_entry *
+nft_flow_dst(struct flow_offload_tuple_rhash *tuplehash)
{
- if (unlikely(dst_xfrm(dst)))
+ return tuplehash->tuple.dst_cache;
+}
+
+static int nf_flow_offload_dst_check(struct flow_offload_tuple_rhash *tuplehash)
+{
+ struct dst_entry *dst;
+
+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+ dst = nft_flow_dst(tuplehash);
return dst_check(dst, 0) ? 0 : -1;
+ }
return 0;
}
@@ -265,8 +275,6 @@ nf_flow_offload_ip_hook(void *priv, stru
@@ -235,8 +235,6 @@ nf_flow_offload_ip_hook(void *priv, stru
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
@ -87,16 +64,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
@@ -280,7 +288,7 @@ nf_flow_offload_ip_hook(void *priv, stru
flow_offload_refresh(flow_table, flow);
- if (nf_flow_offload_dst_check(&rt->dst)) {
+ if (nf_flow_offload_dst_check(tuplehash)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
}
@@ -295,13 +303,16 @@ nf_flow_offload_ip_hook(void *priv, stru
@@ -265,13 +263,16 @@ nf_flow_offload_ip_hook(void *priv, stru
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
@ -114,7 +82,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
skb_dst_set_noref(skb, &rt->dst);
@@ -506,8 +517,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
@@ -456,8 +457,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
@ -123,16 +91,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
@@ -518,7 +527,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
flow_offload_refresh(flow_table, flow);
- if (nf_flow_offload_dst_check(&rt->dst)) {
+ if (nf_flow_offload_dst_check(tuplehash)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
}
@@ -536,13 +545,16 @@ nf_flow_offload_ipv6_hook(void *priv, st
@@ -485,13 +484,16 @@ nf_flow_offload_ipv6_hook(void *priv, st
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

View File

@ -1,5 +1,5 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 20 Nov 2020 13:49:18 +0100
Date: Wed, 24 Mar 2021 02:30:39 +0100
Subject: [PATCH] netfilter: flowtable: use dev_fill_forward_path() to
obtain ingress device

View File

@ -1,5 +1,5 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 4 Mar 2021 03:26:35 +0100
Date: Wed, 24 Mar 2021 02:30:40 +0100
Subject: [PATCH] netfilter: flowtable: use dev_fill_forward_path() to
obtain egress device
@ -133,7 +133,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
void flow_offload_free(struct flow_offload *flow)
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -248,6 +248,24 @@ static unsigned int nf_flow_xmit_xfrm(st
@@ -207,6 +207,24 @@ static unsigned int nf_flow_xmit_xfrm(st
return NF_STOLEN;
}
@ -158,25 +158,41 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -262,6 +280,7 @@ nf_flow_offload_ip_hook(void *priv, stru
unsigned int thoff;
@@ -222,6 +240,7 @@ nf_flow_offload_ip_hook(void *priv, stru
struct iphdr *iph;
__be32 nexthop;
u32 hdrsize;
+ int ret;
if (skb->protocol != htons(ETH_P_IP))
return NF_ACCEPT;
@@ -303,22 +322,32 @@ nf_flow_offload_ip_hook(void *priv, stru
@@ -244,9 +263,13 @@ nf_flow_offload_ip_hook(void *priv, stru
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
- if (!dst_check(&rt->dst, 0)) {
- flow_offload_teardown(flow);
- return NF_ACCEPT;
+ if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+ tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ if (!dst_check(&rt->dst, 0)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
}
if (skb_try_make_writable(skb, thoff + hdrsize))
@@ -263,8 +286,6 @@ nf_flow_offload_ip_hook(void *priv, stru
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
-
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
IPCB(skb)->iif = skb->dev->ifindex;
IPCB(skb)->flags = IPSKB_FORWARDED;
@@ -272,13 +293,23 @@ nf_flow_offload_ip_hook(void *priv, stru
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
@ -187,7 +203,6 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+ switch (tuplehash->tuple.xmit_type) {
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ outdev = rt->dst.dev;
+ skb->dev = outdev;
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
@ -207,25 +222,41 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
@@ -504,6 +533,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
struct net_device *outdev;
@@ -444,6 +475,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
struct ipv6hdr *ip6h;
struct rt6_info *rt;
u32 hdrsize;
+ int ret;
if (skb->protocol != htons(ETH_P_IPV6))
return NF_ACCEPT;
@@ -545,21 +575,31 @@ nf_flow_offload_ipv6_hook(void *priv, st
@@ -465,9 +497,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
sizeof(*ip6h)))
return NF_ACCEPT;
- if (!dst_check(&rt->dst, 0)) {
- flow_offload_teardown(flow);
- return NF_ACCEPT;
+ if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+ tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ if (!dst_check(&rt->dst, 0)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
}
if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
@@ -484,8 +520,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
-
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
IP6CB(skb)->iif = skb->dev->ifindex;
IP6CB(skb)->flags = IP6SKB_FORWARDED;
@@ -493,12 +527,22 @@ nf_flow_offload_ipv6_hook(void *priv, st
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
@ -236,7 +267,6 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+ switch (tuplehash->tuple.xmit_type) {
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ outdev = rt->dst.dev;
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);

View File

@ -1,11 +1,15 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 20 Nov 2020 13:49:20 +0100
Date: Wed, 24 Mar 2021 02:30:41 +0100
Subject: [PATCH] netfilter: flowtable: add vlan support
Add the vlan id and protocol to the flow tuple to uniquely identify
flows from the receive path. For the transmit path, dev_hard_header() on
the vlan device pushes the headers. This patch includes support for two
VLAN headers (QinQ) from the ingress path.
vlan headers (QinQ) from the ingress path.
Add a generic encap field to the flowtable entry which stores the
protocol and the tag id. This allows these fields to be reused in the PPPoE
support coming in a later patch.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
@ -79,7 +83,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
case FLOW_OFFLOAD_XMIT_DIRECT:
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -159,17 +159,38 @@ static bool ip_has_options(unsigned int
@@ -136,23 +136,44 @@ static bool ip_has_options(unsigned int
return thoff != sizeof(struct iphdr);
}
@ -102,41 +106,46 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+}
+
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
struct flow_offload_tuple *tuple)
- struct flow_offload_tuple *tuple, u32 *hdrsize)
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
+ u32 offset)
{
- unsigned int thoff, hdrsize;
+ unsigned int thoff, hdrsize, offset = 0;
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
- if (!pskb_may_pull(skb, sizeof(*iph)))
+ if (skb->protocol == htons(ETH_P_8021Q))
+ offset += VLAN_HLEN;
+
+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
return -1;
- iph = ip_hdr(skb);
- thoff = iph->ihl * 4;
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
thoff = iph->ihl * 4;
+ thoff = (iph->ihl * 4);
if (ip_is_fragment(iph) ||
@@ -191,11 +212,11 @@ static int nf_flow_tuple_ip(struct sk_bu
unlikely(ip_has_options(thoff)))
return -1;
thoff = iph->ihl * 4;
- if (!pskb_may_pull(skb, thoff + hdrsize))
+ if (!pskb_may_pull(skb, thoff + hdrsize + offset))
+ thoff += offset;
+
switch (iph->protocol) {
case IPPROTO_TCP:
*hdrsize = sizeof(struct tcphdr);
@@ -167,11 +188,10 @@ static int nf_flow_tuple_ip(struct sk_bu
if (iph->ttl <= 1)
return -1;
- thoff = iph->ihl * 4;
if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;
- iph = ip_hdr(skb);
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff + offset);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
tuple->dst_v4.s_addr = iph->daddr;
@@ -204,6 +225,7 @@ static int nf_flow_tuple_ip(struct sk_bu
@@ -181,6 +201,7 @@ static int nf_flow_tuple_ip(struct sk_bu
tuple->l3proto = AF_INET;
tuple->l4proto = iph->protocol;
tuple->iifidx = dev->ifindex;
@ -144,18 +153,21 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
return 0;
}
@@ -248,6 +270,40 @@ static unsigned int nf_flow_xmit_xfrm(st
@@ -207,6 +228,43 @@ static unsigned int nf_flow_xmit_xfrm(st
return NF_STOLEN;
}
+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto)
+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+ u32 *offset)
+{
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veth;
+
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ if (veth->h_vlan_encapsulated_proto == proto)
+ if (veth->h_vlan_encapsulated_proto == proto) {
+ *offset += VLAN_HLEN;
+ return true;
+ }
+ }
+
+ return false;
@ -185,25 +197,30 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
const struct flow_offload_tuple_rhash *tuplehash,
unsigned short type)
@@ -276,13 +332,15 @@ nf_flow_offload_ip_hook(void *priv, stru
@@ -235,17 +293,18 @@ nf_flow_offload_ip_hook(void *priv, stru
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
+ u32 hdrsize, offset = 0;
+ unsigned int thoff, mtu;
struct rtable *rt;
- unsigned int thoff;
struct iphdr *iph;
__be32 nexthop;
+ u32 offset = 0;
- u32 hdrsize;
int ret;
- if (skb->protocol != htons(ETH_P_IP))
+ if (skb->protocol != htons(ETH_P_IP) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP)))
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
return NF_ACCEPT;
if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
@@ -295,14 +353,19 @@ nf_flow_offload_ip_hook(void *priv, stru
- if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0)
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -255,11 +314,12 @@ nf_flow_offload_ip_hook(void *priv, stru
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
@ -212,45 +229,38 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return NF_ACCEPT;
- if (skb_try_make_writable(skb, sizeof(*iph)))
+ if (skb->protocol == htons(ETH_P_8021Q))
+ offset += VLAN_HLEN;
+
+ if (skb_try_make_writable(skb, sizeof(*iph) + offset))
return NF_DROP;
- thoff = ip_hdr(skb)->ihl * 4;
- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
- iph = ip_hdr(skb);
- thoff = iph->ihl * 4;
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ thoff = (iph->ihl * 4) + offset;
+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
@@ -277,6 +337,9 @@ nf_flow_offload_ip_hook(void *priv, stru
flow_offload_refresh(flow_table, flow);
@@ -312,6 +375,9 @@ nf_flow_offload_ip_hook(void *priv, stru
return NF_ACCEPT;
}
+ nf_flow_encap_pop(skb, tuplehash);
+ thoff -= offset;
+
if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
return NF_DROP;
iph = ip_hdr(skb);
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
@@ -418,16 +481,18 @@ static void nf_flow_nat_ipv6(const struc
}
@@ -479,14 +545,17 @@ static int nf_flow_nat_ipv6(const struct
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
struct flow_offload_tuple *tuple)
- struct flow_offload_tuple *tuple, u32 *hdrsize)
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
+ u32 offset)
{
- unsigned int thoff, hdrsize;
+ unsigned int thoff, hdrsize, offset = 0;
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;
- if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ if (skb->protocol == htons(ETH_P_8021Q))
+ offset += VLAN_HLEN;
+
+ if (!pskb_may_pull(skb, sizeof(*ip6h) + offset))
+ thoff = sizeof(*ip6h) + offset;
+ if (!pskb_may_pull(skb, thoff))
return -1;
- ip6h = ipv6_hdr(skb);
@ -258,22 +268,20 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
switch (ip6h->nexthdr) {
case IPPROTO_TCP:
@@ -503,11 +572,11 @@ static int nf_flow_tuple_ipv6(struct sk_
@@ -443,11 +508,10 @@ static int nf_flow_tuple_ipv6(struct sk_
if (ip6h->hop_limit <= 1)
return -1;
thoff = sizeof(*ip6h);
- if (!pskb_may_pull(skb, thoff + hdrsize))
+ if (!pskb_may_pull(skb, thoff + hdrsize + offset))
- thoff = sizeof(*ip6h);
if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;
- ip6h = ipv6_hdr(skb);
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff + offset);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v6 = ip6h->saddr;
tuple->dst_v6 = ip6h->daddr;
@@ -516,6 +585,7 @@ static int nf_flow_tuple_ipv6(struct sk_
@@ -457,6 +521,7 @@ static int nf_flow_tuple_ipv6(struct sk_
tuple->l3proto = AF_INET6;
tuple->l4proto = ip6h->nexthdr;
tuple->iifidx = dev->ifindex;
@ -281,21 +289,28 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
return 0;
}
@@ -533,9 +603,12 @@ nf_flow_offload_ipv6_hook(void *priv, st
@@ -472,15 +537,17 @@ nf_flow_offload_ipv6_hook(void *priv, st
const struct in6_addr *nexthop;
struct flow_offload *flow;
struct net_device *outdev;
+ unsigned int thoff, mtu;
+ u32 hdrsize, offset = 0;
struct ipv6hdr *ip6h;
struct rt6_info *rt;
+ unsigned int mtu;
+ u32 offset = 0;
- u32 hdrsize;
int ret;
- if (skb->protocol != htons(ETH_P_IPV6))
+ if (skb->protocol != htons(ETH_P_IPV6) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6)))
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
return NF_ACCEPT;
if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
@@ -548,11 +621,15 @@ nf_flow_offload_ipv6_hook(void *priv, st
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0)
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -490,11 +557,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
@ -306,22 +321,26 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
- sizeof(*ip6h)))
+ if (skb->protocol == htons(ETH_P_8021Q))
+ offset += VLAN_HLEN;
+
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, sizeof(*ip6h)))
+ thoff = sizeof(*ip6h) + offset;
+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
return NF_ACCEPT;
flow_offload_refresh(flow_table, flow);
@@ -562,6 +639,8 @@ nf_flow_offload_ipv6_hook(void *priv, st
return NF_ACCEPT;
if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
@@ -506,11 +575,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
}
}
- if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
+ if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
flow_offload_refresh(flow_table, flow);
+ nf_flow_encap_pop(skb, tuplehash);
+
if (skb_try_make_writable(skb, sizeof(*ip6h)))
return NF_DROP;
ip6h = ipv6_hdr(skb);
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c

View File

@ -1,7 +1,6 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 24 Jan 2021 18:01:34 +0100
Subject: [PATCH] netfilter: nft_flow_offload: add bridge vlan filtering
support
Date: Wed, 24 Mar 2021 02:30:42 +0100
Subject: [PATCH] netfilter: flowtable: add bridge vlan filtering support
Add the vlan tag when the PVID is set.

View File

@ -0,0 +1,145 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:43 +0100
Subject: [PATCH] netfilter: flowtable: add pppoe support
Add the PPPoE protocol and session id to the flow tuple using the encap
fields to uniquely identify flows from the receive path. For the
transmit path, dev_hard_header() on the vlan device pushes the headers.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -7,6 +7,9 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -139,6 +142,8 @@ static bool ip_has_options(unsigned int
static void nf_flow_tuple_encap(struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
+ struct vlan_ethhdr *veth;
+ struct pppoe_hdr *phdr;
int i = 0;
if (skb_vlan_tag_present(skb)) {
@@ -146,11 +151,17 @@ static void nf_flow_tuple_encap(struct s
tuple->encap[i].proto = skb->vlan_proto;
i++;
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
-
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
tuple->encap[i].proto = skb->protocol;
+ break;
+ case htons(ETH_P_PPP_SES):
+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+ tuple->encap[i].id = ntohs(phdr->sid);
+ tuple->encap[i].proto = skb->protocol;
+ break;
}
}
@@ -228,17 +239,41 @@ static unsigned int nf_flow_xmit_xfrm(st
return NF_STOLEN;
}
+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+{
+ __be16 proto;
+
+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+ sizeof(struct pppoe_hdr)));
+ switch (proto) {
+ case htons(PPP_IP):
+ return htons(ETH_P_IP);
+ case htons(PPP_IPV6):
+ return htons(ETH_P_IPV6);
+ }
+
+ return 0;
+}
+
static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
u32 *offset)
{
- if (skb->protocol == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veth;
+ struct vlan_ethhdr *veth;
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
if (veth->h_vlan_encapsulated_proto == proto) {
*offset += VLAN_HLEN;
return true;
}
+ break;
+ case htons(ETH_P_PPP_SES):
+ if (nf_flow_pppoe_proto(skb) == proto) {
+ *offset += PPPOE_SES_HLEN;
+ return true;
+ }
+ break;
}
return false;
@@ -255,12 +290,18 @@ static void nf_flow_encap_pop(struct sk_
__vlan_hwaccel_clear_tag(skb);
continue;
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
vlan_hdr = (struct vlan_hdr *)skb->data;
__skb_pull(skb, VLAN_HLEN);
vlan_set_encap_proto(skb, vlan_hdr);
skb_reset_network_header(skb);
break;
+ case htons(ETH_P_PPP_SES):
+ skb->protocol = nf_flow_pppoe_proto(skb);
+ skb_pull(skb, PPPOE_SES_HLEN);
+ skb_reset_network_header(skb);
+ break;
}
}
}
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -90,6 +90,7 @@ static void nft_dev_path_info(const stru
switch (path->type) {
case DEV_PATH_ETHERNET:
case DEV_PATH_VLAN:
+ case DEV_PATH_PPPOE:
info->indev = path->dev;
if (is_zero_ether_addr(info->h_source))
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
@@ -97,7 +98,7 @@ static void nft_dev_path_info(const stru
if (path->type == DEV_PATH_ETHERNET)
break;
- /* DEV_PATH_VLAN */
+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
info->indev = NULL;
break;
@@ -106,6 +107,8 @@ static void nft_dev_path_info(const stru
info->encap[info->num_encaps].id = path->encap.id;
info->encap[info->num_encaps].proto = path->encap.proto;
info->num_encaps++;
+ if (path->type == DEV_PATH_PPPOE)
+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
break;
case DEV_PATH_BRIDGE:
if (is_zero_ether_addr(info->h_source))

View File

@ -1,8 +1,10 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 4 Mar 2021 19:22:55 +0100
Subject: [PATCH] netfilter: nft_flow_offload: add dsa support
Date: Wed, 24 Mar 2021 02:30:44 +0100
Subject: [PATCH] netfilter: flowtable: add dsa support
Replace the master ethernet device by the dsa slave port.
Replace the master ethernet device by the dsa slave port. Packets coming
in from the software ingress path use the dsa slave port as input
device.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---

View File

@ -1,14 +1,14 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 20 Nov 2020 13:49:21 +0100
Subject: [PATCH] selftests: netfilter: flowtable bridge and VLAN support
Date: Wed, 24 Mar 2021 02:30:45 +0100
Subject: [PATCH] selftests: netfilter: flowtable bridge and vlan support
This patch adds two new tests to cover bridge and VLAN support:
This patch adds two new tests to cover bridge and vlan support:
- Add a bridge device to the Router1 (nsr1) container and attach the
veth0 device to the bridge. Set the IP address to the bridge device
to exercise the bridge forwarding path.
- Add VLAN encapsulation between the bridge device in the Router1 and
- Add vlan encapsulation between the bridge device in the Router1 and
one of the sender containers (ns1).
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

View File

@ -1,5 +1,5 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 7 Dec 2020 20:31:44 +0100
Date: Wed, 24 Mar 2021 02:30:46 +0100
Subject: [PATCH] netfilter: flowtable: add offload support for xmit path
types
@ -9,11 +9,13 @@ need to be used.
This patch also adds the FLOW_ACTION_VLAN_PUSH action to pass the VLAN
tag to the driver.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -175,28 +175,45 @@ static int flow_offload_eth_src(struct n
@@ -177,28 +177,45 @@ static int flow_offload_eth_src(struct n
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@ -67,7 +69,7 @@ tag to the driver.
return 0;
}
@@ -208,27 +225,40 @@ static int flow_offload_eth_dst(struct n
@@ -210,27 +227,40 @@ static int flow_offload_eth_dst(struct n
{
struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
@ -120,7 +122,7 @@ tag to the driver.
}
mask = ~0xffffffff;
@@ -241,7 +271,6 @@ static int flow_offload_eth_dst(struct n
@@ -243,7 +273,6 @@ static int flow_offload_eth_dst(struct n
val = val16;
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
&val, &mask);
@ -128,7 +130,7 @@ tag to the driver.
return 0;
}
@@ -463,27 +492,52 @@ static void flow_offload_ipv4_checksum(s
@@ -465,27 +494,52 @@ static void flow_offload_ipv4_checksum(s
}
}
@ -188,7 +190,7 @@ tag to the driver.
if (dst && dst->lwtstate) {
struct ip_tunnel_info *tun_info;
@@ -500,10 +554,15 @@ static void flow_offload_decap_tunnel(co
@@ -502,10 +556,15 @@ static void flow_offload_decap_tunnel(co
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@ -205,7 +207,7 @@ tag to the driver.
if (dst && dst->lwtstate) {
struct ip_tunnel_info *tun_info;
@@ -515,10 +574,14 @@ static void flow_offload_decap_tunnel(co
@@ -517,10 +576,14 @@ static void flow_offload_decap_tunnel(co
}
}
@ -223,7 +225,7 @@ tag to the driver.
flow_offload_decap_tunnel(flow, dir, flow_rule);
flow_offload_encap_tunnel(flow, dir, flow_rule);
@@ -526,6 +589,26 @@ int nf_flow_rule_route_ipv4(struct net *
@@ -528,6 +591,26 @@ int nf_flow_rule_route_ipv4(struct net *
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
return -1;
@ -250,7 +252,7 @@ tag to the driver.
if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
flow_offload_ipv4_snat(net, flow, dir, flow_rule);
flow_offload_port_snat(net, flow, dir, flow_rule);
@@ -538,7 +621,7 @@ int nf_flow_rule_route_ipv4(struct net *
@@ -540,7 +623,7 @@ int nf_flow_rule_route_ipv4(struct net *
test_bit(NF_FLOW_DNAT, &flow->flags))
flow_offload_ipv4_checksum(net, flow, flow_rule);
@ -259,7 +261,7 @@ tag to the driver.
return 0;
}
@@ -548,11 +631,7 @@ int nf_flow_rule_route_ipv6(struct net *
@@ -550,11 +633,7 @@ int nf_flow_rule_route_ipv6(struct net *
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@ -272,7 +274,7 @@ tag to the driver.
return -1;
if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
@@ -564,7 +643,7 @@ int nf_flow_rule_route_ipv6(struct net *
@@ -566,7 +645,7 @@ int nf_flow_rule_route_ipv6(struct net *
flow_offload_port_dnat(net, flow, dir, flow_rule);
}
@ -281,7 +283,7 @@ tag to the driver.
return 0;
}
@@ -578,10 +657,10 @@ nf_flow_offload_rule_alloc(struct net *n
@@ -580,10 +659,10 @@ nf_flow_offload_rule_alloc(struct net *n
enum flow_offload_tuple_dir dir)
{
const struct nf_flowtable *flowtable = offload->flowtable;
@ -294,7 +296,7 @@ tag to the driver.
int err = -ENOMEM;
flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
@@ -597,7 +676,10 @@ nf_flow_offload_rule_alloc(struct net *n
@@ -599,7 +678,10 @@ nf_flow_offload_rule_alloc(struct net *n
flow_rule->rule->match.key = &flow_rule->match.key;
tuple = &flow->tuplehash[dir].tuple;

View File

@ -1,10 +1,16 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 4 Mar 2021 19:24:11 +0100
Date: Wed, 24 Mar 2021 02:30:47 +0100
Subject: [PATCH] netfilter: nft_flow_offload: use direct xmit if
hardware offload is enabled
If there is a forward path to reach an ethernet device and hardware
offload is enabled, then use the direct xmit path.
Moreover, store the real device in the direct xmit path info since
software datapath uses dev_hard_header() to push the layer encapsulation
headers while hardware offload refers to the real device.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_flow_table.h
@ -37,7 +43,7 @@ offload is enabled, then use the direct xmit path.
case FLOW_OFFLOAD_XMIT_NEIGH:
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -506,7 +506,7 @@ static void flow_offload_redirect(struct
@@ -508,7 +508,7 @@ static void flow_offload_redirect(struct
switch (this_tuple->xmit_type) {
case FLOW_OFFLOAD_XMIT_DIRECT:
this_tuple = &flow->tuplehash[dir].tuple;

View File

@ -1,12 +1,13 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 8 Mar 2021 12:06:44 +0100
Subject: [PATCH] netfilter: nf_flow_table: fix untagging with
hardware-offloaded bridge vlan_filtering
Date: Wed, 24 Mar 2021 02:30:48 +0100
Subject: [PATCH] netfilter: flowtable: bridge vlan hardware offload and
switchdev
When switchdev offloading is enabled, treat an untagged VLAN as tagged for
ingress only
The switch might have already added the VLAN tag through PVID hardware
offload. Keep this extra VLAN in the flowtable but skip it on egress.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/linux/netdevice.h
@ -78,7 +79,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -592,8 +592,12 @@ nf_flow_rule_route_common(struct net *ne
@@ -594,8 +594,12 @@ nf_flow_rule_route_common(struct net *ne
other_tuple = &flow->tuplehash[!dir].tuple;
for (i = 0; i < other_tuple->encap_num; i++) {

View File

@ -1,7 +1,11 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 2 Mar 2021 00:51:31 +0100
Date: Wed, 24 Mar 2021 02:30:49 +0100
Subject: [PATCH] net: flow_offload: add FLOW_ACTION_PPPOE_PUSH
Add an action to represent the PPPoE hardware offload support that
includes the session ID.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/flow_offload.h

View File

@ -1,13 +1,17 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 2 Mar 2021 01:01:50 +0100
Date: Wed, 24 Mar 2021 02:30:50 +0100
Subject: [PATCH] netfilter: flowtable: support for
FLOW_ACTION_PPPOE_PUSH
Add a PPPoE push action if layer 2 protocol is ETH_P_PPP_SES to add
PPPoE flowtable hardware offload support.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -598,9 +598,18 @@ nf_flow_rule_route_common(struct net *ne
@@ -600,9 +600,18 @@ nf_flow_rule_route_common(struct net *ne
continue;
entry = flow_action_entry_next(flow_rule);

View File

@ -1,5 +1,5 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 18 Jan 2021 22:39:17 +0100
Date: Wed, 24 Mar 2021 02:30:51 +0100
Subject: [PATCH] dsa: slave: add support for TC_SETUP_FT
The dsa infrastructure provides a well-defined hierarchy of devices,

View File

@ -1,11 +1,12 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 13 Sep 2020 08:17:02 +0200
Date: Wed, 24 Mar 2021 02:30:52 +0100
Subject: [PATCH] net: ethernet: mtk_eth_soc: fix parsing packets in GDM
When using DSA, set the special tag in GDM ingress control to allow the MAC
to parse packets properly earlier. This affects rx DMA source port reporting.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@ -18,7 +19,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
#include "mtk_eth_soc.h"
@@ -1263,13 +1264,12 @@ static int mtk_poll_rx(struct napi_struc
@@ -1264,13 +1265,12 @@ static int mtk_poll_rx(struct napi_struc
break;
/* find out which mac the packet come from. values start at 1 */
@ -37,7 +38,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT ||
!eth->netdev[mac]))
@@ -2251,6 +2251,9 @@ static void mtk_gdm_config(struct mtk_et
@@ -2233,6 +2233,9 @@ static void mtk_gdm_config(struct mtk_et
val |= config;
@ -49,7 +50,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
/* Reset and enable PSE */
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -82,6 +82,7 @@
@@ -81,6 +81,7 @@
/* GDM Exgress Control Register */
#define MTK_GDMA_FWD_CFG(x) (0x500 + (x * 0x1000))
@ -57,7 +58,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
#define MTK_GDMA_ICS_EN BIT(22)
#define MTK_GDMA_TCS_EN BIT(21)
#define MTK_GDMA_UCS_EN BIT(20)
@@ -311,6 +312,7 @@
@@ -304,6 +305,7 @@
#define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
#define RX_DMA_FPORT_SHIFT 19
#define RX_DMA_FPORT_MASK 0x7

View File

@ -1,6 +1,6 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 11 Oct 2020 22:23:08 +0200
Subject: [PATCH] ethernet: mediatek: mtk_eth_soc: add support for
Date: Wed, 24 Mar 2021 02:30:53 +0100
Subject: [PATCH] net: ethernet: mtk_eth_soc: add support for
initializing the PPE
The PPE (packet processing engine) is used to offload NAT/routed or even
@ -9,6 +9,7 @@ hash. It also contains some functionality that will be used to bring up
flow offloading.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
@ -26,7 +27,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2284,12 +2284,17 @@ static int mtk_open(struct net_device *d
@@ -2258,12 +2258,17 @@ static int mtk_open(struct net_device *d
/* we run 2 netdevs on the same dma ring so we only bring it up once */
if (!refcount_read(&eth->dma_refcnt)) {
@ -46,7 +47,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
napi_enable(&eth->tx_napi);
napi_enable(&eth->rx_napi);
@@ -2359,6 +2364,9 @@ static int mtk_stop(struct net_device *d
@@ -2330,6 +2335,9 @@ static int mtk_stop(struct net_device *d
mtk_dma_free(eth);
@ -56,7 +57,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
return 0;
}
@@ -3148,6 +3156,13 @@ static int mtk_probe(struct platform_dev
@@ -3058,6 +3066,13 @@ static int mtk_probe(struct platform_dev
goto err_free_dev;
}
@ -70,7 +71,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
for (i = 0; i < MTK_MAX_DEVS; i++) {
if (!eth->netdev[i])
continue;
@@ -3222,6 +3237,7 @@ static const struct mtk_soc_data mt7621_
@@ -3132,6 +3147,7 @@ static const struct mtk_soc_data mt7621_
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7621_CLKS_BITMAP,
.required_pctl = false,
@ -78,7 +79,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
};
static const struct mtk_soc_data mt7622_data = {
@@ -3230,6 +3246,7 @@ static const struct mtk_soc_data mt7622_
@@ -3140,6 +3156,7 @@ static const struct mtk_soc_data mt7622_
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7622_CLKS_BITMAP,
.required_pctl = false,
@ -88,15 +89,15 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
static const struct mtk_soc_data mt7623_data = {
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -16,6 +16,7 @@
@@ -15,6 +15,7 @@
#include <linux/u64_stats_sync.h>
#include <linux/refcount.h>
#include <linux/phylink.h>
#include <linux/dim.h>
+#include "mtk_ppe.h"
#define MTK_QDMA_PAGE_SIZE 2048
#define MTK_MAX_RX_LENGTH 1536
@@ -87,6 +88,7 @@
@@ -86,6 +87,7 @@
#define MTK_GDMA_TCS_EN BIT(21)
#define MTK_GDMA_UCS_EN BIT(20)
#define MTK_GDMA_TO_PDMA 0x0
@ -104,7 +105,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
#define MTK_GDMA_DROP_ALL 0x7777
/* Unicast Filter MAC Address Register - Low */
@@ -308,6 +310,12 @@
@@ -301,6 +303,12 @@
#define RX_DMA_VID(_x) ((_x) & 0xfff)
/* QDMA descriptor rxd4 */
@ -117,7 +118,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
#define RX_DMA_L4_VALID BIT(24)
#define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
#define RX_DMA_FPORT_SHIFT 19
@@ -807,6 +815,7 @@ struct mtk_soc_data {
@@ -798,6 +806,7 @@ struct mtk_soc_data {
u32 caps;
u32 required_clks;
bool required_pctl;
@ -125,7 +126,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
netdev_features_t hw_features;
};
@@ -918,6 +927,8 @@ struct mtk_eth {
@@ -897,6 +906,8 @@ struct mtk_eth {
u32 tx_int_status_reg;
u32 rx_dma_l4_valid;
int ip_align;
@ -185,7 +186,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+{
+ unsigned long timeout = jiffies + HZ;
+
+ while (time_is_after_jiffies(timeout)) {
+ while (time_is_before_jiffies(timeout)) {
+ if (!(ppe_r32(ppe, MTK_PPE_GLO_CFG) & MTK_PPE_GLO_CFG_BUSY))
+ return 0;
+

View File

@ -1,9 +1,9 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 10 Dec 2020 12:19:18 +0100
Date: Wed, 24 Mar 2021 02:30:54 +0100
Subject: [PATCH] net: ethernet: mtk_eth_soc: add flow offloading support
This adds support for offloading IPv4 routed flows, including SNAT/DNAT,
one VLAN, and DSA.
one VLAN, PPPoE and DSA.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
@ -21,7 +21,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2896,6 +2896,7 @@ static const struct net_device_ops mtk_n
@@ -2813,6 +2813,7 @@ static const struct net_device_ops mtk_n
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = mtk_poll_controller,
#endif
@ -29,7 +29,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
};
static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
@@ -3161,6 +3162,10 @@ static int mtk_probe(struct platform_dev
@@ -3071,6 +3072,10 @@ static int mtk_probe(struct platform_dev
eth->base + MTK_ETH_PPE_BASE, 2);
if (err)
goto err_free_dev;
@ -42,15 +42,15 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
for (i = 0; i < MTK_MAX_DEVS; i++) {
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -16,6 +16,7 @@
@@ -15,6 +15,7 @@
#include <linux/u64_stats_sync.h>
#include <linux/refcount.h>
#include <linux/phylink.h>
#include <linux/dim.h>
+#include <linux/rhashtable.h>
#include "mtk_ppe.h"
#define MTK_QDMA_PAGE_SIZE 2048
@@ -41,7 +42,8 @@
@@ -40,7 +41,8 @@
NETIF_F_HW_VLAN_CTAG_RX | \
NETIF_F_SG | NETIF_F_TSO | \
NETIF_F_TSO6 | \
@ -60,7 +60,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
#define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
#define NEXT_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1))
@@ -929,6 +931,7 @@ struct mtk_eth {
@@ -908,6 +910,7 @@ struct mtk_eth {
int ip_align;
struct mtk_ppe ppe;
@ -68,7 +68,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
};
/* struct mtk_mac - the structure that holds the info about the MACs of the
@@ -973,4 +976,9 @@ int mtk_gmac_sgmii_path_setup(struct mtk
@@ -952,4 +955,9 @@ int mtk_gmac_sgmii_path_setup(struct mtk
int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id);
int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
@ -80,7 +80,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
#endif /* MTK_ETH_H */
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -0,0 +1,491 @@
@@ -0,0 +1,485 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
@ -220,12 +220,6 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+ if (IS_ERR(dp))
+ return -ENODEV;
+
+ if (!dp->cpu_dp)
+ return -ENODEV;
+
+ if (!dp->cpu_dp->tag_ops)
+ return -ENODEV;
+
+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
+ return -ENODEV;
+

View File

@ -0,0 +1,236 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:55 +0100
Subject: [PATCH] docs: nf_flowtable: update documentation with
enhancements
This patch updates the flowtable documentation to describe recent
enhancements:
- Offload action is available after the first packets go through the
classic forwarding path.
- IPv4 and IPv6 are supported. Only TCP and UDP layer 4 are supported at
this stage.
- Tuple has been augmented to track VLAN id and PPPoE session id.
- Bridge and IP forwarding integration, including bridge VLAN filtering
support.
- Hardware offload support.
- Describe the [OFFLOAD] and [HW_OFFLOAD] tags in the conntrack table
listing.
- Replace 'flow offload' by 'flow add' in example rulesets (preferred
syntax).
- Describe existing cache limitations.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/Documentation/networking/nf_flowtable.rst
+++ b/Documentation/networking/nf_flowtable.rst
@@ -4,35 +4,38 @@
Netfilter's flowtable infrastructure
====================================
-This documentation describes the software flowtable infrastructure available in
-Netfilter since Linux kernel 4.16.
+This documentation describes the Netfilter flowtable infrastructure which allows
+you to define a fastpath through the flowtable datapath. This infrastructure
+also provides hardware offload support. The flowtable supports the layer 3
+IPv4 and IPv6 and the layer 4 TCP and UDP protocols.
Overview
--------
-Initial packets follow the classic forwarding path, once the flow enters the
-established state according to the conntrack semantics (ie. we have seen traffic
-in both directions), then you can decide to offload the flow to the flowtable
-from the forward chain via the 'flow offload' action available in nftables.
-
-Packets that find an entry in the flowtable (ie. flowtable hit) are sent to the
-output netdevice via neigh_xmit(), hence, they bypass the classic forwarding
-path (the visible effect is that you do not see these packets from any of the
-netfilter hooks coming after the ingress). In case of flowtable miss, the packet
-follows the classic forward path.
-
-The flowtable uses a resizable hashtable, lookups are based on the following
-7-tuple selectors: source, destination, layer 3 and layer 4 protocols, source
-and destination ports and the input interface (useful in case there are several
-conntrack zones in place).
-
-Flowtables are populated via the 'flow offload' nftables action, so the user can
-selectively specify what flows are placed into the flow table. Hence, packets
-follow the classic forwarding path unless the user explicitly instruct packets
-to use this new alternative forwarding path via nftables policy.
+Once the first packet of the flow successfully goes through the IP forwarding
+path, from the second packet on, you might decide to offload the flow to the
+flowtable through your ruleset. The flowtable infrastructure provides a rule
+action that allows you to specify when to add a flow to the flowtable.
+
+A packet that finds a matching entry in the flowtable (ie. flowtable hit) is
+transmitted to the output netdevice via neigh_xmit(), hence, packets bypass the
+classic IP forwarding path (the visible effect is that you do not see these
+packets from any of the Netfilter hooks coming after ingress). In case that
+there is no matching entry in the flowtable (ie. flowtable miss), the packet
+follows the classic IP forwarding path.
+
+The flowtable uses a resizable hashtable. Lookups are based on the following
+n-tuple selectors: layer 2 protocol encapsulation (VLAN and PPPoE), layer 3
+source and destination, layer 4 source and destination ports and the input
+interface (useful in case there are several conntrack zones in place).
+
+The 'flow add' action allows you to populate the flowtable: the user selectively
+specifies what flows are placed into the flowtable. Hence, packets follow the
+classic IP forwarding path unless the user explicitly instructs flows to use this
+new alternative forwarding path via policy.
-This is represented in Fig.1, which describes the classic forwarding path
-including the Netfilter hooks and the flowtable fastpath bypass.
+The flowtable datapath is represented in Fig.1, which describes the classic IP
+forwarding path including the Netfilter hooks and the flowtable fastpath bypass.
::
@@ -67,11 +70,13 @@ including the Netfilter hooks and the fl
Fig.1 Netfilter hooks and flowtable interactions
The flowtable entry also stores the NAT configuration, so all packets are
-mangled according to the NAT policy that matches the initial packets that went
-through the classic forwarding path. The TTL is decremented before calling
-neigh_xmit(). Fragmented traffic is passed up to follow the classic forwarding
-path given that the transport selectors are missing, therefore flowtable lookup
-is not possible.
+mangled according to the NAT policy that is specified from the classic IP
+forwarding path. The TTL is decremented before calling neigh_xmit(). Fragmented
+traffic is passed up to follow the classic IP forwarding path given that the
+transport header is missing; in this case, flowtable lookups are not possible.
+TCP RST and FIN packets are also passed up to the classic IP forwarding path to
+release the flow gracefully. Packets that exceed the MTU are also passed up to
+the classic forwarding path to report packet-too-big ICMP errors to the sender.
Example configuration
---------------------
@@ -85,7 +90,7 @@ flowtable and add one rule to your forwa
}
chain y {
type filter hook forward priority 0; policy accept;
- ip protocol tcp flow offload @f
+ ip protocol tcp flow add @f
counter packets 0 bytes 0
}
}
@@ -103,6 +108,117 @@ flow is offloaded, you will observe that
does not get updated for the packets that are being forwarded through the
forwarding bypass.
+You can identify offloaded flows through the [OFFLOAD] tag when listing your
+connection tracking table.
+
+::
+ # conntrack -L
+ tcp 6 src=10.141.10.2 dst=192.168.10.2 sport=52728 dport=5201 src=192.168.10.2 dst=192.168.10.1 sport=5201 dport=52728 [OFFLOAD] mark=0 use=2
+
+
+Layer 2 encapsulation
+---------------------
+
+Since Linux kernel 5.13, the flowtable infrastructure discovers the real
+netdevice behind VLAN and PPPoE netdevices. The flowtable software datapath
+parses the VLAN and PPPoE layer 2 headers to extract the ethertype and the
+VLAN ID / PPPoE session ID which are used for the flowtable lookups. The
+flowtable datapath also deals with layer 2 decapsulation.
+
+You do not need to add the PPPoE and the VLAN devices to your flowtable,
+instead the real device is sufficient for the flowtable to track your flows.
+
+Bridge and IP forwarding
+------------------------
+
+Since Linux kernel 5.13, you can add bridge ports to the flowtable. The
+flowtable infrastructure discovers the topology behind the bridge device. This
+allows the flowtable to define a fastpath bypass between the bridge ports
+(represented as eth1 and eth2 in the example figure below) and the gateway
+device (represented as eth0) in your switch/router.
+
+::
+ fastpath bypass
+ .-------------------------.
+ / \
+ | IP forwarding |
+ | / \ \/
+ | br0 eth0 ..... eth0
+ . / \ *host B*
+ -> eth1 eth2
+ . *switch/router*
+ .
+ .
+ eth0
+ *host A*
+
+The flowtable infrastructure also supports bridge VLAN filtering actions
+such as PVID and untagged. You can also stack a classic VLAN device on top of
+your bridge port.
+
+If you would like your flowtable to define a fastpath between your bridge
+ports and your IP forwarding path, you have to add your bridge ports (as
+represented by the real netdevice) to your flowtable definition.
+
+Counters
+--------
+
+The flowtable can synchronize packet and byte counters with the existing
+connection tracking entry by specifying the counter statement in your flowtable
+definition, e.g.
+
+::
+ table inet x {
+ flowtable f {
+ hook ingress priority 0; devices = { eth0, eth1 };
+ counter
+ }
+ ...
+ }
+
+Counter support is available since Linux kernel 5.7.
+
+Hardware offload
+----------------
+
+If your network device provides hardware offload support, you can turn it on by
+means of the 'offload' flag in your flowtable definition, e.g.
+
+::
+ table inet x {
+ flowtable f {
+ hook ingress priority 0; devices = { eth0, eth1 };
+ flags offload;
+ }
+ ...
+ }
+
+There is a workqueue that adds the flows to the hardware. Note that a few
+packets might still run over the flowtable software path until the workqueue has
+a chance to offload the flow to the network device.
+
+You can identify hardware offloaded flows through the [HW_OFFLOAD] tag when
+listing your connection tracking table. Please note that the [OFFLOAD] tag
+refers to the software offload mode, so there is a distinction between [OFFLOAD]
+which refers to the software flowtable fastpath and [HW_OFFLOAD] which refers
+to the hardware offload datapath being used by the flow.
+
+The flowtable hardware offload infrastructure also supports the DSA
+(Distributed Switch Architecture).
+
+Limitations
+-----------
+
+The flowtable behaves like a cache. The flowtable entries might get stale if
+either the destination MAC address or the egress netdevice that is used for
+transmission changes.
+
+This might be a problem if:
+
+- You run the flowtable in software mode and you combine bridge and IP
+ forwarding in your setup.
+- Hardware offload is enabled.
+
More reading
------------

View File

@ -25,7 +25,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2075,7 +2075,9 @@ static void dsa_slave_switchdev_event_wo
@@ -2109,7 +2109,9 @@ static void dsa_slave_switchdev_event_wo
err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
if (err) {
@ -36,7 +36,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
break;
}
fdb_info->offloaded = true;
@@ -2090,9 +2092,11 @@ static void dsa_slave_switchdev_event_wo
@@ -2124,9 +2126,11 @@ static void dsa_slave_switchdev_event_wo
err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
if (err) {

View File

@ -54,7 +54,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
struct sk_buff * (*xmit)(struct sk_buff *skb,
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2050,76 +2050,66 @@ static int dsa_slave_netdevice_event(str
@@ -2084,76 +2084,66 @@ static int dsa_slave_netdevice_event(str
return NOTIFY_DONE;
}
@ -167,7 +167,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
}
/* Called under rcu_read_lock() */
@@ -2127,7 +2117,9 @@ static int dsa_slave_switchdev_event(str
@@ -2161,7 +2151,9 @@ static int dsa_slave_switchdev_event(str
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
@ -177,7 +177,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
int err;
if (event == SWITCHDEV_PORT_ATTR_SET) {
@@ -2140,20 +2132,32 @@ static int dsa_slave_switchdev_event(str
@@ -2174,20 +2166,32 @@ static int dsa_slave_switchdev_event(str
if (!dsa_slave_dev_check(dev))
return NOTIFY_DONE;
@ -213,7 +213,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
dev_hold(dev);
break;
default:
@@ -2163,10 +2167,6 @@ static int dsa_slave_switchdev_event(str
@@ -2197,10 +2201,6 @@ static int dsa_slave_switchdev_event(str
dsa_schedule_work(&switchdev_work->work);
return NOTIFY_OK;

View File

@ -20,7 +20,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2122,31 +2122,29 @@ static int dsa_slave_switchdev_event(str
@@ -2156,31 +2156,29 @@ static int dsa_slave_switchdev_event(str
struct dsa_port *dp;
int err;
@ -68,7 +68,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
fdb_info = ptr;
if (!fdb_info->added_by_user) {
@@ -2159,13 +2157,12 @@ static int dsa_slave_switchdev_event(str
@@ -2193,13 +2191,12 @@ static int dsa_slave_switchdev_event(str
switchdev_work->vid = fdb_info->vid;
dev_hold(dev);

View File

@ -30,7 +30,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2135,6 +2135,9 @@ static int dsa_slave_switchdev_event(str
@@ -2169,6 +2169,9 @@ static int dsa_slave_switchdev_event(str
dp = dsa_slave_to_port(dev);

View File

@ -170,7 +170,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
*/
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2112,6 +2112,28 @@ static void dsa_slave_switchdev_event_wo
@@ -2146,6 +2146,28 @@ static void dsa_slave_switchdev_event_wo
dev_put(dp->slave);
}
@ -199,7 +199,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
/* Called under rcu_read_lock() */
static int dsa_slave_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
@@ -2130,10 +2152,37 @@ static int dsa_slave_switchdev_event(str
@@ -2164,10 +2186,37 @@ static int dsa_slave_switchdev_event(str
return notifier_from_errno(err);
case SWITCHDEV_FDB_ADD_TO_DEVICE:
case SWITCHDEV_FDB_DEL_TO_DEVICE:
@ -240,7 +240,7 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del)
return NOTIFY_DONE;
@@ -2148,18 +2197,13 @@ static int dsa_slave_switchdev_event(str
@@ -2182,18 +2231,13 @@ static int dsa_slave_switchdev_event(str
switchdev_work->port = dp->index;
switchdev_work->event = event;

View File

@ -812,9 +812,9 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
void flow_offload_teardown(struct flow_offload *flow);
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data);
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data);
+
int nf_flow_snat_port(const struct flow_offload *flow,
struct sk_buff *skb, unsigned int thoff,
u8 protocol, enum flow_offload_tuple_dir dir);
void nf_flow_snat_port(const struct flow_offload *flow,
struct sk_buff *skb, unsigned int thoff,
u8 protocol, enum flow_offload_tuple_dir dir);

View File

@ -1,263 +0,0 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 1 Mar 2021 23:52:49 +0100
Subject: [PATCH] netfilter: flowtable: add pppoe support
---
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1453,7 +1453,7 @@ static void ppp_dev_priv_destructor(stru
static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
struct net_device_path *path)
{
- struct ppp *ppp = netdev_priv(path->dev);
+ struct ppp *ppp = netdev_priv(ctx->dev);
struct ppp_channel *chan;
struct channel *pch;
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -987,6 +987,7 @@ static int pppoe_fill_forward_path(struc
path->type = DEV_PATH_PPPOE;
path->encap.proto = htons(ETH_P_PPP_SES);
path->encap.id = be16_to_cpu(po->num);
+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
path->dev = ctx->dev;
ctx->dev = dev;
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -842,6 +842,7 @@ struct net_device_path {
struct {
u16 id;
__be16 proto;
+ u8 h_dest[ETH_ALEN];
} encap;
struct {
enum {
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -7,6 +7,9 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -162,6 +165,8 @@ static bool ip_has_options(unsigned int
static void nf_flow_tuple_encap(struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
+ struct vlan_ethhdr *veth;
+ struct pppoe_hdr *phdr;
int i = 0;
if (skb_vlan_tag_present(skb)) {
@@ -169,23 +174,35 @@ static void nf_flow_tuple_encap(struct s
tuple->encap[i].proto = skb->vlan_proto;
i++;
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
-
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
tuple->encap[i].proto = skb->protocol;
+ break;
+ case htons(ETH_P_PPP_SES):
+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+ tuple->encap[i].id = ntohs(phdr->sid);
+ tuple->encap[i].proto = skb->protocol;
+ break;
}
}
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple)
+ struct flow_offload_tuple *tuple, u32 *nhoff)
{
unsigned int thoff, hdrsize, offset = 0;
struct flow_ports *ports;
struct iphdr *iph;
- if (skb->protocol == htons(ETH_P_8021Q))
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
offset += VLAN_HLEN;
+ break;
+ case htons(ETH_P_PPP_SES):
+ offset += PPPOE_SES_HLEN;
+ break;
+ }
if (!pskb_may_pull(skb, sizeof(*iph) + offset))
return -1;
@@ -226,6 +243,7 @@ static int nf_flow_tuple_ip(struct sk_bu
tuple->l4proto = iph->protocol;
tuple->iifidx = dev->ifindex;
nf_flow_tuple_encap(skb, tuple);
+ *nhoff = offset;
return 0;
}
@@ -270,14 +288,36 @@ static unsigned int nf_flow_xmit_xfrm(st
return NF_STOLEN;
}
+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+{
+ __be16 proto;
+
+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+ sizeof(struct pppoe_hdr)));
+ switch (proto) {
+ case htons(PPP_IP):
+ return htons(ETH_P_IP);
+ case htons(PPP_IPV6):
+ return htons(ETH_P_IPV6);
+ }
+
+ return 0;
+}
+
static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto)
{
- if (skb->protocol == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veth;
+ struct vlan_ethhdr *veth;
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
if (veth->h_vlan_encapsulated_proto == proto)
return true;
+ break;
+ case htons(ETH_P_PPP_SES):
+ if (nf_flow_pppoe_proto(skb) == proto)
+ return true;
+ break;
}
return false;
@@ -294,12 +334,18 @@ static void nf_flow_encap_pop(struct sk_
__vlan_hwaccel_clear_tag(skb);
continue;
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
vlan_hdr = (struct vlan_hdr *)skb->data;
__skb_pull(skb, VLAN_HLEN);
vlan_set_encap_proto(skb, vlan_hdr);
skb_reset_network_header(skb);
break;
+ case htons(ETH_P_PPP_SES):
+ skb->protocol = nf_flow_pppoe_proto(skb);
+ skb_pull(skb, PPPOE_SES_HLEN);
+ skb_reset_network_header(skb);
+ break;
}
}
}
@@ -343,7 +389,7 @@ nf_flow_offload_ip_hook(void *priv, stru
!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP)))
return NF_ACCEPT;
- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &offset) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -357,9 +403,6 @@ nf_flow_offload_ip_hook(void *priv, stru
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return NF_ACCEPT;
- if (skb->protocol == htons(ETH_P_8021Q))
- offset += VLAN_HLEN;
-
if (skb_try_make_writable(skb, sizeof(*iph) + offset))
return NF_DROP;
@@ -543,14 +586,20 @@ static int nf_flow_nat_ipv6(const struct
}
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple)
+ struct flow_offload_tuple *tuple, u32 *nhoff)
{
unsigned int thoff, hdrsize, offset = 0;
struct flow_ports *ports;
struct ipv6hdr *ip6h;
- if (skb->protocol == htons(ETH_P_8021Q))
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
offset += VLAN_HLEN;
+ break;
+ case htons(ETH_P_PPP_SES):
+ offset += PPPOE_SES_HLEN;
+ break;
+ }
if (!pskb_may_pull(skb, sizeof(*ip6h) + offset))
return -1;
@@ -586,6 +635,7 @@ static int nf_flow_tuple_ipv6(struct sk_
tuple->l4proto = ip6h->nexthdr;
tuple->iifidx = dev->ifindex;
nf_flow_tuple_encap(skb, tuple);
+ *nhoff = offset;
return 0;
}
@@ -611,7 +661,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6)))
return NF_ACCEPT;
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &offset) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -625,9 +675,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return NF_ACCEPT;
- if (skb->protocol == htons(ETH_P_8021Q))
- offset += VLAN_HLEN;
-
ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, sizeof(*ip6h)))
return NF_ACCEPT;
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -90,6 +90,7 @@ static void nft_dev_path_info(const stru
switch (path->type) {
case DEV_PATH_ETHERNET:
case DEV_PATH_VLAN:
+ case DEV_PATH_PPPOE:
info->indev = path->dev;
if (is_zero_ether_addr(info->h_source))
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
@@ -97,7 +98,7 @@ static void nft_dev_path_info(const stru
if (path->type == DEV_PATH_ETHERNET)
break;
- /* DEV_PATH_VLAN */
+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
info->indev = NULL;
break;
@@ -106,6 +107,8 @@ static void nft_dev_path_info(const stru
info->encap[info->num_encaps].id = path->encap.id;
info->encap[info->num_encaps].proto = path->encap.proto;
info->num_encaps++;
+ if (path->type == DEV_PATH_PPPOE)
+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
break;
case DEV_PATH_BRIDGE:
if (is_zero_ether_addr(info->h_source))

View File

@ -9,7 +9,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -857,7 +857,8 @@ static int txd_to_idx(struct mtk_tx_ring
@@ -858,7 +858,8 @@ static int txd_to_idx(struct mtk_tx_ring
return ((void *)dma - (void *)ring->dma) / sizeof(*dma);
}
@ -19,7 +19,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
{
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
@@ -889,8 +890,12 @@ static void mtk_tx_unmap(struct mtk_eth
@@ -890,8 +891,12 @@ static void mtk_tx_unmap(struct mtk_eth
tx_buf->flags = 0;
if (tx_buf->skb &&
@ -34,7 +34,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
tx_buf->skb = NULL;
}
@@ -1068,7 +1073,7 @@ err_dma:
@@ -1069,7 +1074,7 @@ err_dma:
tx_buf = mtk_desc_to_tx_buf(ring, itxd);
/* unmap dma */

View File

@ -15,7 +15,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -85,7 +85,7 @@ static int mtk_mdio_busy_wait(struct mtk
@@ -86,7 +86,7 @@ static int mtk_mdio_busy_wait(struct mtk
return 0;
if (time_after(jiffies, t_start + PHY_IAC_TIMEOUT))
break;

View File

@ -21,7 +21,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
skb_record_rx_queue(skb, 0);
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -295,6 +295,7 @@
@@ -300,6 +300,7 @@
#define RX_DMA_LSO BIT(30)
#define RX_DMA_PLEN0(_x) (((_x) & 0x3fff) << 16)
#define RX_DMA_GET_PLEN0(_x) (((_x) >> 16) & 0x3fff)

View File

@ -12,7 +12,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1130,17 +1130,6 @@ static void mtk_wake_queue(struct mtk_et
@@ -1131,17 +1131,6 @@ static void mtk_wake_queue(struct mtk_et
}
}
@ -30,7 +30,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mtk_mac *mac = netdev_priv(dev);
@@ -1161,7 +1150,7 @@ static netdev_tx_t mtk_start_xmit(struct
@@ -1162,7 +1151,7 @@ static netdev_tx_t mtk_start_xmit(struct
tx_num = mtk_cal_txd_req(skb);
if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
@ -39,7 +39,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
netif_err(eth, tx_queued, dev,
"Tx Ring full when queue awake!\n");
spin_unlock(&eth->page_lock);
@@ -1187,7 +1176,7 @@ static netdev_tx_t mtk_start_xmit(struct
@@ -1188,7 +1177,7 @@ static netdev_tx_t mtk_start_xmit(struct
goto drop;
if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))

View File

@ -21,7 +21,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
MTK_QDMA_GLO_CFG);
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -197,7 +197,7 @@
@@ -202,7 +202,7 @@
#define MTK_RX_BT_32DWORDS (3 << 11)
#define MTK_NDP_CO_PRO BIT(10)
#define MTK_TX_WB_DDONE BIT(6)

View File

@ -10,7 +10,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -19,7 +19,7 @@
@@ -21,7 +21,7 @@
#define MTK_QDMA_PAGE_SIZE 2048
#define MTK_MAX_RX_LENGTH 1536
#define MTK_TX_DMA_BUF_LEN 0x3fff

View File

@ -20,7 +20,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
MediaTek SoC family.
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1232,12 +1232,13 @@ static void mtk_update_rx_cpu_idx(struct
@@ -1233,12 +1233,13 @@ static void mtk_update_rx_cpu_idx(struct
static int mtk_poll_rx(struct napi_struct *napi, int budget,
struct mtk_eth *eth)
{
@ -95,7 +95,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
if (likely(napi_schedule_prep(&eth->tx_napi))) {
__napi_schedule(&eth->tx_napi);
mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
@@ -2315,6 +2332,9 @@ static int mtk_stop(struct net_device *d
@@ -2323,6 +2340,9 @@ static int mtk_stop(struct net_device *d
napi_disable(&eth->tx_napi);
napi_disable(&eth->rx_napi);
@ -105,7 +105,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
mtk_stop_dma(eth, MTK_PDMA_GLO_CFG);
@@ -2364,6 +2384,64 @@ err_disable_clks:
@@ -2375,6 +2395,64 @@ err_disable_clks:
return ret;
}
@ -170,7 +170,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
static int mtk_hw_init(struct mtk_eth *eth)
{
int i, val, ret;
@@ -2385,9 +2463,6 @@ static int mtk_hw_init(struct mtk_eth *e
@@ -2396,9 +2474,6 @@ static int mtk_hw_init(struct mtk_eth *e
goto err_disable_pm;
}
@ -180,7 +180,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
/* disable delay and normal interrupt */
mtk_tx_irq_disable(eth, ~0);
mtk_rx_irq_disable(eth, ~0);
@@ -2426,11 +2501,10 @@ static int mtk_hw_init(struct mtk_eth *e
@@ -2437,11 +2512,10 @@ static int mtk_hw_init(struct mtk_eth *e
/* Enable RX VLan Offloading */
mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
@ -194,7 +194,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
mtk_tx_irq_disable(eth, ~0);
mtk_rx_irq_disable(eth, ~0);
@@ -2934,6 +3008,13 @@ static int mtk_probe(struct platform_dev
@@ -2946,6 +3020,13 @@ static int mtk_probe(struct platform_dev
spin_lock_init(&eth->page_lock);
spin_lock_init(&eth->tx_irq_lock);
spin_lock_init(&eth->rx_irq_lock);
@ -210,15 +210,15 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -15,6 +15,7 @@
#include <linux/u64_stats_sync.h>
@@ -16,6 +16,7 @@
#include <linux/refcount.h>
#include <linux/phylink.h>
#include <linux/rhashtable.h>
+#include <linux/dim.h>
#include "mtk_ppe.h"
#define MTK_QDMA_PAGE_SIZE 2048
#define MTK_MAX_RX_LENGTH 1536
@@ -131,13 +132,18 @@
@@ -136,13 +137,18 @@
/* PDMA Delay Interrupt Register */
#define MTK_PDMA_DELAY_INT 0xa0c
@ -242,7 +242,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
/* PDMA Interrupt Status Register */
#define MTK_PDMA_INT_STATUS 0xa20
@@ -219,6 +225,7 @@
@@ -224,6 +230,7 @@
/* QDMA Interrupt Status Register */
#define MTK_QDMA_INT_STATUS 0x1A18
#define MTK_RX_DONE_DLY BIT(30)
@ -250,7 +250,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
#define MTK_RX_DONE_INT3 BIT(19)
#define MTK_RX_DONE_INT2 BIT(18)
#define MTK_RX_DONE_INT1 BIT(17)
@@ -228,8 +235,7 @@
@@ -233,8 +240,7 @@
#define MTK_TX_DONE_INT1 BIT(1)
#define MTK_TX_DONE_INT0 BIT(0)
#define MTK_RX_DONE_INT MTK_RX_DONE_DLY
@ -260,7 +260,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
/* QDMA Interrupt grouping registers */
#define MTK_QDMA_INT_GRP1 0x1a20
@@ -892,6 +898,18 @@ struct mtk_eth {
@@ -905,6 +911,18 @@ struct mtk_eth {
const struct mtk_soc_data *soc;

View File

@ -49,7 +49,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
} else {
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -624,6 +624,7 @@ struct mtk_tx_buf {
@@ -636,6 +636,7 @@ struct mtk_tx_buf {
* @phys: The physical addr of tx_buf
* @next_free: Pointer to the next free descriptor
* @last_free: Pointer to the last free descriptor
@ -57,7 +57,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
* @thresh: The threshold of minimum amount of free descriptors
* @free_count: QDMA uses a linked list. Track how many free descriptors
* are present
@@ -634,6 +635,7 @@ struct mtk_tx_ring {
@@ -646,6 +647,7 @@ struct mtk_tx_ring {
dma_addr_t phys;
struct mtk_tx_dma *next_free;
struct mtk_tx_dma *last_free;

View File

@ -11,7 +11,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -776,13 +776,18 @@ static inline int mtk_max_buf_size(int f
@@ -777,13 +777,18 @@ static inline int mtk_max_buf_size(int f
return buf_size;
}
@ -32,7 +32,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
}
/* the qdma core needs scratch memory to be setup */
@@ -1254,8 +1259,7 @@ static int mtk_poll_rx(struct napi_struc
@@ -1255,8 +1260,7 @@ static int mtk_poll_rx(struct napi_struc
rxd = &ring->dma[idx];
data = ring->data[idx];