kernel: port upstream nft_flow_offload changes to xt_FLOWOFFLOAD and fix routing issues

Replace an old cleanup patch that never made it upstream with the proper
upstream fix. The old patch was incompatible with the recent changes that
affected the way the flow tuple dst entry is used.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
(cherry-picked from commits 442ecce761 and c8933ce533)
v19.07.3_mercusys_ac12_duma
Felix Fietkau 5 years ago
parent ac04be82c4
commit 151bd9ee25

@ -1,89 +0,0 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 15 Mar 2018 18:21:43 +0100
Subject: [PATCH] netfilter: nf_flow_table: clean up and fix dst handling
dst handling in the code is inconsistent and possibly wrong. In my test,
skb_dst(skb) holds the dst entry after routing but before NAT, so the
code could possibly return the same dst entry for both directions of a
connection.
Additionally, there was some confusion over the dst entry vs the address
passed as parameter to rt_nexthop/rt6_nexthop.
Do an explicit dst lookup for both ends of the connection and always use
the source address for it. When running the IP hook, use the dst entry
for the opposite direction for determining the route.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -238,7 +238,7 @@ nf_flow_offload_ip_hook(void *priv, stru
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+ rt = (const struct rtable *)flow->tuplehash[!dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
(ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
@@ -455,7 +455,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+ rt = (struct rt6_info *)flow->tuplehash[!dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -17,27 +17,38 @@ struct nft_flow_offload {
struct nft_flowtable *flowtable;
};
-static int nft_flow_route(const struct nft_pktinfo *pkt,
- const struct nf_conn *ct,
- struct nf_flow_route *route,
- enum ip_conntrack_dir dir)
+static struct dst_entry *
+nft_flow_dst(const struct nf_conn *ct, enum ip_conntrack_dir dir,
+ const struct nft_pktinfo *pkt)
{
- struct dst_entry *this_dst = skb_dst(pkt->skb);
- struct dst_entry *other_dst = NULL;
+ struct dst_entry *dst;
struct flowi fl;
memset(&fl, 0, sizeof(fl));
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
- fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
break;
case NFPROTO_IPV6:
- fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
break;
}
- nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
- if (!other_dst)
+ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
+
+ return dst;
+}
+
+static int nft_flow_route(const struct nft_pktinfo *pkt,
+ const struct nf_conn *ct,
+ struct nf_flow_route *route,
+ enum ip_conntrack_dir dir)
+{
+ struct dst_entry *this_dst, *other_dst;
+
+ this_dst = nft_flow_dst(ct, dir, pkt);
+ other_dst = nft_flow_dst(ct, !dir, pkt);
+ if (!this_dst || !other_dst)
return -ENOENT;
route->tuple[dir].dst = this_dst;

@ -0,0 +1,39 @@
From: wenxu <wenxu@ucloud.cn>
Date: Wed, 9 Jan 2019 10:40:11 +0800
Subject: [PATCH] netfilter: nft_flow_offload: Fix reverse route lookup
Using the following example:
client 1.1.1.7 ---> 2.2.2.7, which is DNATed to server 10.0.0.7
The first reply packet (i.e. syn+ack) uses an incorrect destination
address for the reverse route lookup since it uses:
daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
which is 2.2.2.7 in the scenario that is described above, while this
should be:
daddr = ct->tuplehash[dir].tuple.src.u3.ip;
that is 10.0.0.7.
Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -29,10 +29,10 @@ static int nft_flow_route(const struct n
memset(&fl, 0, sizeof(fl));
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
- fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
break;
case NFPROTO_IPV6:
- fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
break;
}

@ -26,8 +26,8 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- rt = (const struct rtable *)flow->tuplehash[!dir].tuple.dst_cache;
+ rt = (struct rtable *)flow->tuplehash[!dir].tuple.dst_cache;
- rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+ rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
(ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)

@ -0,0 +1,86 @@
From: wenxu <wenxu@ucloud.cn>
Date: Thu, 10 Jan 2019 14:51:35 +0800
Subject: [PATCH] netfilter: nft_flow_offload: fix interaction with vrf slave
device
In the forward chain, the iif is changed from slave device to master vrf
device. Thus, flow offload does not find a match on the lower slave
device.
This patch uses the cached route, i.e. dst->dev, to update the iif and
oif fields in the flow entry.
After this patch, the following example works fine:
# ip addr add dev eth0 1.1.1.1/24
# ip addr add dev eth1 10.0.0.1/24
# ip link add user1 type vrf table 1
# ip l set user1 up
# ip l set dev eth0 master user1
# ip l set dev eth1 master user1
# nft add table firewall
# nft add flowtable f fb1 { hook ingress priority 0 \; devices = { eth0, eth1 } \; }
# nft add chain f ftb-all {type filter hook forward priority 0 \; policy accept \; }
# nft add rule f ftb-all ct zone 1 ip protocol tcp flow offload @fb1
# nft add rule f ftb-all ct zone 1 ip protocol udp flow offload @fb1
Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -84,7 +84,6 @@ struct flow_offload {
struct nf_flow_route {
struct {
struct dst_entry *dst;
- int ifindex;
} tuple[FLOW_OFFLOAD_DIR_MAX];
};
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -28,6 +28,7 @@ flow_offload_fill_dir(struct flow_offloa
{
struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+ struct dst_entry *other_dst = route->tuple[!dir].dst;
struct dst_entry *dst = route->tuple[dir].dst;
ft->dir = dir;
@@ -50,8 +51,8 @@ flow_offload_fill_dir(struct flow_offloa
ft->src_port = ctt->src.u.tcp.port;
ft->dst_port = ctt->dst.u.tcp.port;
- ft->iifidx = route->tuple[dir].ifindex;
- ft->oifidx = route->tuple[!dir].ifindex;
+ ft->iifidx = other_dst->dev->ifindex;
+ ft->oifidx = dst->dev->ifindex;
ft->dst_cache = dst;
}
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -30,9 +30,11 @@ static int nft_flow_route(const struct n
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+ fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
break;
case NFPROTO_IPV6:
fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+ fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
break;
}
@@ -41,9 +43,7 @@ static int nft_flow_route(const struct n
return -ENOENT;
route->tuple[dir].dst = this_dst;
- route->tuple[dir].ifindex = nft_in(pkt)->ifindex;
route->tuple[!dir].dst = other_dst;
- route->tuple[!dir].ifindex = nft_out(pkt)->ifindex;
return 0;
}

@ -1,6 +1,6 @@
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -164,6 +164,8 @@ struct nf_flow_table_hw {
@@ -163,6 +163,8 @@ struct nf_flow_table_hw {
int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload);
void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload);
@ -19,7 +19,7 @@
struct flow_offload_entry {
struct flow_offload flow;
@@ -151,6 +152,22 @@ void flow_offload_free(struct flow_offlo
@@ -152,6 +153,22 @@ void flow_offload_free(struct flow_offlo
}
EXPORT_SYMBOL_GPL(flow_offload_free);

@ -98,7 +98,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
--- /dev/null
+++ b/net/netfilter/xt_FLOWOFFLOAD.c
@@ -0,0 +1,408 @@
@@ -0,0 +1,421 @@
+/*
+ * Copyright (C) 2018 Felix Fietkau <nbd@nbd.name>
+ *
@ -112,8 +112,9 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_flow_table.h>
+
+static struct nf_flowtable nf_flowtable;
+static HLIST_HEAD(hooks);
@ -280,21 +281,24 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+}
+
+static bool
+xt_flowoffload_skip(struct sk_buff *skb)
+xt_flowoffload_skip(struct sk_buff *skb, int family)
+{
+ struct ip_options *opt = &(IPCB(skb)->opt);
+
+ if (unlikely(opt->optlen))
+ return true;
+ if (skb_sec_path(skb))
+ return true;
+
+ if (family == NFPROTO_IPV4) {
+ const struct ip_options *opt = &(IPCB(skb)->opt);
+
+ if (unlikely(opt->optlen))
+ return true;
+ }
+
+ return false;
+}
+
+static struct dst_entry *
+xt_flowoffload_dst(const struct nf_conn *ct, enum ip_conntrack_dir dir,
+ const struct xt_action_param *par)
+ const struct xt_action_param *par, int ifindex)
+{
+ struct dst_entry *dst = NULL;
+ struct flowi fl;
@ -303,10 +307,12 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+ switch (xt_family(par)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+ fl.u.ip4.flowi4_oif = ifindex;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6;
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+ fl.u.ip6.flowi6_oif = ifindex;
+ break;
+ }
+
@ -322,8 +328,8 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+{
+ struct dst_entry *this_dst, *other_dst;
+
+ this_dst = xt_flowoffload_dst(ct, dir, par);
+ other_dst = xt_flowoffload_dst(ct, !dir, par);
+ this_dst = xt_flowoffload_dst(ct, !dir, par, xt_out(par)->ifindex);
+ other_dst = xt_flowoffload_dst(ct, dir, par, xt_in(par)->ifindex);
+ if (!this_dst || !other_dst)
+ return -ENOENT;
+
@ -331,9 +337,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+ return -EINVAL;
+
+ route->tuple[dir].dst = this_dst;
+ route->tuple[dir].ifindex = xt_in(par)->ifindex;
+ route->tuple[!dir].dst = other_dst;
+ route->tuple[!dir].ifindex = xt_out(par)->ifindex;
+
+ return 0;
+}
@ -342,7 +346,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_flowoffload_target_info *info = par->targinfo;
+ const struct nf_conn_help *help;
+ struct tcphdr _tcph, *tcph = NULL;
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ struct nf_flow_route route;
@ -350,7 +354,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+ struct nf_conn *ct;
+ struct net *net;
+
+ if (xt_flowoffload_skip(skb))
+ if (xt_flowoffload_skip(skb, xt_family(par)))
+ return XT_CONTINUE;
+
+ ct = nf_ct_get(skb, &ctinfo);
@ -361,6 +365,11 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+ case IPPROTO_TCP:
+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ return XT_CONTINUE;
+
+ tcph = skb_header_pointer(skb, par->thoff,
+ sizeof(_tcph), &_tcph);
+ if (unlikely(!tcph || tcph->fin || tcph->rst))
+ return XT_CONTINUE;
+ break;
+ case IPPROTO_UDP:
+ break;
@ -368,12 +377,11 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+ return XT_CONTINUE;
+ }
+
+ help = nfct_help(ct);
+ if (help)
+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
+ ct->status & IPS_SEQ_ADJUST)
+ return XT_CONTINUE;
+
+ if (ctinfo == IP_CT_NEW ||
+ ctinfo == IP_CT_RELATED)
+ if (!nf_ct_is_confirmed(ct))
+ return XT_CONTINUE;
+
+ if (!xt_in(par) || !xt_out(par))
@ -391,6 +399,11 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+ if (!flow)
+ goto err_flow_alloc;
+
+ if (tcph) {
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ }
+
+ if (flow_offload_add(&nf_flowtable, flow) < 0)
+ goto err_flow_add;
+

@ -85,7 +85,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
struct flow_offload {
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
@@ -126,6 +133,22 @@ unsigned int nf_flow_offload_ip_hook(voi
@@ -125,6 +132,22 @@ unsigned int nf_flow_offload_ip_hook(voi
unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
@ -156,7 +156,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -218,10 +218,16 @@ int flow_offload_add(struct nf_flowtable
@@ -219,10 +219,16 @@ int flow_offload_add(struct nf_flowtable
}
EXPORT_SYMBOL_GPL(flow_offload_add);
@ -173,7 +173,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
@@ -236,6 +242,9 @@ static void flow_offload_del(struct nf_f
@@ -237,6 +243,9 @@ static void flow_offload_del(struct nf_f
if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
flow_offload_fixup_ct_state(e->ct);
@ -183,7 +183,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
flow_offload_free(flow);
}
@@ -349,6 +358,9 @@ static int nf_flow_offload_gc_step(struc
@@ -350,6 +359,9 @@ static int nf_flow_offload_gc_step(struc
if (!teardown)
nf_ct_offload_timeout(flow);
@ -193,7 +193,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
if (nf_flow_has_expired(flow) || teardown)
flow_offload_del(flow_table, flow);
}
@@ -484,10 +496,43 @@ int nf_flow_dnat_port(const struct flow_
@@ -485,10 +497,43 @@ int nf_flow_dnat_port(const struct flow_
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
@ -237,7 +237,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
err = rhashtable_init(&flowtable->rhashtable,
@@ -525,6 +570,8 @@ static void nf_flow_table_iterate_cleanu
@@ -526,6 +571,8 @@ static void nf_flow_table_iterate_cleanu
{
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
flush_delayed_work(&flowtable->gc_work);
@ -246,7 +246,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
}
void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
@@ -538,6 +585,26 @@ void nf_flow_table_cleanup(struct net *n
@@ -539,6 +586,26 @@ void nf_flow_table_cleanup(struct net *n
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
@ -273,7 +273,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
mutex_lock(&flowtable_lock);
@@ -547,9 +614,58 @@ void nf_flow_table_free(struct nf_flowta
@@ -548,9 +615,58 @@ void nf_flow_table_free(struct nf_flowta
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
WARN_ON(!nf_flow_offload_gc_step(flow_table));
rhashtable_destroy(&flow_table->rhashtable);
@ -553,7 +553,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -121,6 +121,9 @@ static void nft_flow_offload_eval(const
@@ -110,6 +110,9 @@ static void nft_flow_offload_eval(const
if (ret < 0)
goto err_flow_add;

@ -26,7 +26,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -358,7 +358,7 @@ static int nf_flow_offload_gc_step(struc
@@ -359,7 +359,7 @@ static int nf_flow_offload_gc_step(struc
if (!teardown)
nf_ct_offload_timeout(flow);

Loading…
Cancel
Save