You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
101 lines
3.6 KiB
Diff
101 lines
3.6 KiB
Diff
From 5a4d7714faa28c03e85d696fba82716fbda5c432 Mon Sep 17 00:00:00 2001
|
|
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
|
Date: Sat, 23 Mar 2019 09:29:49 +0000
|
|
Subject: [PATCH] netfilter: connmark: introduce savedscp
|
|
|
|
savedscp is a method of storing the DSCP of an IP packet into the conntrack
|
|
mark. In combination with a suitable tc filter action (conndscp but may
|
|
end up being integrated into connmark) DSCP values are able to be stored
|
|
on egress and restored on ingress across links that otherwise alter or
|
|
bleach DSCP.
|
|
|
|
This is useful for qdiscs such as CAKE which are able to shape according
|
|
to policies based on DSCP.
|
|
|
|
Ingress classification is traditionally a challenging task since
|
|
iptables rules haven't yet run and tc filter/eBPF programs are pre-NAT
|
|
lookups, hence are unable to see internal IPv4 addresses as used on the
|
|
typical home masquerading gateway.
|
|
|
|
The ingress problem is solved by the tc filter, but the tc people didn't
|
|
like the idea of tc setting conntrack mark values, though they are ok
|
|
with reading conntrack values and hence restoring DSCP from conntrack
|
|
marks.
|
|
|
|
x_tables CONNMARK with the new savedscp action solves the problem of
|
|
storing the DSCP to the conntrack mark.
|
|
|
|
It accepts 2 parameters. The mark is a 32-bit value, usually with a single
|
|
bit set. This bit is set when savedscp saves the DSCP to the mark.
|
|
This is useful to implement a 'one shot'
|
|
iptables based classification where the 'complicated' iptables rules are
|
|
only run once to classify the connection on initial (egress) packet and
|
|
subsequent packets are all marked/restored with the same DSCP. A mark
|
|
of zero disables the setting of any status bit(s).
|
|
|
|
The mask is a 32bit value of at least 6 contiguous bits and represents
|
|
the area where the DSCP will be stored.
|
|
|
|
e.g.
|
|
|
|
iptables -A QOS_MARK_eth0 -t mangle -j CONNMARK --savedscp-mark 0xfc000000/0x01000000
|
|
|
|
Would store the DSCP in the top 6 bits of the 32bit mark field, and use
|
|
the LSB of the top byte as the 'DSCP has been stored' marker.
|
|
|
|
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
|
---
|
|
include/uapi/linux/netfilter/xt_connmark.h | 3 ++-
|
|
net/netfilter/xt_connmark.c | 21 ++++++++++++++++++++-
|
|
2 files changed, 22 insertions(+), 2 deletions(-)
|
|
|
|
--- a/include/uapi/linux/netfilter/xt_connmark.h
|
|
+++ b/include/uapi/linux/netfilter/xt_connmark.h
|
|
@@ -16,7 +16,8 @@
|
|
enum {
|
|
XT_CONNMARK_SET = 0,
|
|
XT_CONNMARK_SAVE,
|
|
- XT_CONNMARK_RESTORE
|
|
+ XT_CONNMARK_RESTORE,
|
|
+ XT_CONNMARK_SAVEDSCP
|
|
};
|
|
|
|
struct xt_connmark_tginfo1 {
|
|
--- a/net/netfilter/xt_connmark.c
|
|
+++ b/net/netfilter/xt_connmark.c
|
|
@@ -42,6 +42,7 @@ connmark_tg(struct sk_buff *skb, const s
|
|
enum ip_conntrack_info ctinfo;
|
|
struct nf_conn *ct;
|
|
u_int32_t newmark;
|
|
+ u_int8_t dscp, maskshift;
|
|
|
|
ct = nf_ct_get(skb, &ctinfo);
|
|
if (ct == NULL)
|
|
@@ -57,7 +58,25 @@ connmark_tg(struct sk_buff *skb, const s
|
|
break;
|
|
case XT_CONNMARK_SAVE:
|
|
newmark = (ct->mark & ~info->ctmask) ^
|
|
- (skb->mark & info->nfmask);
|
|
+ (skb->mark & info->nfmask);
|
|
+ if (ct->mark != newmark) {
|
|
+ ct->mark = newmark;
|
|
+ nf_conntrack_event_cache(IPCT_MARK, ct);
|
|
+ }
|
|
+ break;
|
|
+ case XT_CONNMARK_SAVEDSCP:
|
|
+ if (skb->protocol == htons(ETH_P_IP))
|
|
+ dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
|
|
+ else if (skb->protocol == htons(ETH_P_IPV6))
|
|
+ dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
|
|
+ else /* protocol doesn't have diffserv */
|
|
+ break;
|
|
+
|
|
+ /* nfmask contains the mask shift value */
|
|
+ maskshift = info->nfmask & 0x1f;
|
|
+ newmark = (ct->mark & ~info->ctmark) |
|
|
+ (info->ctmask | (dscp << maskshift));
|
|
+
|
|
if (ct->mark != newmark) {
|
|
ct->mark = newmark;
|
|
nf_conntrack_event_cache(IPCT_MARK, ct);
|