Merge branch 'net-fib_rules-add-dscp-mask-support'

Ido Schimmel says:

====================
net: fib_rules: Add DSCP mask support

In some deployments users would like to encode path information into
certain bits of the IPv6 flow label, the UDP source port and the DSCP
field and use this information to route packets accordingly.

Redirecting traffic to a routing table based on specific bits in the
DSCP field is not currently possible: FIB rules only support an exact
match on the DSCP value.

This patchset extends FIB rules to match on the DSCP field with an
optional mask.

Patches #1-#5 gradually extend FIB rules to match on the DSCP field with
an optional mask.

Patch #6 adds test cases for the new functionality.

iproute2 support can be found here [1].

[1] https://github.com/idosch/iproute2/tree/submit/fib_rule_mask_v1
====================

Link: https://patch.msgid.link/20250220080525.831924-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-02-21 16:08:54 -08:00
6 changed files with 132 additions and 5 deletions

View File

@@ -190,6 +190,10 @@ attribute-sets:
name: dport-mask
type: u16
display-hint: hex
-
name: dscp-mask
type: u8
display-hint: hex
operations:
enum-model: directional
@@ -225,6 +229,7 @@ operations:
- flowlabel-mask
- sport-mask
- dport-mask
- dscp-mask
-
name: newrule-ntf
doc: Notify a rule creation

View File

@@ -72,6 +72,7 @@ enum {
FRA_FLOWLABEL_MASK, /* flowlabel mask */
FRA_SPORT_MASK, /* sport mask */
FRA_DPORT_MASK, /* dport mask */
FRA_DSCP_MASK, /* dscp mask */
__FRA_MAX
};

View File

@@ -845,6 +845,7 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 },
[FRA_SPORT_MASK] = { .type = NLA_U16 },
[FRA_DPORT_MASK] = { .type = NLA_U16 },
[FRA_DSCP_MASK] = NLA_POLICY_MASK(NLA_U8, INET_DSCP_MASK >> 2),
};
int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,

View File

@@ -37,6 +37,7 @@ struct fib4_rule {
u8 dst_len;
u8 src_len;
dscp_t dscp;
dscp_t dscp_mask;
u8 dscp_full:1; /* DSCP or TOS selector */
__be32 src;
__be32 srcmask;
@@ -192,7 +193,8 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
* to mask the upper three DSCP bits prior to matching to maintain
* legacy behavior.
*/
if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
if (r->dscp_full &&
(r->dscp ^ inet_dsfield_to_dscp(fl4->flowi4_tos)) & r->dscp_mask)
return 0;
else if (!r->dscp_full && r->dscp &&
!fib_dscp_masked_match(r->dscp, fl4))
@@ -235,11 +237,35 @@ static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4,
}
rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
rule4->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK);
rule4->dscp_full = true;
return 0;
}
/*
 * Apply a user-supplied DSCP mask attribute to an IPv4 FIB rule.
 *
 * @nla:    attribute holding the mask as a u8; the value is shifted left
 *          by two before being converted with inet_dsfield_to_dscp().
 * @rule4:  rule being configured; its dscp_mask is overwritten on success.
 * @extack: extended ack that receives the error message on failure.
 *
 * The mask is rejected when the rule has no full DSCP selector
 * (dscp_full is clear), or when the rule's DSCP value has bits set that
 * the mask does not cover.
 *
 * Returns 0 on success, -EINVAL otherwise.
 */
static int fib4_nl2rule_dscp_mask(const struct nlattr *nla,
				  struct fib4_rule *rule4,
				  struct netlink_ext_ack *extack)
{
	dscp_t mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
	int err = -EINVAL;

	if (!rule4->dscp_full) {
		/* A mask on its own has nothing to qualify. */
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Cannot specify DSCP mask without DSCP value");
	} else if (rule4->dscp & ~mask) {
		/* The DSCP value has bits outside the mask. */
		NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask");
	} else {
		rule4->dscp_mask = mask;
		err = 0;
	}

	return err;
}
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@@ -271,6 +297,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0)
goto errout;
if (tb[FRA_DSCP_MASK] &&
fib4_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule4, extack) < 0)
goto errout;
/* split local/main if they are not already split */
err = fib_unmerge(net);
if (err)
@@ -366,6 +396,14 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
}
if (tb[FRA_DSCP_MASK]) {
dscp_t dscp_mask;
dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2);
if (!rule4->dscp_full || rule4->dscp_mask != dscp_mask)
return 0;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
return 0;
@@ -391,7 +429,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
if (rule4->dscp_full) {
frh->tos = 0;
if (nla_put_u8(skb, FRA_DSCP,
inet_dscp_to_dsfield(rule4->dscp) >> 2))
inet_dscp_to_dsfield(rule4->dscp) >> 2) ||
nla_put_u8(skb, FRA_DSCP_MASK,
inet_dscp_to_dsfield(rule4->dscp_mask) >> 2))
goto nla_put_failure;
} else {
frh->tos = inet_dscp_to_dsfield(rule4->dscp);
@@ -418,7 +458,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
return nla_total_size(4) /* dst */
+ nla_total_size(4) /* src */
+ nla_total_size(4) /* flow */
+ nla_total_size(1); /* dscp */
+ nla_total_size(1) /* dscp */
+ nla_total_size(1); /* dscp mask */
}
static void fib4_rule_flush_cache(struct fib_rules_ops *ops)

View File

@@ -29,6 +29,7 @@ struct fib6_rule {
__be32 flowlabel;
__be32 flowlabel_mask;
dscp_t dscp;
dscp_t dscp_mask;
u8 dscp_full:1; /* DSCP or TOS selector */
};
@@ -331,7 +332,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 0;
}
if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
if ((r->dscp ^ ip6_dscp(fl6->flowlabel)) & r->dscp_mask)
return 0;
if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask)
@@ -360,11 +361,35 @@ static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
}
rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
rule6->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK);
rule6->dscp_full = true;
return 0;
}
/*
 * Apply a user-supplied DSCP mask attribute to an IPv6 FIB rule.
 *
 * @nla:    attribute holding the mask as a u8; the value is shifted left
 *          by two before being converted with inet_dsfield_to_dscp().
 * @rule6:  rule being configured; its dscp_mask is overwritten on success.
 * @extack: extended ack that receives the error message on failure.
 *
 * The mask is rejected when the rule has no full DSCP selector
 * (dscp_full is clear), or when the rule's DSCP value has bits set that
 * the mask does not cover.
 *
 * Returns 0 on success, -EINVAL otherwise.
 */
static int fib6_nl2rule_dscp_mask(const struct nlattr *nla,
				  struct fib6_rule *rule6,
				  struct netlink_ext_ack *extack)
{
	dscp_t mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
	int err = -EINVAL;

	if (!rule6->dscp_full) {
		/* A mask on its own has nothing to qualify. */
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Cannot specify DSCP mask without DSCP value");
	} else if (rule6->dscp & ~mask) {
		/* The DSCP value has bits outside the mask. */
		NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask");
	} else {
		rule6->dscp_mask = mask;
		err = 0;
	}

	return err;
}
static int fib6_nl2rule_flowlabel(struct nlattr **tb, struct fib6_rule *rule6,
struct netlink_ext_ack *extack)
{
@@ -409,10 +434,15 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
goto errout;
}
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
rule6->dscp_mask = frh->tos ? inet_dsfield_to_dscp(INET_DSCP_MASK) : 0;
if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
goto errout;
if (tb[FRA_DSCP_MASK] &&
fib6_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule6, extack) < 0)
goto errout;
if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) &&
fib6_nl2rule_flowlabel(tb, rule6, extack) < 0)
goto errout;
@@ -482,6 +512,14 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
}
if (tb[FRA_DSCP_MASK]) {
dscp_t dscp_mask;
dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2);
if (!rule6->dscp_full || rule6->dscp_mask != dscp_mask)
return 0;
}
if (tb[FRA_FLOWLABEL] &&
nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel)
return 0;
@@ -512,7 +550,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
if (rule6->dscp_full) {
frh->tos = 0;
if (nla_put_u8(skb, FRA_DSCP,
inet_dscp_to_dsfield(rule6->dscp) >> 2))
inet_dscp_to_dsfield(rule6->dscp) >> 2) ||
nla_put_u8(skb, FRA_DSCP_MASK,
inet_dscp_to_dsfield(rule6->dscp_mask) >> 2))
goto nla_put_failure;
} else {
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
@@ -539,6 +579,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
return nla_total_size(16) /* dst */
+ nla_total_size(16) /* src */
+ nla_total_size(1) /* dscp */
+ nla_total_size(1) /* dscp mask */
+ nla_total_size(4) /* flowlabel */
+ nla_total_size(4); /* flowlabel mask */
}

View File

@@ -310,6 +310,25 @@ fib_rule6_test()
"iif dscp no redirect to table"
fi
ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
if [ $? -eq 0 ]; then
match="dscp 0x0f/0x0f"
tosmatch=$(printf 0x"%x" $((0x1f << 2)))
tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
getmatch="tos $tosmatch"
getnomatch="tos $tosnomatch"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "dscp masked redirect to table" \
"dscp masked no redirect to table"
match="dscp 0x0f/0x0f"
getmatch="from $SRC_IP6 iif $DEV tos $tosmatch"
getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "iif dscp masked redirect to table" \
"iif dscp masked no redirect to table"
fi
fib_check_iproute_support "flowlabel" "flowlabel"
if [ $? -eq 0 ]; then
match="flowlabel 0xfffff"
@@ -597,6 +616,25 @@ fib_rule4_test()
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi
ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
if [ $? -eq 0 ]; then
match="dscp 0x0f/0x0f"
tosmatch=$(printf 0x"%x" $((0x1f << 2)))
tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
getmatch="tos $tosmatch"
getnomatch="tos $tosnomatch"
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "dscp masked redirect to table" \
"dscp masked no redirect to table"
match="dscp 0x0f/0x0f"
getmatch="from $SRC_IP iif $DEV tos $tosmatch"
getnomatch="from $SRC_IP iif $DEV tos $tosnomatch"
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "iif dscp masked redirect to table" \
"iif dscp masked no redirect to table"
fi
}
fib_rule4_vrf_test()