summary refs log tree commit diff
diff options
context:
space:
mode:
author Jakub Kicinski <kuba@kernel.org> 2026-04-07 20:20:59 -0700
committer Jakub Kicinski <kuba@kernel.org> 2026-04-07 20:21:00 -0700
commit f821664dde29302e8450aa0597bf1e4c7c5b0a22 (patch)
tree 8015badc6619dc635972bc7d47c2e2f6d88bc7fd
parent efaa71faf212324ecbf6d5339e9717fe53254f58 (diff)
parent 32dfd742f06a68fac6499a58f52025990c854031 (diff)
Merge branch 'seg6-fix-dst_cache-sharing-in-seg6-lwtunnel'
Andrea Mayer says:

====================
seg6: fix dst_cache sharing in seg6 lwtunnel

The seg6 lwtunnel encap uses a single per-route dst_cache shared between
seg6_input_core() and seg6_output_core(). These two paths can perform
the post-encap SID lookup in different routing contexts (e.g., ip rules
matching on the ingress interface, or VRF table separation). Whichever
path runs first populates the cache, and the other reuses it blindly,
bypassing its own lookup.

Patch 1 fixes this by splitting the cache into cache_input and
cache_output. Patch 2 adds a selftest that validates the isolation.
====================

Link: https://patch.msgid.link/20260404004405.4057-1-andrea.mayer@uniroma2.it
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- net/ipv6/seg6_iptunnel.c 34
-rw-r--r-- tools/testing/selftests/net/Makefile 1
-rwxr-xr-x tools/testing/selftests/net/srv6_iptunnel_cache.sh 197
3 files changed, 221 insertions, 11 deletions
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 3e1b9991131a..d6a0f7df9080 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -48,7 +48,8 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
}
struct seg6_lwt {
- struct dst_cache cache;
+ struct dst_cache cache_input;
+ struct dst_cache cache_output;
struct seg6_iptunnel_encap tuninfo[];
};
@@ -488,7 +489,7 @@ static int seg6_input_core(struct net *net, struct sock *sk,
slwt = seg6_lwt_lwtunnel(lwtst);
local_bh_disable();
- dst = dst_cache_get(&slwt->cache);
+ dst = dst_cache_get(&slwt->cache_input);
local_bh_enable();
err = seg6_do_srh(skb, dst);
@@ -504,7 +505,7 @@ static int seg6_input_core(struct net *net, struct sock *sk,
/* cache only if we don't create a dst reference loop */
if (!dst->error && lwtst != dst->lwtstate) {
local_bh_disable();
- dst_cache_set_ip6(&slwt->cache, dst,
+ dst_cache_set_ip6(&slwt->cache_input, dst,
&ipv6_hdr(skb)->saddr);
local_bh_enable();
}
@@ -564,7 +565,7 @@ static int seg6_output_core(struct net *net, struct sock *sk,
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
local_bh_disable();
- dst = dst_cache_get(&slwt->cache);
+ dst = dst_cache_get(&slwt->cache_output);
local_bh_enable();
err = seg6_do_srh(skb, dst);
@@ -591,7 +592,7 @@ static int seg6_output_core(struct net *net, struct sock *sk,
/* cache only if we don't create a dst reference loop */
if (orig_dst->lwtstate != dst->lwtstate) {
local_bh_disable();
- dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+ dst_cache_set_ip6(&slwt->cache_output, dst, &fl6.saddr);
local_bh_enable();
}
@@ -701,11 +702,13 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
slwt = seg6_lwt_lwtunnel(newts);
- err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
- if (err) {
- kfree(newts);
- return err;
- }
+ err = dst_cache_init(&slwt->cache_input, GFP_ATOMIC);
+ if (err)
+ goto err_free_newts;
+
+ err = dst_cache_init(&slwt->cache_output, GFP_ATOMIC);
+ if (err)
+ goto err_destroy_input;
memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
@@ -720,11 +723,20 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
*ts = newts;
return 0;
+
+err_destroy_input:
+ dst_cache_destroy(&slwt->cache_input);
+err_free_newts:
+ kfree(newts);
+ return err;
}
static void seg6_destroy_state(struct lwtunnel_state *lwt)
{
- dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
+ struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwt);
+
+ dst_cache_destroy(&slwt->cache_input);
+ dst_cache_destroy(&slwt->cache_output);
}
static int seg6_fill_encap_info(struct sk_buff *skb,
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 605c54c0e8a3..c709523c99c6 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -89,6 +89,7 @@ TEST_PROGS := \
srv6_end_x_next_csid_l3vpn_test.sh \
srv6_hencap_red_l3vpn_test.sh \
srv6_hl2encap_red_l2vpn_test.sh \
+ srv6_iptunnel_cache.sh \
stress_reuseport_listen.sh \
tcp_fastopen_backup_key.sh \
test_bpf.sh \
diff --git a/tools/testing/selftests/net/srv6_iptunnel_cache.sh b/tools/testing/selftests/net/srv6_iptunnel_cache.sh
new file mode 100755
index 000000000000..62638ab679d9
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_iptunnel_cache.sh
@@ -0,0 +1,197 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+
+# This test verifies that the seg6 lwtunnel does not share the dst_cache
+# between the input (forwarding) and output (locally generated) paths.
+#
+# A shared dst_cache allows a forwarded packet to populate the cache and a
+# subsequent locally generated packet to silently reuse that entry, bypassing
+# its own route lookup. To expose this, the SID is made reachable only for
+# forwarded traffic (via an ip rule matching iif) and blackholed for everything
+# else. A local ping on ns_router must always hit the blackhole;
+# if it succeeds after a forwarded packet has populated the
+# cache, the bug is confirmed.
+#
+# Both forwarded and local packets are pinned to the same CPU with taskset,
+# since dst_cache is per-cpu.
+#
+#
+# +--------------------+ +--------------------+
+# | ns_src | | ns_dst |
+# | | | |
+# | veth-s0 | | veth-d0 |
+# | fd00::1/64 | | fd01::2/64 |
+# +-------+------------+ +----------+---------+
+# | |
+# | +--------------------+ |
+# | | ns_router | |
+# | | | |
+# +------------+ veth-r0 veth-r1 +--------------+
+# | fd00::2 fd01::1 |
+# +--------------------+
+#
+#
+# ns_router: encap (main table)
+# +---------+---------------------------------------+
+# | dst | action |
+# +---------+---------------------------------------+
+# | cafe::1 | encap seg6 mode encap segs fc00::100 |
+# +---------+---------------------------------------+
+#
+# ns_router: post-encap SID resolution
+# +-------+------------+----------------------------+
+# | table | dst | action |
+# +-------+------------+----------------------------+
+# | 100 | fc00::100 | via fd01::2 dev veth-r1 |
+# +-------+------------+----------------------------+
+# | main | fc00::100 | blackhole |
+# +-------+------------+----------------------------+
+#
+# ns_router: ip rule
+# +------------------+------------------------------+
+# | match | action |
+# +------------------+------------------------------+
+# | iif veth-r0 | lookup 100 |
+# +------------------+------------------------------+
+#
+# ns_dst: SRv6 decap (main table)
+# +--------------+----------------------------------+
+# | SID | action |
+# +--------------+----------------------------------+
+# | fc00::100 | End.DT6 table 255 (local) |
+# +--------------+----------------------------------+
+
+source lib.sh
+
+readonly SID="fc00::100"
+readonly DEST="cafe::1"
+
+readonly SRC_MAC="02:00:00:00:00:01"
+readonly RTR_R0_MAC="02:00:00:00:00:02"
+readonly RTR_R1_MAC="02:00:00:00:00:03"
+readonly DST_MAC="02:00:00:00:00:04"
+
+cleanup()
+{
+ cleanup_ns "${NS_SRC}" "${NS_RTR}" "${NS_DST}"
+}
+
+check_prerequisites()
+{
+ if ! command -v ip &>/dev/null; then
+ echo "SKIP: ip tool not found"
+ exit "${ksft_skip}"
+ fi
+
+ if ! command -v ping &>/dev/null; then
+ echo "SKIP: ping not found"
+ exit "${ksft_skip}"
+ fi
+
+ if ! command -v sysctl &>/dev/null; then
+ echo "SKIP: sysctl not found"
+ exit "${ksft_skip}"
+ fi
+
+ if ! command -v taskset &>/dev/null; then
+ echo "SKIP: taskset not found"
+ exit "${ksft_skip}"
+ fi
+}
+
+setup()
+{
+ setup_ns NS_SRC NS_RTR NS_DST
+
+ ip link add veth-s0 netns "${NS_SRC}" type veth \
+ peer name veth-r0 netns "${NS_RTR}"
+ ip link add veth-r1 netns "${NS_RTR}" type veth \
+ peer name veth-d0 netns "${NS_DST}"
+
+ ip -n "${NS_SRC}" link set veth-s0 address "${SRC_MAC}"
+ ip -n "${NS_RTR}" link set veth-r0 address "${RTR_R0_MAC}"
+ ip -n "${NS_RTR}" link set veth-r1 address "${RTR_R1_MAC}"
+ ip -n "${NS_DST}" link set veth-d0 address "${DST_MAC}"
+
+ # ns_src
+ ip -n "${NS_SRC}" link set veth-s0 up
+ ip -n "${NS_SRC}" addr add fd00::1/64 dev veth-s0 nodad
+ ip -n "${NS_SRC}" -6 route add "${DEST}"/128 via fd00::2
+
+ # ns_router
+ ip -n "${NS_RTR}" link set veth-r0 up
+ ip -n "${NS_RTR}" addr add fd00::2/64 dev veth-r0 nodad
+ ip -n "${NS_RTR}" link set veth-r1 up
+ ip -n "${NS_RTR}" addr add fd01::1/64 dev veth-r1 nodad
+ ip netns exec "${NS_RTR}" sysctl -qw net.ipv6.conf.all.forwarding=1
+
+ ip -n "${NS_RTR}" -6 route add "${DEST}"/128 \
+ encap seg6 mode encap segs "${SID}" dev veth-r0
+ ip -n "${NS_RTR}" -6 route add "${SID}"/128 table 100 \
+ via fd01::2 dev veth-r1
+ ip -n "${NS_RTR}" -6 route add blackhole "${SID}"/128
+ ip -n "${NS_RTR}" -6 rule add iif veth-r0 lookup 100
+
+ # ns_dst
+ ip -n "${NS_DST}" link set veth-d0 up
+ ip -n "${NS_DST}" addr add fd01::2/64 dev veth-d0 nodad
+ ip -n "${NS_DST}" addr add "${DEST}"/128 dev lo nodad
+ ip -n "${NS_DST}" -6 route add "${SID}"/128 \
+ encap seg6local action End.DT6 table 255 dev veth-d0
+ ip -n "${NS_DST}" -6 route add fd00::/64 via fd01::1
+
+ # static neighbors
+ ip -n "${NS_SRC}" -6 neigh add fd00::2 dev veth-s0 \
+ lladdr "${RTR_R0_MAC}" nud permanent
+ ip -n "${NS_RTR}" -6 neigh add fd00::1 dev veth-r0 \
+ lladdr "${SRC_MAC}" nud permanent
+ ip -n "${NS_RTR}" -6 neigh add fd01::2 dev veth-r1 \
+ lladdr "${DST_MAC}" nud permanent
+ ip -n "${NS_DST}" -6 neigh add fd01::1 dev veth-d0 \
+ lladdr "${RTR_R1_MAC}" nud permanent
+}
+
+test_cache_isolation()
+{
+ RET=0
+
+ # local ping with empty cache: must fail (SID is blackholed)
+ if ip netns exec "${NS_RTR}" taskset -c 0 \
+ ping -c 1 -W 2 "${DEST}" &>/dev/null; then
+ echo "SKIP: local ping succeeded, topology broken"
+ exit "${ksft_skip}"
+ fi
+
+ # forward from ns_src to populate the input cache
+ if ! ip netns exec "${NS_SRC}" taskset -c 0 \
+ ping -c 1 -W 2 "${DEST}" &>/dev/null; then
+ echo "SKIP: forwarded ping failed, topology broken"
+ exit "${ksft_skip}"
+ fi
+
+ # local ping again: must still fail; if the output path reuses
+ # the input cache, it bypasses the blackhole and the ping succeeds
+ if ip netns exec "${NS_RTR}" taskset -c 0 \
+ ping -c 1 -W 2 "${DEST}" &>/dev/null; then
+ echo "FAIL: output path used dst cached by input path"
+ RET="${ksft_fail}"
+ else
+ echo "PASS: output path dst_cache is independent"
+ fi
+
+ return "${RET}"
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges"
+ exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+check_prerequisites
+setup
+test_cache_isolation
+exit "${RET}"