Skip to content

Commit a04a480

Browse files
David Ahern authored and davem330 committed
net: Require exact match for TCP socket lookups if dif is l3mdev
Currently, socket lookups for l3mdev (vrf) use cases can match a socket that is bound to a port but not a device (i.e., a global socket). If the sysctl tcp_l3mdev_accept is not set, this leads to ack packets going out based on the main table even though the packet came in from an L3 domain. The end result is that the connection does not establish, creating confusion for users since the service is running and a socket shows in ss output. Fix by requiring an exact dif to sk_bound_dev_if match if the skb came through an interface enslaved to an l3mdev device and tcp_l3mdev_accept is not set. skbs received through an l3mdev interface are marked by setting a flag in inet{6}_skb_parm. The IPv6 variant is already set; this patch adds the flag for IPv4. Using an skb flag avoids a device lookup on the dif. The flag is set in the VRF driver using the IP{6}CB macros. For IPv4, the inet_skb_parm struct is moved in the cb per commit 971f10e, so the match function in the TCP stack needs to use TCP_SKB_CB. For IPv6, the move is done after the socket lookup, so IP6CB is used. The flags field in the inet_skb_parm struct needs to be widened to add another flag. There is currently a 1-byte hole following the flags, so it can be expanded to u16 without increasing the size of the struct. Fixes: 193125d ("net: Introduce VRF device driver") Signed-off-by: David Ahern <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent fb5c6cf commit a04a480

File tree

6 files changed

+44
-11
lines changed

6 files changed

+44
-11
lines changed

drivers/net/vrf.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
956956
if (skb->pkt_type == PACKET_LOOPBACK) {
957957
skb->dev = vrf_dev;
958958
skb->skb_iif = vrf_dev->ifindex;
959+
IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
959960
skb->pkt_type = PACKET_HOST;
960961
goto out;
961962
}
@@ -996,6 +997,7 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
996997
{
997998
skb->dev = vrf_dev;
998999
skb->skb_iif = vrf_dev->ifindex;
1000+
IPCB(skb)->flags |= IPSKB_L3SLAVE;
9991001

10001002
/* loopback traffic; do not push through packet taps again.
10011003
* Reset pkt_type for upper layers to process skb

include/linux/ipv6.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,12 @@ struct inet6_skb_parm {
123123
};
124124

125125
#if defined(CONFIG_NET_L3_MASTER_DEV)
126-
static inline bool skb_l3mdev_slave(__u16 flags)
126+
static inline bool ipv6_l3mdev_skb(__u16 flags)
127127
{
128128
return flags & IP6SKB_L3SLAVE;
129129
}
130130
#else
131-
static inline bool skb_l3mdev_slave(__u16 flags)
131+
static inline bool ipv6_l3mdev_skb(__u16 flags)
132132
{
133133
return false;
134134
}
@@ -139,11 +139,22 @@ static inline bool skb_l3mdev_slave(__u16 flags)
139139

140140
static inline int inet6_iif(const struct sk_buff *skb)
141141
{
142-
bool l3_slave = skb_l3mdev_slave(IP6CB(skb)->flags);
142+
bool l3_slave = ipv6_l3mdev_skb(IP6CB(skb)->flags);
143143

144144
return l3_slave ? skb->skb_iif : IP6CB(skb)->iif;
145145
}
146146

147+
/* can not be used in TCP layer after tcp_v6_fill_cb */
148+
static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
149+
{
150+
#if defined(CONFIG_NET_L3_MASTER_DEV)
151+
if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
152+
ipv6_l3mdev_skb(IP6CB(skb)->flags))
153+
return true;
154+
#endif
155+
return false;
156+
}
157+
147158
struct tcp6_request_sock {
148159
struct tcp_request_sock tcp6rsk_tcp;
149160
};

include/net/ip.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ struct sock;
3838
struct inet_skb_parm {
3939
int iif;
4040
struct ip_options opt; /* Compiled IP options */
41-
unsigned char flags;
41+
u16 flags;
4242

4343
#define IPSKB_FORWARDED BIT(0)
4444
#define IPSKB_XFRM_TUNNEL_SIZE BIT(1)
@@ -48,10 +48,16 @@ struct inet_skb_parm {
4848
#define IPSKB_DOREDIRECT BIT(5)
4949
#define IPSKB_FRAG_PMTU BIT(6)
5050
#define IPSKB_FRAG_SEGS BIT(7)
51+
#define IPSKB_L3SLAVE BIT(8)
5152

5253
u16 frag_max_size;
5354
};
5455

56+
static inline bool ipv4_l3mdev_skb(u16 flags)
57+
{
58+
return !!(flags & IPSKB_L3SLAVE);
59+
}
60+
5561
static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
5662
{
5763
return ip_hdr(skb)->ihl * 4;

include/net/tcp.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -794,12 +794,23 @@ struct tcp_skb_cb {
794794
*/
795795
static inline int tcp_v6_iif(const struct sk_buff *skb)
796796
{
797-
bool l3_slave = skb_l3mdev_slave(TCP_SKB_CB(skb)->header.h6.flags);
797+
bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
798798

799799
return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
800800
}
801801
#endif
802802

803+
/* TCP_SKB_CB reference means this can not be used from early demux */
804+
static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
805+
{
806+
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
807+
if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
808+
ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
809+
return true;
810+
#endif
811+
return false;
812+
}
813+
803814
/* Due to TSO, an SKB can be composed of multiple actual
804815
* packets. To keep these tracked properly, we use this.
805816
*/

net/ipv4/inet_hashtables.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <net/inet_hashtables.h>
2626
#include <net/secure_seq.h>
2727
#include <net/ip.h>
28+
#include <net/tcp.h>
2829
#include <net/sock_reuseport.h>
2930

3031
static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
@@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port);
172173

173174
static inline int compute_score(struct sock *sk, struct net *net,
174175
const unsigned short hnum, const __be32 daddr,
175-
const int dif)
176+
const int dif, bool exact_dif)
176177
{
177178
int score = -1;
178179
struct inet_sock *inet = inet_sk(sk);
@@ -186,7 +187,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
186187
return -1;
187188
score += 4;
188189
}
189-
if (sk->sk_bound_dev_if) {
190+
if (sk->sk_bound_dev_if || exact_dif) {
190191
if (sk->sk_bound_dev_if != dif)
191192
return -1;
192193
score += 4;
@@ -215,11 +216,12 @@ struct sock *__inet_lookup_listener(struct net *net,
215216
unsigned int hash = inet_lhashfn(net, hnum);
216217
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
217218
int score, hiscore = 0, matches = 0, reuseport = 0;
219+
bool exact_dif = inet_exact_dif_match(net, skb);
218220
struct sock *sk, *result = NULL;
219221
u32 phash = 0;
220222

221223
sk_for_each_rcu(sk, &ilb->head) {
222-
score = compute_score(sk, net, hnum, daddr, dif);
224+
score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
223225
if (score > hiscore) {
224226
reuseport = sk->sk_reuseport;
225227
if (reuseport) {

net/ipv6/inet6_hashtables.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established);
9696
static inline int compute_score(struct sock *sk, struct net *net,
9797
const unsigned short hnum,
9898
const struct in6_addr *daddr,
99-
const int dif)
99+
const int dif, bool exact_dif)
100100
{
101101
int score = -1;
102102

@@ -109,7 +109,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
109109
return -1;
110110
score++;
111111
}
112-
if (sk->sk_bound_dev_if) {
112+
if (sk->sk_bound_dev_if || exact_dif) {
113113
if (sk->sk_bound_dev_if != dif)
114114
return -1;
115115
score++;
@@ -131,11 +131,12 @@ struct sock *inet6_lookup_listener(struct net *net,
131131
unsigned int hash = inet_lhashfn(net, hnum);
132132
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
133133
int score, hiscore = 0, matches = 0, reuseport = 0;
134+
bool exact_dif = inet6_exact_dif_match(net, skb);
134135
struct sock *sk, *result = NULL;
135136
u32 phash = 0;
136137

137138
sk_for_each(sk, &ilb->head) {
138-
score = compute_score(sk, net, hnum, daddr, dif);
139+
score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
139140
if (score > hiscore) {
140141
reuseport = sk->sk_reuseport;
141142
if (reuseport) {

0 commit comments

Comments
 (0)