Skip to content

Commit 10742ef

Browse files
w1ldptr authored and
Saeed Mahameed committed
net/mlx5e: VF tunnel TX traffic offloading
When the tunnel endpoint is on a VF, the driver still assumes that the endpoint is on the uplink and incorrectly configures encap rule offload according to that assumption. As a result, traffic is sent directly to the uplink and rules installed on the representor of the tunnel endpoint VF are ignored. Implement the following changes to allow offloading TX traffic with the tunnel endpoint on a VF: - For tunneling flows, perform route lookup on the route and out devices pair. If the out device is the uplink and the route device is a VF of the same physical port, then modify the packet reg_c_0 metadata register (source port) with the value of the VF vport. Use the eswitch vhca_id->vport mapping introduced in one of the previous patches in the series to obtain the vport from the route netdevice. - Recirculate encapsulated packets to the VF vport in order to apply any flow rules installed on the VF representor that match on encapsulated traffic. Only enable support for this functionality when all of the following conditions are true: - Hardware advertises the capability to preserve the reg_c_0 value on packet recirculation. - Vport metadata matching is enabled. - Termination tables are not required by the flow. Example TC rules for VF tunnel traffic: 1. Rule that redirects packets from UL to VF rep that has the tunnel endpoint IP address: $ tc -s filter show dev enp8s0f0 ingress filter protocol ip pref 4 flower chain 0 filter protocol ip pref 4 flower chain 0 handle 0x1 dst_mac 16:c9:a0:2d:69:2c src_mac 0c:42:a1:58:ab:e4 eth_type ipv4 ip_flags nofrag in_hw in_hw_count 1 action order 1: mirred (Egress Redirect to device enp8s0f0_0) stolen index 3 ref 1 bind 1 installed 377 sec used 0 sec Action statistics: Sent 114096 bytes 952 pkt (dropped 0, overlimits 0 requeues 0) Sent software 0 bytes 0 pkt Sent hardware 114096 bytes 952 pkt backlog 0b 0p requeues 0 cookie 878fa48d8c423fc08c3b6ca599b50a97 no_percpu used_hw_stats delayed 2. 
Rule that decapsulates the tunneled flow and redirects to destination VF representor: $ tc -s filter show dev vxlan_sys_4789 ingress filter protocol ip pref 4 flower chain 0 filter protocol ip pref 4 flower chain 0 handle 0x1 dst_mac ca:2e:a7:3f:f5:0f src_mac 0a:40:bd:30:89:99 eth_type ipv4 enc_dst_ip 7.7.7.5 enc_src_ip 7.7.7.1 enc_key_id 98 enc_dst_port 4789 enc_tos 0 ip_flags nofrag in_hw in_hw_count 1 action order 1: tunnel_key unset pipe index 2 ref 1 bind 1 installed 434 sec used 434 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 used_hw_stats delayed action order 2: mirred (Egress Redirect to device enp8s0f0_1) stolen index 4 ref 1 bind 1 installed 434 sec used 0 sec Action statistics: Sent 129936 bytes 1082 pkt (dropped 0, overlimits 0 requeues 0) Sent software 0 bytes 0 pkt Sent hardware 129936 bytes 1082 pkt backlog 0b 0p requeues 0 cookie ac17cf398c4c69e4a5b2f7aabd1b88ff no_percpu used_hw_stats delayed Co-developed-by: Dmytro Linkin <[email protected]> Signed-off-by: Dmytro Linkin <[email protected]> Signed-off-by: Vlad Buslov <[email protected]> Reviewed-by: Roi Dayan <[email protected]> Signed-off-by: Saeed Mahameed <[email protected]>
1 parent 9e51c0a commit 10742ef

File tree

5 files changed

+201
-11
lines changed

5 files changed

+201
-11
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en_tc.c

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,11 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
165165
.moffset = 0,
166166
.mlen = 2,
167167
},
168+
[VPORT_TO_REG] = {
169+
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
170+
.moffset = 2,
171+
.mlen = 2,
172+
},
168173
[TUNNEL_TO_REG] = {
169174
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
170175
.moffset = 1,
@@ -1315,6 +1320,44 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)
13151320
mutex_unlock(&uplink_priv->unready_flows_lock);
13161321
}
13171322

1323+
static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
1324+
1325+
/*
 * Decide whether a tunnel whose egress device is out_dev is actually routed
 * through a VF netdev (route_dev) that lives on the same hardware.
 *
 * Returns false unless out_dev's core device is a PF and route_dev's core
 * device is a VF; in that case the answer is whatever same_hw_devs() reports
 * for the two privs.
 *
 * NOTE(review): both netdevs are interpreted as mlx5e netdevs via
 * netdev_priv() with no check in this function - callers must guarantee
 * that (mlx5e_set_vf_tunnel() checks netdev_ops for route_dev only).
 */
static bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
1326+
{
1327+
struct mlx5_core_dev *out_mdev, *route_mdev;
1328+
struct mlx5e_priv *out_priv, *route_priv;
1329+
1330+
out_priv = netdev_priv(out_dev);
1331+
out_mdev = out_priv->mdev;
1332+
route_priv = netdev_priv(route_dev);
1333+
route_mdev = route_priv->mdev;
1334+
1335+
/* Only the "egress on PF, routed via VF" combination qualifies. */
if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
1336+
route_mdev->coredev_type != MLX5_COREDEV_VF)
1337+
return false;
1338+
1339+
return same_hw_devs(out_priv, route_priv);
1340+
}
1341+
1342+
/*
 * Translate route_dev into an eswitch vport number.
 *
 * Reads the vhca_id general capability of route_dev's core device and passes
 * it to mlx5_eswitch_vhca_id_to_vport() on the eswitch that belongs to
 * out_dev's core device.
 *
 * @out_dev:   netdev whose core device owns the eswitch used for the lookup
 * @route_dev: netdev whose vhca_id is looked up
 * @vport:     output, filled in by mlx5_eswitch_vhca_id_to_vport()
 *
 * Returns the value returned by mlx5_eswitch_vhca_id_to_vport().
 */
static int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev,
1343+
u16 *vport)
1344+
{
1345+
struct mlx5e_priv *out_priv, *route_priv;
1346+
struct mlx5_core_dev *route_mdev;
1347+
struct mlx5_eswitch *esw;
1348+
u16 vhca_id;
1349+
int err;
1350+
1351+
out_priv = netdev_priv(out_dev);
1352+
esw = out_priv->mdev->priv.eswitch;
1353+
route_priv = netdev_priv(route_dev);
1354+
route_mdev = route_priv->mdev;
1355+
1356+
vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
1357+
err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1358+
return err;
1359+
}
1360+
13181361
static int
13191362
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
13201363
struct mlx5e_tc_flow *flow,
@@ -3700,6 +3743,45 @@ static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
37003743
return false;
37013744
}
37023745

3746+
/*
 * Set up source-port rewrite for encap destination out_index when the route
 * device of the tunnel is a VF on the same hardware as out_dev.
 *
 * The netdev with index route_dev_ifindex is looked up in out_dev's network
 * namespace. If it does not exist, is not driven by mlx5e_netdev_ops, or
 * mlx5e_tc_is_vf_tunnel() rejects the (out_dev, route_dev) pair, nothing is
 * changed and 0 is returned. Otherwise:
 *   - attr->dest_chain is set to 0 and MOD_HDR is added to attr->action,
 *   - dests[out_index] is flagged MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE,
 *   - a VPORT_TO_REG register write carrying the route vport's metadata is
 *     added to mod_hdr_acts.
 *
 * Returns 0 on success or when the rewrite does not apply, otherwise the
 * error from mlx5e_tc_query_route_vport() or mlx5e_tc_match_to_reg_set().
 */
static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
3747+
struct mlx5_flow_attr *attr,
3748+
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
3749+
struct net_device *out_dev,
3750+
int route_dev_ifindex,
3751+
int out_index)
3752+
{
3753+
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
3754+
struct net_device *route_dev;
3755+
u16 vport_num;
3756+
int err = 0;
3757+
u32 data;
3758+
3759+
/* The reference obtained here is released with dev_put() at "out". */
route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
3760+
3761+
/* Not a VF tunnel: leave attr untouched and report success (err == 0). */
if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
3762+
!mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
3763+
goto out;
3764+
3765+
err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
3766+
if (err)
3767+
goto out;
3768+
3769+
attr->dest_chain = 0;
3770+
attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3771+
esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
3772+
/*
 * NOTE(review): metadata comes from esw_attr->in_mdev's eswitch while the
 * register write below uses esw->dev - presumably the same device; confirm.
 */
data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
3773+
vport_num);
3774+
/*
 * NOTE(review): attr/esw_attr were already modified above and are not
 * rolled back in this function if the register write fails.
 */
err = mlx5e_tc_match_to_reg_set(esw->dev, mod_hdr_acts,
3775+
MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, data);
3776+
/* Redundant with the fall-through into "out" directly below. */
if (err)
3777+
goto out;
3778+
3779+
out:
3780+
if (route_dev)
3781+
dev_put(route_dev);
3782+
return err;
3783+
}
3784+
37033785
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
37043786
struct mlx5e_tc_flow *flow,
37053787
struct net_device *mirred_dev,
@@ -3791,6 +3873,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
37913873
e->compl_result = 1;
37923874

37933875
attach_flow:
3876+
err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
3877+
e->route_dev_ifindex, out_index);
3878+
if (err)
3879+
goto out_err;
3880+
37943881
flow->encaps[out_index].e = e;
37953882
list_add(&flow->encaps[out_index].list, &e->flows);
37963883
flow->encaps[out_index].index = out_index;

drivers/net/ethernet/mellanox/mlx5/core/en_tc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
167167

168168
enum mlx5e_tc_attr_to_reg {
169169
CHAIN_TO_REG,
170+
VPORT_TO_REG,
170171
TUNNEL_TO_REG,
171172
CTSTATE_TO_REG,
172173
ZONE_TO_REG,

drivers/net/ethernet/mellanox/mlx5/core/eswitch.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,12 +389,14 @@ enum mlx5_flow_match_level {
389389
enum {
390390
MLX5_ESW_DEST_ENCAP = BIT(0),
391391
MLX5_ESW_DEST_ENCAP_VALID = BIT(1),
392+
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2),
392393
};
393394

394395
enum {
395396
MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0),
396397
MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1),
397398
MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2),
399+
MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3),
398400
};
399401

400402
struct mlx5_esw_flow_attr {

drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

Lines changed: 109 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,65 @@ esw_setup_chain_dest(struct mlx5_flow_destination *dest,
337337
return 0;
338338
}
339339

340+
/*
 * Release chain tables for source-port-rewrite destinations.
 *
 * For every index i in [from, to) whose dests[i] carries
 * MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE, calls
 * mlx5_chains_put_table(chains, 0, 1, 0). Unflagged destinations are skipped.
 */
static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
341+
int from, int to)
342+
{
343+
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
344+
struct mlx5_fs_chains *chains = esw_chains(esw);
345+
int i;
346+
347+
for (i = from; i < to; i++)
348+
if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
349+
/* presumably (chain 0, prio 1, level 0), mirroring the setup call - confirm */
mlx5_chains_put_table(chains, 0, 1, 0);
350+
}
351+
352+
/*
 * Return true if any destination in dests[split_count, out_count) is flagged
 * MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE, false otherwise.
 * The esw argument is not used by this function.
 */
static bool
353+
esw_is_chain_src_port_rewrite(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr)
354+
{
355+
int i;
356+
357+
for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
358+
if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
359+
return true;
360+
return false;
361+
}
362+
363+
/*
 * Fill destination slots with chain-table destinations (plus packet
 * reformat) for the entries dests[split_count, out_count).
 *
 * @i: in/out index into dest[]; advanced by one per completed iteration.
 *
 * Returns -EOPNOTSUPP when MLX5_ESW_ATTR_FLAG_SRC_REWRITE is not set in
 * attr->flags, 0 on success, or the error from esw_setup_chain_dest(). On
 * error, esw_put_dest_tables_loop() is run over the entries before the one
 * that failed.
 */
static int
364+
esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest,
365+
struct mlx5_flow_act *flow_act,
366+
struct mlx5_eswitch *esw,
367+
struct mlx5_fs_chains *chains,
368+
struct mlx5_flow_attr *attr,
369+
int *i)
370+
{
371+
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
372+
int j, err;
373+
374+
if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
375+
return -EOPNOTSUPP;
376+
377+
/*
 * NOTE(review): esw_setup_chain_dest() runs for every j in range, flagged
 * or not, whereas esw_put_dest_tables_loop() only releases entries flagged
 * MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE, and always for chain 0 rather
 * than attr->dest_chain. Confirm the get/put pairs balance when flagged and
 * unflagged destinations are mixed or dest_chain is non-zero.
 */
for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
378+
err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i);
379+
if (err)
380+
goto err_setup_chain;
381+
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
382+
/* Overwritten on each pass: the last destination's reformat is what remains. */
flow_act->pkt_reformat = esw_attr->dests[j].pkt_reformat;
383+
}
384+
return 0;
385+
386+
err_setup_chain:
387+
/* j is the entry that failed; only entries before it are released. */
esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j);
388+
return err;
389+
}
390+
391+
/*
 * Undo esw_setup_chain_src_port_rewrite(): run esw_put_dest_tables_loop()
 * over the full dests[split_count, out_count) range.
 */
static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw,
392+
struct mlx5_flow_attr *attr)
393+
{
394+
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
395+
396+
esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
397+
}
398+
340399
static void
341400
esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level)
342401
{
@@ -381,12 +440,18 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
381440
struct mlx5_flow_act *flow_act,
382441
struct mlx5_eswitch *esw,
383442
struct mlx5_flow_attr *attr,
443+
struct mlx5_flow_spec *spec,
384444
int *i)
385445
{
386446
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
387447
struct mlx5_fs_chains *chains = esw_chains(esw);
388448
int err = 0;
389449

450+
if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
451+
MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) &&
452+
mlx5_eswitch_vport_match_metadata_enabled(esw))
453+
attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
454+
390455
if (attr->dest_ft) {
391456
esw_setup_ft_dest(dest, flow_act, attr, *i);
392457
(*i)++;
@@ -397,6 +462,8 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
397462
err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain,
398463
1, 0, *i);
399464
(*i)++;
465+
} else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) {
466+
err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i);
400467
} else {
401468
*i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i);
402469
}
@@ -408,10 +475,15 @@ static void
408475
esw_cleanup_dests(struct mlx5_eswitch *esw,
409476
struct mlx5_flow_attr *attr)
410477
{
478+
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
411479
struct mlx5_fs_chains *chains = esw_chains(esw);
412480

413-
if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain)
414-
esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0);
481+
if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) {
482+
if (attr->dest_chain)
483+
esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0);
484+
else if (esw_is_chain_src_port_rewrite(esw, esw_attr))
485+
esw_cleanup_chain_src_port_rewrite(esw, attr);
486+
}
415487
}
416488

417489
struct mlx5_flow_handle *
@@ -448,10 +520,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
448520
}
449521
}
450522

523+
mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr);
524+
451525
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
452526
int err;
453527

454-
err = esw_setup_dests(dest, &flow_act, esw, attr, &i);
528+
err = esw_setup_dests(dest, &flow_act, esw, attr, spec, &i);
455529
if (err) {
456530
rule = ERR_PTR(err);
457531
goto err_create_goto_table;
@@ -498,8 +572,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
498572
goto err_esw_get;
499573
}
500574

501-
mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr);
502-
503575
if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec))
504576
rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr,
505577
&flow_act, dest, i);
@@ -536,7 +608,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
536608
struct mlx5_flow_table *fast_fdb;
537609
struct mlx5_flow_table *fwd_fdb;
538610
struct mlx5_flow_handle *rule;
539-
int i;
611+
int i, err = 0;
540612

541613
fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0);
542614
if (IS_ERR(fast_fdb)) {
@@ -554,8 +626,18 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
554626
}
555627

556628
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
557-
for (i = 0; i < esw_attr->split_count; i++)
558-
esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false);
629+
for (i = 0; i < esw_attr->split_count; i++) {
630+
if (esw_is_chain_src_port_rewrite(esw, esw_attr))
631+
err = esw_setup_chain_src_port_rewrite(dest, &flow_act, esw, chains, attr,
632+
&i);
633+
else
634+
esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false);
635+
636+
if (err) {
637+
rule = ERR_PTR(err);
638+
goto err_chain_src_rewrite;
639+
}
640+
}
559641
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
560642
dest[i].ft = fwd_fdb;
561643
i++;
@@ -570,13 +652,16 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
570652
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
571653
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
572654

573-
if (IS_ERR(rule))
574-
goto add_err;
655+
if (IS_ERR(rule)) {
656+
i = esw_attr->split_count;
657+
goto err_chain_src_rewrite;
658+
}
575659

576660
atomic64_inc(&esw->offloads.num_flows);
577661

578662
return rule;
579-
add_err:
663+
err_chain_src_rewrite:
664+
esw_put_dest_tables_loop(esw, attr, 0, i);
580665
esw_vport_tbl_put(esw, &fwd_attr);
581666
err_get_fwd:
582667
mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
@@ -617,6 +702,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
617702
if (fwd_rule) {
618703
esw_vport_tbl_put(esw, &fwd_attr);
619704
mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
705+
esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count);
620706
} else {
621707
if (split)
622708
esw_vport_tbl_put(esw, &fwd_attr);
@@ -3020,3 +3106,15 @@ int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vp
30203106
*vport_num = *res;
30213107
return 0;
30223108
}
3109+
3110+
/*
 * Return the metadata value stored for vport_num on this eswitch.
 *
 * If mlx5_eswitch_get_vport() returns an error pointer, a one-time warning
 * is emitted and 0 is returned instead.
 */
u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
3111+
u16 vport_num)
3112+
{
3113+
struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
3114+
3115+
if (WARN_ON_ONCE(IS_ERR(vport)))
3116+
return 0;
3117+
3118+
return vport->metadata;
3119+
}
3120+
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set);

include/linux/mlx5/eswitch.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ static inline u32 mlx5_eswitch_get_vport_metadata_mask(void)
9696

9797
u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
9898
u16 vport_num);
99+
u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
100+
u16 vport_num);
99101
u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
100102
#else /* CONFIG_MLX5_ESWITCH */
101103

0 commit comments

Comments
 (0)