
Commit b958451

mikijoy authored and jgunthorpe committed
RDMA/mlx5: Change the cache structure to an RB-tree
Currently, the cache structure is a static linear array. Therefore, its size is limited to the number of entries in it and it is not expandable. The entries are dedicated to mkeys of size 2^x with no access_flags, so mkeys with different properties are not cacheable.

In this patch, we change the cache structure to an RB-tree. This allows the cache to be extended to support more entries with different mkey properties.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Michael Guralnik <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 18b1746 commit b958451
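As background for the diff below, here is a minimal, self-contained sketch of the two rbtree operations the new cache layout relies on: inserting an entry keyed by its mkey order, and finding the entry with the smallest order that is >= a requested order. The struct and function names (ent, ent_insert, ent_find_ge) are illustrative only and do not appear in the patch; locking and the per-entry mkey XArray are omitted.

#include <linux/rbtree.h>

/* Simplified stand-in for struct mlx5_cache_ent: only the rbtree key. */
struct ent {
	struct rb_node node;
	unsigned int order;
};

/* Insert @new_ent into @root, keyed by order; duplicates are rejected. */
static int ent_insert(struct rb_root *root, struct ent *new_ent)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;

	while (*link) {
		struct ent *cur = rb_entry(*link, struct ent, node);

		parent = *link;
		if (new_ent->order < cur->order)
			link = &(*link)->rb_left;
		else if (new_ent->order > cur->order)
			link = &(*link)->rb_right;
		else
			return -EEXIST;
	}
	rb_link_node(&new_ent->node, parent, link);	/* link as a leaf */
	rb_insert_color(&new_ent->node, root);		/* rebalance the tree */
	return 0;
}

/* Return the entry with the smallest order >= @order, or NULL if none. */
static struct ent *ent_find_ge(struct rb_root *root, unsigned int order)
{
	struct rb_node *node = root->rb_node;
	struct ent *best = NULL;

	while (node) {
		struct ent *cur = rb_entry(node, struct ent, node);

		if (cur->order >= order) {
			best = cur;		/* candidate; look for a smaller one */
			node = node->rb_left;
		} else {
			node = node->rb_right;
		}
	}
	return best;
}

The >= lookup is what lets the patched mkey_cache_ent_from_order() serve a request from the next larger cached order when there is no exact match.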

3 files changed: +132 lines, −47 lines


drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 9 additions & 2 deletions
@@ -741,6 +741,8 @@ struct mlx5_cache_ent {
 	u32			access_mode;
 	unsigned int		ndescs;
 
+	struct rb_node		node;
+
 	u8 disabled:1;
 	u8 fill_to_high_water:1;
 
@@ -770,8 +772,9 @@ struct mlx5r_async_create_mkey {
 
 struct mlx5_mkey_cache {
 	struct workqueue_struct *wq;
-	struct mlx5_cache_ent	ent[MAX_MKEY_CACHE_ENTRIES];
-	struct dentry		*root;
+	struct rb_root		rb_root;
+	struct mutex		rb_lock;
+	struct dentry		*fs_root;
 	unsigned long		last_add;
 };
 
@@ -1316,11 +1319,15 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
 int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
+struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
+					      int order);
 
 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
 				       struct mlx5_cache_ent *ent,
 				       int access_flags);
 
+struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev, u32 order,
+					     int access_flags);
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
 struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,

drivers/infiniband/hw/mlx5/mr.c

Lines changed: 120 additions & 40 deletions
@@ -515,18 +515,22 @@ static const struct file_operations limit_fops = {
 
 static bool someone_adding(struct mlx5_mkey_cache *cache)
 {
-	unsigned int i;
-
-	for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-		struct mlx5_cache_ent *ent = &cache->ent[i];
-		bool ret;
+	struct mlx5_cache_ent *ent;
+	struct rb_node *node;
+	bool ret;
 
+	mutex_lock(&cache->rb_lock);
+	for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
+		ent = rb_entry(node, struct mlx5_cache_ent, node);
 		xa_lock_irq(&ent->mkeys);
 		ret = ent->stored < ent->limit;
 		xa_unlock_irq(&ent->mkeys);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&cache->rb_lock);
 			return true;
+		}
 	}
+	mutex_unlock(&cache->rb_lock);
 	return false;
 }
 
@@ -637,6 +641,59 @@ static void delayed_cache_work_func(struct work_struct *work)
 	__cache_work_func(ent);
 }
 
+static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
+				 struct mlx5_cache_ent *ent)
+{
+	struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL;
+	struct mlx5_cache_ent *cur;
+
+	mutex_lock(&cache->rb_lock);
+	/* Figure out where to put new node */
+	while (*new) {
+		cur = rb_entry(*new, struct mlx5_cache_ent, node);
+		parent = *new;
+		if (ent->order < cur->order)
+			new = &((*new)->rb_left);
+		if (ent->order > cur->order)
+			new = &((*new)->rb_right);
+		if (ent->order == cur->order) {
+			mutex_unlock(&cache->rb_lock);
+			return -EEXIST;
+		}
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&ent->node, parent, new);
+	rb_insert_color(&ent->node, &cache->rb_root);
+
+	mutex_unlock(&cache->rb_lock);
+	return 0;
+}
+
+static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
+							 unsigned int order)
+{
+	struct rb_node *node = dev->cache.rb_root.rb_node;
+	struct mlx5_cache_ent *cur, *smallest = NULL;
+
+	/*
+	 * Find the smallest ent with order >= requested_order.
+	 */
+	while (node) {
+		cur = rb_entry(node, struct mlx5_cache_ent, node);
+		if (cur->order > order) {
+			smallest = cur;
+			node = node->rb_left;
+		}
+		if (cur->order < order)
+			node = node->rb_right;
+		if (cur->order == order)
+			return cur;
+	}
+
+	return smallest;
+}
+
 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
 				       struct mlx5_cache_ent *ent,
 				       int access_flags)
@@ -677,10 +734,16 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
 	return mr;
 }
 
-static void clean_keys(struct mlx5_ib_dev *dev, int c)
+struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev,
+					     u32 order, int access_flags)
+{
+	struct mlx5_cache_ent *ent = mkey_cache_ent_from_order(dev, order);
+
+	return mlx5_mr_cache_alloc(dev, ent, access_flags);
+}
+
+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
 {
-	struct mlx5_mkey_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent = &cache->ent[c];
 	u32 mkey;
 
 	cancel_delayed_work(&ent->dwork);
@@ -699,8 +762,8 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 	if (!mlx5_debugfs_root || dev->is_rep)
 		return;
 
-	debugfs_remove_recursive(dev->cache.root);
-	dev->cache.root = NULL;
+	debugfs_remove_recursive(dev->cache.fs_root);
+	dev->cache.fs_root = NULL;
 }
 
 static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
@@ -713,12 +776,13 @@ static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
 	if (!mlx5_debugfs_root || dev->is_rep)
 		return;
 
-	cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
+	dir = mlx5_debugfs_get_dev_root(dev->mdev);
+	cache->fs_root = debugfs_create_dir("mr_cache", dir);
 
 	for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-		ent = &cache->ent[i];
+		ent = mkey_cache_ent_from_order(dev, i);
 		sprintf(ent->name, "%d", ent->order);
-		dir = debugfs_create_dir(ent->name, cache->root);
+		dir = debugfs_create_dir(ent->name, cache->fs_root);
 		debugfs_create_file("size", 0600, dir, ent, &size_fops);
 		debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
 		debugfs_create_ulong("cur", 0400, dir, &ent->stored);
@@ -733,13 +797,39 @@ static void delay_time_func(struct timer_list *t)
 	WRITE_ONCE(dev->fill_delay, 0);
 }
 
+struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
+					      int order)
+{
+	struct mlx5_cache_ent *ent;
+	int ret;
+
+	ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return ERR_PTR(-ENOMEM);
+
+	xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
+	ent->order = order;
+	ent->dev = dev;
+
+	INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+
+	ret = mlx5_cache_ent_insert(&dev->cache, ent);
+	if (ret) {
+		kfree(ent);
+		return ERR_PTR(ret);
+	}
+	return ent;
+}
+
 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_mkey_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
 	int i;
 
 	mutex_init(&dev->slow_path_mutex);
+	mutex_init(&dev->cache.rb_lock);
+	dev->cache.rb_root = RB_ROOT;
 	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
 	if (!cache->wq) {
 		mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -749,13 +839,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
 	timer_setup(&dev->delay_timer, delay_time_func, 0);
 	for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-		ent = &cache->ent[i];
-		xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
-		ent->order = i + 2;
-		ent->dev = dev;
-		ent->limit = 0;
-
-		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+		ent = mlx5r_cache_create_ent(dev, i);
 
 		if (i > MKEY_CACHE_LAST_STD_ENTRY) {
 			mlx5_odp_init_mkey_cache_entry(ent);
@@ -785,14 +869,16 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 
 int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
 {
-	unsigned int i;
+	struct rb_root *root = &dev->cache.rb_root;
+	struct mlx5_cache_ent *ent;
+	struct rb_node *node;
 
 	if (!dev->cache.wq)
 		return 0;
 
-	for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-		struct mlx5_cache_ent *ent = &dev->cache.ent[i];
-
+	mutex_lock(&dev->cache.rb_lock);
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		ent = rb_entry(node, struct mlx5_cache_ent, node);
 		xa_lock_irq(&ent->mkeys);
 		ent->disabled = true;
 		xa_unlock_irq(&ent->mkeys);
@@ -802,8 +888,15 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
 	mlx5_mkey_cache_debugfs_cleanup(dev);
 	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
 
-	for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++)
-		clean_keys(dev, i);
+	node = rb_first(root);
+	while (node) {
+		ent = rb_entry(node, struct mlx5_cache_ent, node);
+		node = rb_next(node);
+		clean_keys(dev, ent);
+		rb_erase(&ent->node, root);
+		kfree(ent);
+	}
+	mutex_unlock(&dev->cache.rb_lock);
 
 	destroy_workqueue(dev->cache.wq);
 	del_timer_sync(&dev->delay_timer);
@@ -876,19 +969,6 @@ static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
 	return MLX5_MAX_UMR_SHIFT;
 }
 
-static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
-							 unsigned int order)
-{
-	struct mlx5_mkey_cache *cache = &dev->cache;
-
-	if (order < cache->ent[0].order)
-		return &cache->ent[0];
-	order = order - cache->ent[0].order;
-	if (order > MKEY_CACHE_LAST_STD_ENTRY)
-		return NULL;
-	return &cache->ent[order];
-}
-
 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 			  u64 length, int access_flags, u64 iova)
 {

drivers/infiniband/hw/mlx5/odp.c

Lines changed: 3 additions & 5 deletions
@@ -419,8 +419,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 		return ERR_CAST(odp);
 
 	BUILD_BUG_ON(order > MKEY_CACHE_LAST_STD_ENTRY);
-	mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[order],
-				 imr->access_flags);
+	mr = mlx5_mr_cache_alloc_order(dev, order, imr->access_flags);
 	if (IS_ERR(mr)) {
 		ib_umem_odp_release(odp);
 		return mr;
@@ -494,9 +493,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 	if (IS_ERR(umem_odp))
 		return ERR_CAST(umem_odp);
 
-	imr = mlx5_mr_cache_alloc(dev,
-				  &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY],
-				  access_flags);
+	imr = mlx5_mr_cache_alloc_order(dev, MLX5_IMR_KSM_CACHE_ENTRY,
+					access_flags);
 	if (IS_ERR(imr)) {
 		ib_umem_odp_release(umem_odp);
 		return imr;
