
Commit 32086d8

hnaz authored and sfrothwell committed
mm: memcontrol: per-lruvec stats infrastructure
lruvecs are at the intersection of the NUMA node and memcg, which is the
scope for most paging activity.

Introduce a convenient accounting infrastructure that maintains
statistics per node, per memcg, and the lruvec itself.

Then convert over accounting sites for statistics that are already
tracked in both nodes and memcgs and can be easily switched.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Johannes Weiner <[email protected]>
Acked-by: Vladimir Davydov <[email protected]>
Cc: Josef Bacik <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Rik van Riel <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 9766ee9 commit 32086d8

File tree

6 files changed: +225 −52 lines changed
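The conversion in mm/page-writeback.c below shows the practical effect at a call site. As a minimal before/after sketch (the accounting functions are the ones touched by this commit; the wrapper functions are hypothetical):

/* Before: each accounting site updates the memcg counter and the
 * node counter with two separate calls.
 */
static void page_dirty_accounting_old(struct page *page)
{
        inc_memcg_page_state(page, NR_FILE_DIRTY);
        __inc_node_page_state(page, NR_FILE_DIRTY);
}

/* After: one lruvec call updates the node counter, the memcg counter,
 * and the new per-lruvec counter in a single step.
 */
static void page_dirty_accounting_new(struct page *page)
{
        __inc_lruvec_page_state(page, NR_FILE_DIRTY);
}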

include/linux/memcontrol.h

Lines changed: 208 additions & 30 deletions
@@ -26,7 +26,8 @@
 #include <linux/page_counter.h>
 #include <linux/vmpressure.h>
 #include <linux/eventfd.h>
-#include <linux/mmzone.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
 #include <linux/writeback.h>
 #include <linux/page-flags.h>

@@ -98,11 +99,16 @@ struct mem_cgroup_reclaim_iter {
 	unsigned int generation;
 };
 
+struct lruvec_stat {
+	long count[NR_VM_NODE_STAT_ITEMS];
+};
+
 /*
  * per-zone information in memory controller.
  */
 struct mem_cgroup_per_node {
 	struct lruvec lruvec;
+	struct lruvec_stat __percpu *lruvec_stat;
 	unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
 	struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
@@ -496,23 +502,18 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 	return val;
 }
 
-static inline void mod_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx, int val)
+static inline void __mod_memcg_state(struct mem_cgroup *memcg,
+				     enum memcg_stat_item idx, int val)
 {
 	if (!mem_cgroup_disabled())
-		this_cpu_add(memcg->stat->count[idx], val);
-}
-
-static inline void inc_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx)
-{
-	mod_memcg_state(memcg, idx, 1);
+		__this_cpu_add(memcg->stat->count[idx], val);
 }
 
-static inline void dec_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx)
+static inline void mod_memcg_state(struct mem_cgroup *memcg,
+				   enum memcg_stat_item idx, int val)
 {
-	mod_memcg_state(memcg, idx, -1);
+	if (!mem_cgroup_disabled())
+		this_cpu_add(memcg->stat->count[idx], val);
 }
 
 /**
@@ -532,23 +533,82 @@ static inline void dec_memcg_state(struct mem_cgroup *memcg,
 *
 * Kernel pages are an exception to this, since they'll never move.
 */
+static inline void __mod_memcg_page_state(struct page *page,
+					  enum memcg_stat_item idx, int val)
+{
+	if (page->mem_cgroup)
+		__mod_memcg_state(page->mem_cgroup, idx, val);
+}
+
 static inline void mod_memcg_page_state(struct page *page,
 					enum memcg_stat_item idx, int val)
 {
 	if (page->mem_cgroup)
 		mod_memcg_state(page->mem_cgroup, idx, val);
 }
 
-static inline void inc_memcg_page_state(struct page *page,
-					enum memcg_stat_item idx)
+static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
+					      enum node_stat_item idx)
 {
-	mod_memcg_page_state(page, idx, 1);
+	struct mem_cgroup_per_node *pn;
+	long val = 0;
+	int cpu;
+
+	if (mem_cgroup_disabled())
+		return node_page_state(lruvec_pgdat(lruvec), idx);
+
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	for_each_possible_cpu(cpu)
+		val += per_cpu(pn->lruvec_stat->count[idx], cpu);
+
+	if (val < 0)
+		val = 0;
+
+	return val;
 }
 
-static inline void dec_memcg_page_state(struct page *page,
-					enum memcg_stat_item idx)
+static inline void __mod_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx, int val)
 {
-	mod_memcg_page_state(page, idx, -1);
+	struct mem_cgroup_per_node *pn;
+
+	__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+	if (mem_cgroup_disabled())
+		return;
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	__mod_memcg_state(pn->memcg, idx, val);
+	__this_cpu_add(pn->lruvec_stat->count[idx], val);
+}
+
+static inline void mod_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx, int val)
+{
+	struct mem_cgroup_per_node *pn;
+
+	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+	if (mem_cgroup_disabled())
+		return;
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	mod_memcg_state(pn->memcg, idx, val);
+	this_cpu_add(pn->lruvec_stat->count[idx], val);
+}
+
+static inline void __mod_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx, int val)
+{
+	struct lruvec *lruvec;
+
+	lruvec = mem_cgroup_lruvec(page_pgdat(page), page->mem_cgroup);
+	__mod_lruvec_state(lruvec, idx, val);
+}
+
+static inline void mod_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx, int val)
+{
+	struct lruvec *lruvec;
+
+	lruvec = mem_cgroup_lruvec(page_pgdat(page), page->mem_cgroup);
+	mod_lruvec_state(lruvec, idx, val);
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
@@ -777,19 +837,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 	return 0;
 }
 
-static inline void mod_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx,
-				   int nr)
+static inline void __mod_memcg_state(struct mem_cgroup *memcg,
+				     enum memcg_stat_item idx,
+				     int nr)
 {
 }
 
-static inline void inc_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx)
+static inline void mod_memcg_state(struct mem_cgroup *memcg,
+				   enum memcg_stat_item idx,
+				   int nr)
 {
 }
 
-static inline void dec_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx)
+static inline void __mod_memcg_page_state(struct page *page,
+					  enum memcg_stat_item idx,
+					  int nr)
 {
 }

@@ -799,14 +861,34 @@ static inline void mod_memcg_page_state(struct page *page,
 {
 }
 
-static inline void inc_memcg_page_state(struct page *page,
-					enum memcg_stat_item idx)
+static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
+					      enum node_stat_item idx)
 {
+	return node_page_state(lruvec_pgdat(lruvec), idx);
 }
 
-static inline void dec_memcg_page_state(struct page *page,
-					enum memcg_stat_item idx)
+static inline void __mod_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx, int val)
 {
+	__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+}
+
+static inline void mod_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx, int val)
+{
+	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+}
+
+static inline void __mod_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx, int val)
+{
+	__mod_node_page_state(page_pgdat(page), idx, val);
+}
+
+static inline void mod_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx, int val)
+{
+	mod_node_page_state(page_pgdat(page), idx, val);
 }
 
 static inline
@@ -838,6 +920,102 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
 }
 #endif /* CONFIG_MEMCG */
 
+static inline void __inc_memcg_state(struct mem_cgroup *memcg,
+				     enum memcg_stat_item idx)
+{
+	__mod_memcg_state(memcg, idx, 1);
+}
+
+static inline void __dec_memcg_state(struct mem_cgroup *memcg,
+				     enum memcg_stat_item idx)
+{
+	__mod_memcg_state(memcg, idx, -1);
+}
+
+static inline void __inc_memcg_page_state(struct page *page,
+					  enum memcg_stat_item idx)
+{
+	__mod_memcg_page_state(page, idx, 1);
+}
+
+static inline void __dec_memcg_page_state(struct page *page,
+					  enum memcg_stat_item idx)
+{
+	__mod_memcg_page_state(page, idx, -1);
+}
+
+static inline void __inc_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx)
+{
+	__mod_lruvec_state(lruvec, idx, 1);
+}
+
+static inline void __dec_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx)
+{
+	__mod_lruvec_state(lruvec, idx, -1);
+}
+
+static inline void __inc_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx)
+{
+	__mod_lruvec_page_state(page, idx, 1);
+}
+
+static inline void __dec_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx)
+{
+	__mod_lruvec_page_state(page, idx, -1);
+}
+
+static inline void inc_memcg_state(struct mem_cgroup *memcg,
+				   enum memcg_stat_item idx)
+{
+	mod_memcg_state(memcg, idx, 1);
+}
+
+static inline void dec_memcg_state(struct mem_cgroup *memcg,
+				   enum memcg_stat_item idx)
+{
+	mod_memcg_state(memcg, idx, -1);
+}
+
+static inline void inc_memcg_page_state(struct page *page,
+					enum memcg_stat_item idx)
+{
+	mod_memcg_page_state(page, idx, 1);
+}
+
+static inline void dec_memcg_page_state(struct page *page,
+					enum memcg_stat_item idx)
+{
+	mod_memcg_page_state(page, idx, -1);
+}
+
+static inline void inc_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx)
+{
+	mod_lruvec_state(lruvec, idx, 1);
+}
+
+static inline void dec_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx)
+{
+	mod_lruvec_state(lruvec, idx, -1);
+}
+
+static inline void inc_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx)
+{
+	mod_lruvec_page_state(page, idx, 1);
+}
+
+static inline void dec_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx)
+{
+	mod_lruvec_page_state(page, idx, -1);
+}
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
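One note on the API above: mirroring the existing vmstat convention, the double-underscore variants update the per-CPU counters with __this_cpu_add() and therefore assume the caller already runs in a non-preemptible (typically IRQ-disabled) context, while the plain variants are callable from any context. A minimal sketch of that contract, with a hypothetical caller:

/* Hypothetical caller illustrating the locking contract; the stat item
 * and accessors are from the header above, the IRQ handling is the
 * assumed calling convention, not part of this commit.
 */
static void lruvec_stat_example(struct lruvec *lruvec)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_lruvec_state(lruvec, NR_FILE_DIRTY, 1);   /* cheap, non-atomic */
        local_irq_restore(flags);

        mod_lruvec_state(lruvec, NR_FILE_DIRTY, -1);    /* safe from any context */
}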

include/linux/vmstat.h

Lines changed: 0 additions & 1 deletion
@@ -3,7 +3,6 @@
 
 #include <linux/types.h>
 #include <linux/percpu.h>
-#include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/vm_event_item.h>
 #include <linux/atomic.h>

mm/memcontrol.c

Lines changed: 6 additions & 0 deletions
@@ -4122,6 +4122,12 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return 1;
 
+	pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
+	if (!pn->lruvec_stat) {
+		kfree(pn);
+		return 1;
+	}
+
 	lruvec_init(&pn->lruvec);
 	pn->usage_in_excess = 0;
 	pn->on_tree = false;
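This hunk covers only the allocation side. The matching teardown is not visible in this excerpt, but the per-CPU allocation would presumably need to be released along these lines (a sketch, assuming the existing free_mem_cgroup_per_node_info() helper in mm/memcontrol.c):

/* Assumed cleanup; not part of the hunks shown above. */
static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
{
        struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];

        free_percpu(pn->lruvec_stat);   /* undo alloc_percpu() above */
        kfree(pn);
}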

mm/page-writeback.c

Lines changed: 5 additions & 10 deletions
@@ -2433,8 +2433,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		inode_attach_wb(inode, page);
 		wb = inode_to_wb(inode);
 
-		inc_memcg_page_state(page, NR_FILE_DIRTY);
-		__inc_node_page_state(page, NR_FILE_DIRTY);
+		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
 		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		__inc_node_page_state(page, NR_DIRTIED);
 		__inc_wb_stat(wb, WB_RECLAIMABLE);

@@ -2455,8 +2454,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
 			  struct bdi_writeback *wb)
 {
 	if (mapping_cap_account_dirty(mapping)) {
-		dec_memcg_page_state(page, NR_FILE_DIRTY);
-		dec_node_page_state(page, NR_FILE_DIRTY);
+		dec_lruvec_page_state(page, NR_FILE_DIRTY);
 		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		dec_wb_stat(wb, WB_RECLAIMABLE);
 		task_io_account_cancelled_write(PAGE_SIZE);

@@ -2712,8 +2710,7 @@ int clear_page_dirty_for_io(struct page *page)
 		 */
 		wb = unlocked_inode_to_wb_begin(inode, &locked);
 		if (TestClearPageDirty(page)) {
-			dec_memcg_page_state(page, NR_FILE_DIRTY);
-			dec_node_page_state(page, NR_FILE_DIRTY);
+			dec_lruvec_page_state(page, NR_FILE_DIRTY);
 			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 			dec_wb_stat(wb, WB_RECLAIMABLE);
 			ret = 1;

@@ -2759,8 +2756,7 @@ int test_clear_page_writeback(struct page *page)
 		ret = TestClearPageWriteback(page);
 	}
 	if (ret) {
-		dec_memcg_page_state(page, NR_WRITEBACK);
-		dec_node_page_state(page, NR_WRITEBACK);
+		dec_lruvec_page_state(page, NR_WRITEBACK);
 		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		inc_node_page_state(page, NR_WRITTEN);
 	}

@@ -2814,8 +2810,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 		ret = TestSetPageWriteback(page);
 	}
 	if (!ret) {
-		inc_memcg_page_state(page, NR_WRITEBACK);
-		inc_node_page_state(page, NR_WRITEBACK);
+		inc_lruvec_page_state(page, NR_WRITEBACK);
 		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 	}
 	unlock_page_memcg(page);
