Skip to content

Commit 991836e

Browse files
authored
Merge pull request #49 from sidkumar99/vfs_cache_pressure_tuning
adaptivemm: initial vm.vfs_cache_pressure tuning algorithm. Implement a simple vm.vfs_cache_pressure tuning algorithm. The algorithm is driven by a user-supplied hint, the PREFER_OBJECT_CACHING option in the configuration file. This hint indicates whether the user would rather have the system preserve the dentry and inode object caches at the cost of higher memory utilization. Some workloads are strongly impacted by the level of dentry and inode caching, so it may benefit these workloads to have the mm system deprioritize reclaiming these caches. If this option is selected, the inode and dentry counts are monitored; if they are projected to increase (via the slope of a best-fit line), the sysctl is lowered by 10%, and if they are projected to decrease, the sysctl is raised by 10%. The opposite behavior occurs if PREFER_OBJECT_CACHING is set to 0.
2 parents 9274436 + 1a17ed7 commit 991836e

File tree

4 files changed

+126
-16
lines changed

4 files changed

+126
-16
lines changed

adaptivemm/adaptivemmd.cfg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ AGGRESSIVENESS=2
1414
# Enable management of free pages through watermarks tuning
1515
ENABLE_FREE_PAGE_MGMT=1
1616

17+
# Guidance for tuning of vm.vfs_cache_pressure
18+
PREFER_OBJECT_CACHING=1
19+
1720
# Maximum gap between low and high watermarks (in GB)
1821
# MAXGAP=5
1922

adaptivemm/src/adaptivemmd.c

Lines changed: 114 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,15 @@
5858
#define KPAGEFLAGS "/proc/kpageflags"
5959
#define MODULES "/proc/modules"
6060
#define HUGEPAGESINFO "/sys/kernel/mm/hugepages"
61+
#define DENTRYINFO "/proc/sys/fs/dentry-state"
62+
#define INODEINFO "/proc/sys/fs/inode-nr"
6163

6264
/*
6365
* System files to control reclamation and compaction
6466
*/
6567
#define RESCALE_WMARK "/proc/sys/vm/watermark_scale_factor"
6668
#define COMPACT_PATH_FORMAT "/sys/devices/system/node/node%d/compact"
69+
#define VFS_CACHE_PRESSURE "/proc/sys/vm/vfs_cache_pressure"
6770

6871
/*
6972
* System files to control negative dentries
@@ -76,12 +79,16 @@
7679
#define CONFIG_FILE1 "/etc/sysconfig/adaptivemmd"
7780
#define CONFIG_FILE2 "/etc/default/adaptivemmd"
7881

82+
#define FS_FIELDS 2
7983
#define MAX_NUMANODES 1024
8084

8185
#define MAX_VERBOSE 5
8286
#define MAX_AGGRESSIVE 3
8387
#define MAX_NEGDENTRY 100
8488

89+
#define MAX_VFS_CACHE_PRESSURE 1000
90+
#define MIN_VFS_CACHE_PRESSURE 3
91+
8592
/*
8693
* Number of consecutive samples showing growth in unaccounted memory
8794
* that will trigger memory leak warning
@@ -91,17 +98,22 @@
9198
/* Minimum % change in meminfo numbers to trigger warnings */
9299
#define MEM_TRIGGER_DELTA 10
93100

101+
/*
 * MAX() - Pick the larger of two unsigned long values.
 *
 * The result is converted to int on return, so it is only meaningful
 * when the larger operand fits in an int.
 */
int MAX(unsigned long a, unsigned long b)
{
	if (a > b)
		return a;

	return b;
}
102+
/*
 * MIN() - Pick the smaller of two unsigned long values.
 *
 * The result is converted to int on return, so it is only meaningful
 * when the smaller operand fits in an int.
 */
int MIN(unsigned long a, unsigned long b)
{
	if (a < b)
		return a;

	return b;
}
103+
94104
unsigned long min_wmark[MAX_NUMANODES], low_wmark[MAX_NUMANODES];
95105
unsigned long high_wmark[MAX_NUMANODES], managed_pages[MAX_NUMANODES];
96106
unsigned long total_free_pages, total_cache_pages, total_hugepages, base_psize;
97107
long compaction_rate, reclaim_rate;
98108
struct lsq_struct page_lsq[MAX_NUMANODES][MAX_ORDER];
109+
struct lsq_struct fs_lsq[FS_FIELDS];
99110
int dry_run;
100111
int debug_mode, verbose, del_lock = 0;
101112
unsigned long maxgap;
102113
int aggressiveness = 2;
103114
int periodicity, skip_dmazone;
104115
int neg_dentry_pct = 15; /* default is 1.5% */
116+
int prefer_object_caching = 1;
105117

106118
/* Flags to enable various modules */
107119
bool memory_pressure_check_enabled = true;
@@ -127,7 +139,6 @@ unsigned int mywsf;
127139
*/
128140
int max_compaction_order = MAX_ORDER - 4;
129141

130-
131142
/*
132143
* Clean up before exiting
133144
*/
@@ -185,6 +196,14 @@ void log_msg(int level, char *fmt, ...)
185196
va_end(args);
186197
}
187198

199+
/*
 * get_msecs() - Fold a timespec into a single scalar timestamp.
 *
 * Returns (unsigned long)-1 when spec is NULL, otherwise
 * tv_sec * 1000 + tv_nsec / 1000.
 *
 * NOTE(review): tv_nsec / 1000 is a microsecond count, so the sub-second
 * term is not in milliseconds despite the function name. predict() builds
 * its current time with the same expression, so the formula is kept
 * unchanged here to stay consistent with it.
 */
static inline unsigned long get_msecs(struct timespec *spec)
{
	time_t secs_part;
	long subsec_part;

	if (spec == NULL)
		return (unsigned long)-1;

	secs_part = spec->tv_sec * 1000;
	subsec_part = spec->tv_nsec / 1000;

	return (unsigned long)(secs_part + subsec_part);
}
206+
188207
/*
189208
* Initiate memory compaction in the kernel on a given node.
190209
*/
@@ -519,6 +538,95 @@ void rescale_maxwsf()
519538
log_warn("Failed to compute reasonable WSF, %ld, total pages %ld, reclaimable pages %ld", new_wsf, total_managed, reclaimable_pages);
520539
}
521540

541+
/*
 * parse_fs_files() - Read the leading unsigned number from the first line
 *	of a file.
 *
 * @path: file to read (e.g. a /proc/sys entry)
 * @data: where the parsed value is stored
 *
 * Only the first numeric field of the first line is consumed; anything
 * after it on the line is ignored.
 *
 * Returns 0 on success. Returns -1, leaving *data untouched, if an
 * argument is NULL, the file cannot be opened, the file is empty, or the
 * first line does not start with a number.
 */
int parse_fs_files(char * path, unsigned long *data)
{
	FILE *fp;
	char line[80];
	int rc = -1;

	if (!path || !data)
		return -1;

	fp = fopen(path, "r");
	if (!fp)
		return -1;

	/*
	 * Report success only if a value was actually converted; otherwise
	 * the caller would go on to use an unset *data.
	 */
	if (fgets(line, sizeof(line), fp) != NULL &&
	    sscanf(line, "%lu", data) == 1)
		rc = 0;

	fclose(fp);
	return rc;
}
560+
561+
int set_vfs_cache_pressure(unsigned long new_cache_pressure)
562+
{
563+
int fd;
564+
char scaled_cache_pressure[20];
565+
566+
sprintf(scaled_cache_pressure, "%ld\n", new_cache_pressure);
567+
if ((fd = open(VFS_CACHE_PRESSURE, O_WRONLY)) == -1) {
568+
log_err("Failed to open "VFS_CACHE_PRESSURE" (%s)", strerror(errno));
569+
return -1;
570+
}
571+
572+
if (write(fd, scaled_cache_pressure, strlen(scaled_cache_pressure)) < 0) {
573+
log_err("Failed to write to "VFS_CACHE_PRESSURE" (%s)", strerror(errno));
574+
return -1;
575+
}
576+
577+
log_info(1, "New vfs_cache_pressure = %ld", new_cache_pressure);
578+
return 0;
579+
}
580+
581+
/*
582+
* rescale vm.vfs_cache_pressure value based on current memory trends
583+
*/
584+
void rescale_vfs_cache_pressure()
585+
{
586+
struct timespec spec;
587+
unsigned long curr_inodes;
588+
unsigned long curr_dentries;
589+
unsigned long curr_vfs_cache_pressure;
590+
long long dentry_m, inode_m;
591+
long long dentry_c, inode_c;
592+
593+
if (parse_fs_files(VFS_CACHE_PRESSURE, &curr_vfs_cache_pressure) ||
594+
parse_fs_files(INODEINFO, &curr_inodes) ||
595+
parse_fs_files(DENTRYINFO, &curr_dentries))
596+
return;
597+
598+
clock_gettime(CLOCK_MONOTONIC_RAW, &spec);
599+
600+
if(lsq_fit(&fs_lsq[0], curr_dentries,
601+
(long long)get_msecs(&spec),&dentry_m, &dentry_c) ||
602+
lsq_fit(&fs_lsq[1], curr_inodes,
603+
(long long)get_msecs(&spec), &inode_m, &inode_c))
604+
return;
605+
/*
606+
* adjust vm.vfs_cache_pressure based on the
607+
* growth of cached inode and dentry objects
608+
*/
609+
if (prefer_object_caching) {
610+
if ((dentry_m > 0 || inode_m > 0) &&
611+
curr_vfs_cache_pressure > MIN_VFS_CACHE_PRESSURE) {
612+
set_vfs_cache_pressure(curr_vfs_cache_pressure * 0.9);
613+
} else if ((dentry_m < 0 || inode_m < 0) &&
614+
curr_vfs_cache_pressure < MAX_VFS_CACHE_PRESSURE)
615+
set_vfs_cache_pressure(MIN(MAX_VFS_CACHE_PRESSURE,
616+
curr_vfs_cache_pressure * 1.1));
617+
} else {
618+
if ((dentry_m > 0 || inode_m > 0) &&
619+
curr_vfs_cache_pressure < MAX_VFS_CACHE_PRESSURE) {
620+
set_vfs_cache_pressure(MIN(MAX_VFS_CACHE_PRESSURE,
621+
curr_vfs_cache_pressure * 1.1));
622+
} else if ((dentry_m < 0 || inode_m < 0) &&
623+
curr_vfs_cache_pressure > MIN_VFS_CACHE_PRESSURE)
624+
set_vfs_cache_pressure(curr_vfs_cache_pressure * 0.9);
625+
}
626+
627+
return;
628+
}
629+
522630
/*
523631
* Get the number of pages stolen by kswapd from /proc/vmstat.
524632
*/
@@ -783,14 +891,6 @@ void rescale_watermarks(int scale_up)
783891
close(fd);
784892
}
785893

786-
/*
 * get_msecs() - Fold a timespec into a single scalar timestamp.
 *
 * Returns (unsigned long)-1 when spec is NULL, otherwise
 * tv_sec * 1000 + tv_nsec / 1000.
 *
 * NOTE(review): tv_nsec / 1000 is a microsecond count, so the sub-second
 * term is not in milliseconds despite the function name. predict() builds
 * its current time with the same expression, so the formula is kept
 * unchanged here to stay consistent with it.
 */
static inline unsigned long get_msecs(struct timespec *spec)
{
	time_t secs_part;
	long subsec_part;

	if (spec == NULL)
		return (unsigned long)-1;

	secs_part = spec->tv_sec * 1000;
	subsec_part = spec->tv_nsec / 1000;

	return (unsigned long)(secs_part + subsec_part);
}
793-
794894
/*
795895
* check_permissions() - Check all required permissions for this program to
796896
* run successfully
@@ -1596,8 +1696,7 @@ void check_memory_pressure(bool init)
15961696

15971697
/*
15981698
* one_time_initializations() - Initialize settings that are set once at
1599-
* adaptivemmd startup
1600-
*
1699+
* adaptivemmd startup
16011700
*/
16021701
void one_time_initializations()
16031702
{
@@ -1631,6 +1730,7 @@ void one_time_initializations()
16311730
#define OPT_NEG_DENTRY1 "NEG-DENTRY-CAP"
16321731
#define OPT_NEG_DENTRY2 "NEG_DENTRY_CAP"
16331732
#define OPT_ENB_MEMLEAK "ENABLE_MEMLEAK_CHECK"
1733+
#define OPT_PREFER_OBJECT_CACHING "PREFER_OBJECT_CACHING"
16341734

16351735
int parse_config()
16361736
{
@@ -1728,6 +1828,8 @@ int parse_config()
17281828
neg_dentry_pct = val;
17291829
} else if (strncmp(token, OPT_ENB_MEMLEAK, sizeof(OPT_ENB_MEMLEAK)) == 0)
17301830
memleak_check_enabled = ((val==0)?false:true);
1831+
else if (strncmp(token, OPT_PREFER_OBJECT_CACHING, sizeof(OPT_PREFER_OBJECT_CACHING)) == 0)
1832+
prefer_object_caching = val;
17311833
else {
17321834
log_err("Error in configuration file at token \"%s\". Proceeding with defaults", token);
17331835
break;
@@ -1908,7 +2010,6 @@ int main(int argc, char **argv)
19082010
base_psize = getpagesize()/1024;
19092011

19102012
pr_info("adaptivemmd "VERSION" started (verbose=%d, aggressiveness=%d, maxgap=%d)", verbose, aggressiveness, maxgap);
1911-
19122013
one_time_initializations();
19132014

19142015
while (1) {
@@ -1927,9 +2028,9 @@ int main(int argc, char **argv)
19272028
updates_for_hugepages(retval);
19282029
if (maxgap == 0)
19292030
rescale_maxwsf();
1930-
19312031
check_memory_pressure(false);
19322032
check_memory_leak(false);
2033+
rescale_vfs_cache_pressure();
19332034

19342035
sleep(periodicity);
19352036
}

adaptivemm/src/predict.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,13 @@
3333
* best. The formulation is for the special case in which x_i = i + 1 - N;
3434
* this reduces the need for storage and permits constant time updates.
3535
*/
36-
static int lsq_fit(struct lsq_struct *lsq, long long new_y, long long new_x,
36+
int lsq_fit(struct lsq_struct *lsq, long long new_y, long long new_x,
3737
long long *m, long long *c)
3838
{
3939
long long sigma_x, sigma_y;
4040
long sigma_xy, sigma_xx;
4141
long long slope_divisor;
42+
long long numerator;
4243
int i, next;
4344
long x_offset;
4445

@@ -97,7 +98,8 @@ static int lsq_fit(struct lsq_struct *lsq, long long new_y, long long new_x,
9798
if (slope_divisor == 0)
9899
return -1;
99100

100-
*m = ((LSQ_LOOKBACK * sigma_xy - sigma_x * sigma_y) * 100) / slope_divisor;
101+
numerator = (LSQ_LOOKBACK * sigma_xy - sigma_x * sigma_y) * 100;
102+
*m = numerator / slope_divisor;
101103
*c = (sigma_y - *m * sigma_x) / LSQ_LOOKBACK;
102104

103105
/*
@@ -300,7 +302,7 @@ unsigned long predict(struct frag_info *frag_vec, struct lsq_struct *lsq,
300302
* graph.
301303
*/
302304
clock_gettime(CLOCK_MONOTONIC_RAW, &tspec);
303-
current_time = tspec.tv_sec*1000 + tspec.tv_nsec/1000 - lsq->x[lsq->next];;
305+
current_time = tspec.tv_sec*1000 + tspec.tv_nsec/1000 - lsq->x[lsq->next];
304306
if (current_time < 0)
305307
current_time = 0;
306308
if ((x_cross < 0) ||

adaptivemm/src/predict.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ struct frag_info {
6868
long long msecs;
6969
};
7070

71+
72+
int lsq_fit(struct lsq_struct *lsq, long long new_y, long long new_x,
73+
long long *m, long long *c);
74+
7175
unsigned long predict(struct frag_info *, struct lsq_struct *,
7276
unsigned long, unsigned long, int);
7377

0 commit comments

Comments
 (0)