Skip to content

Commit 92042ca

Browse files
Paul Gofman authored and kakra committed
mm: Support soft dirty flag read with reset.
Signed-off-by: Kai Krakow <[email protected]>
1 parent c7fe22e commit 92042ca

File tree

3 files changed

+130
-18
lines changed

3 files changed

+130
-18
lines changed

fs/proc/base.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3281,6 +3281,9 @@ static const struct pid_entry tgid_base_stuff[] = {
32813281
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
32823282
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
32833283
REG("pagemap", S_IRUSR, proc_pagemap_operations),
3284+
#ifdef CONFIG_MEM_SOFT_DIRTY
3285+
REG("pagemap_reset", S_IRUSR, proc_pagemap_reset_operations),
3286+
#endif
32843287
#endif
32853288
#ifdef CONFIG_SECURITY
32863289
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),

fs/proc/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ extern const struct file_operations proc_pid_smaps_operations;
305305
extern const struct file_operations proc_pid_smaps_rollup_operations;
306306
extern const struct file_operations proc_clear_refs_operations;
307307
extern const struct file_operations proc_pagemap_operations;
308+
extern const struct file_operations proc_pagemap_reset_operations;
308309

309310
extern unsigned long task_vsize(struct mm_struct *);
310311
extern unsigned long task_statm(struct mm_struct *,

fs/proc/task_mmu.c

Lines changed: 126 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,8 +1095,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
10951095
return page_maybe_dma_pinned(page);
10961096
}
10971097

1098-
static inline void clear_soft_dirty(struct vm_area_struct *vma,
1099-
unsigned long addr, pte_t *pte)
1098+
static inline bool clear_soft_dirty(struct vm_area_struct *vma,
1099+
unsigned long addr, pte_t *pte)
11001100
{
11011101
/*
11021102
* The soft-dirty tracker uses #PF-s to catch writes
@@ -1105,37 +1105,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
11051105
* of how soft-dirty works.
11061106
*/
11071107
pte_t ptent = *pte;
1108+
bool ret = false;
11081109

11091110
if (pte_present(ptent)) {
11101111
pte_t old_pte;
11111112

11121113
if (pte_is_pinned(vma, addr, ptent))
1113-
return;
1114+
return ret;
11141115
old_pte = ptep_modify_prot_start(vma, addr, pte);
1116+
ret = pte_soft_dirty(old_pte);
11151117
ptent = pte_wrprotect(old_pte);
11161118
ptent = pte_clear_soft_dirty(ptent);
11171119
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
11181120
} else if (is_swap_pte(ptent)) {
1121+
ret = pte_swp_soft_dirty(ptent);
11191122
ptent = pte_swp_clear_soft_dirty(ptent);
11201123
set_pte_at(vma->vm_mm, addr, pte, ptent);
11211124
}
1125+
return ret;
11221126
}
11231127
#else
1124-
static inline void clear_soft_dirty(struct vm_area_struct *vma,
1128+
static inline bool clear_soft_dirty(struct vm_area_struct *vma,
11251129
unsigned long addr, pte_t *pte)
11261130
{
1131+
return false;
11271132
}
11281133
#endif
11291134

11301135
#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
1131-
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
1136+
static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
11321137
unsigned long addr, pmd_t *pmdp)
11331138
{
11341139
pmd_t old, pmd = *pmdp;
1140+
bool ret = false;
11351141

11361142
if (pmd_present(pmd)) {
11371143
/* See comment in change_huge_pmd() */
11381144
old = pmdp_invalidate(vma, addr, pmdp);
1145+
1146+
ret = pmd_soft_dirty(old);
1147+
11391148
if (pmd_dirty(old))
11401149
pmd = pmd_mkdirty(pmd);
11411150
if (pmd_young(old))
@@ -1146,14 +1155,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
11461155

11471156
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
11481157
} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
1158+
ret = pmd_swp_soft_dirty(pmd);
11491159
pmd = pmd_swp_clear_soft_dirty(pmd);
11501160
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
11511161
}
1162+
return ret;
11521163
}
11531164
#else
1154-
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
1165+
static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
11551166
unsigned long addr, pmd_t *pmdp)
11561167
{
1168+
return false;
11571169
}
11581170
#endif
11591171

@@ -1406,6 +1418,7 @@ struct pagemapread {
14061418
int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
14071419
pagemap_entry_t *buffer;
14081420
bool show_pfn;
1421+
bool reset;
14091422
};
14101423

14111424
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
@@ -1438,13 +1451,24 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
14381451
return 0;
14391452
}
14401453

1454+
static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm)
1455+
{
1456+
((unsigned long *)pm->buffer)[pm->pos++] = addr;
1457+
if (pm->pos >= pm->len)
1458+
return PM_END_OF_BUFFER;
1459+
return 0;
1460+
}
1461+
14411462
static int pagemap_pte_hole(unsigned long start, unsigned long end,
14421463
__always_unused int depth, struct mm_walk *walk)
14431464
{
14441465
struct pagemapread *pm = walk->private;
14451466
unsigned long addr = start;
14461467
int err = 0;
14471468

1469+
if (pm->reset)
1470+
goto out;
1471+
14481472
while (addr < end) {
14491473
struct vm_area_struct *vma = find_vma(walk->mm, addr);
14501474
pagemap_entry_t pme = make_pme(0, 0);
@@ -1479,8 +1503,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
14791503
}
14801504

14811505
static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
1482-
struct vm_area_struct *vma, unsigned long addr, pte_t pte)
1506+
struct vm_area_struct *vma, unsigned long addr, pte_t *pte_addr)
14831507
{
1508+
pte_t pte = *pte_addr;
14841509
u64 frame = 0, flags = 0;
14851510
struct page *page = NULL;
14861511
bool migration = false;
@@ -1549,6 +1574,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
15491574
pmd_t pmd = *pmdp;
15501575
struct page *page = NULL;
15511576

1577+
if (pm->reset)
1578+
{
1579+
if (clear_soft_dirty_pmd(vma, addr, pmdp))
1580+
{
1581+
for (; addr != end; addr += PAGE_SIZE)
1582+
{
1583+
err = add_addr_to_pagemap(addr, pm);
1584+
if (err)
1585+
break;
1586+
}
1587+
}
1588+
goto trans_huge_done;
1589+
}
1590+
15521591
if (vma->vm_flags & VM_SOFTDIRTY)
15531592
flags |= PM_SOFT_DIRTY;
15541593

@@ -1606,6 +1645,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
16061645
frame += (1 << MAX_SWAPFILES_SHIFT);
16071646
}
16081647
}
1648+
trans_huge_done:
16091649
spin_unlock(ptl);
16101650
return err;
16111651
}
@@ -1620,10 +1660,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
16201660
*/
16211661
orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
16221662
for (; addr < end; pte++, addr += PAGE_SIZE) {
1623-
pagemap_entry_t pme;
1663+
if (pm->reset)
1664+
{
1665+
if (clear_soft_dirty(vma, addr, pte))
1666+
err = add_addr_to_pagemap(addr, pm);
1667+
}
1668+
else
1669+
{
1670+
pagemap_entry_t pme;
16241671

1625-
pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
1626-
err = add_to_pagemap(addr, &pme, pm);
1672+
pme = pte_to_pagemap_entry(pm, vma, addr, pte);
1673+
err = add_to_pagemap(addr, &pme, pm);
1674+
}
16271675
if (err)
16281676
break;
16291677
}
@@ -1721,8 +1769,8 @@ static const struct mm_walk_ops pagemap_ops = {
17211769
* determine which areas of memory are actually mapped and llseek to
17221770
* skip over unmapped regions.
17231771
*/
1724-
static ssize_t pagemap_read(struct file *file, char __user *buf,
1725-
size_t count, loff_t *ppos)
1772+
static ssize_t do_pagemap_read(struct file *file, char __user *buf,
1773+
size_t count, loff_t *ppos, bool reset)
17261774
{
17271775
struct mm_struct *mm = file->private_data;
17281776
struct pagemapread pm;
@@ -1731,6 +1779,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
17311779
unsigned long start_vaddr;
17321780
unsigned long end_vaddr;
17331781
int ret = 0, copied = 0;
1782+
struct mmu_notifier_range range;
1783+
size_t buffer_len;
17341784

17351785
if (!mm || !mmget_not_zero(mm))
17361786
goto out;
@@ -1746,19 +1796,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
17461796

17471797
/* do not disclose physical addresses: attack vector */
17481798
pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
1799+
pm.reset = reset;
17491800

1750-
pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
1751-
pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
1801+
buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES);
1802+
1803+
pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL);
17521804
ret = -ENOMEM;
17531805
if (!pm.buffer)
17541806
goto out_mm;
17551807

17561808
src = *ppos;
17571809
svpfn = src / PM_ENTRY_BYTES;
1758-
end_vaddr = mm->task_size;
1810+
1811+
start_vaddr = svpfn << PAGE_SHIFT;
1812+
1813+
if (reset)
1814+
{
1815+
if (count < sizeof(end_vaddr))
1816+
{
1817+
ret = -EINVAL;
1818+
goto out_mm;
1819+
}
1820+
if (copy_from_user(&end_vaddr, buf, sizeof(end_vaddr)))
1821+
return -EFAULT;
1822+
end_vaddr = min(end_vaddr, mm->task_size);
1823+
}
1824+
else
1825+
{
1826+
end_vaddr = mm->task_size;
1827+
start_vaddr = end_vaddr;
1828+
}
17591829

17601830
/* watch out for wraparound */
1761-
start_vaddr = end_vaddr;
17621831
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
17631832
start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
17641833

@@ -1778,25 +1847,44 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
17781847
unsigned long end;
17791848

17801849
pm.pos = 0;
1781-
end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
1850+
pm.len = min(buffer_len, count / PM_ENTRY_BYTES);
1851+
1852+
end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT));
17821853
/* overflow ? */
17831854
if (end < start_vaddr || end > end_vaddr)
17841855
end = end_vaddr;
1856+
17851857
ret = mmap_read_lock_killable(mm);
17861858
if (ret)
17871859
goto out_free;
1860+
1861+
if (reset)
1862+
{
1863+
inc_tlb_flush_pending(mm);
1864+
mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
1865+
0, NULL, mm, start_vaddr, end);
1866+
mmu_notifier_invalidate_range_start(&range);
1867+
}
17881868
ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
1869+
if (reset)
1870+
{
1871+
mmu_notifier_invalidate_range_end(&range);
1872+
flush_tlb_mm(mm);
1873+
dec_tlb_flush_pending(mm);
1874+
}
17891875
mmap_read_unlock(mm);
1790-
start_vaddr = end;
17911876

17921877
len = min(count, PM_ENTRY_BYTES * pm.pos);
1878+
BUG_ON(ret && ret != PM_END_OF_BUFFER);
17931879
if (copy_to_user(buf, pm.buffer, len)) {
17941880
ret = -EFAULT;
17951881
goto out_free;
17961882
}
17971883
copied += len;
17981884
buf += len;
17991885
count -= len;
1886+
1887+
start_vaddr = reset && pm.pos == pm.len ? ((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end;
18001888
}
18011889
*ppos += copied;
18021890
if (!ret || ret == PM_END_OF_BUFFER)
@@ -1810,6 +1898,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
18101898
return ret;
18111899
}
18121900

1901+
static ssize_t pagemap_read(struct file *file, char __user *buf,
1902+
size_t count, loff_t *ppos)
1903+
{
1904+
return do_pagemap_read(file, buf, count, ppos, false);
1905+
}
1906+
1907+
static ssize_t pagemap_reset_read(struct file *file, char __user *buf,
1908+
size_t count, loff_t *ppos)
1909+
{
1910+
return do_pagemap_read(file, buf, count, ppos, true);
1911+
}
1912+
18131913
static int pagemap_open(struct inode *inode, struct file *file)
18141914
{
18151915
struct mm_struct *mm;
@@ -1836,6 +1936,14 @@ const struct file_operations proc_pagemap_operations = {
18361936
.open = pagemap_open,
18371937
.release = pagemap_release,
18381938
};
1939+
1940+
const struct file_operations proc_pagemap_reset_operations = {
1941+
.llseek = mem_lseek, /* borrow this */
1942+
.read = pagemap_reset_read,
1943+
.open = pagemap_open,
1944+
.release = pagemap_release,
1945+
};
1946+
18391947
#endif /* CONFIG_PROC_PAGE_MONITOR */
18401948

18411949
#ifdef CONFIG_NUMA

0 commit comments

Comments
 (0)