@@ -1095,8 +1095,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
 	return page_maybe_dma_pinned(page);
 }
 
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
-		unsigned long addr, pte_t *pte)
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *pte)
 {
 	/*
 	 * The soft-dirty tracker uses #PF-s to catch writes
@@ -1105,37 +1105,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 	 * of how soft-dirty works.
 	 */
 	pte_t ptent = *pte;
+	bool ret = false;
 
 	if (pte_present(ptent)) {
 		pte_t old_pte;
 
 		if (pte_is_pinned(vma, addr, ptent))
-			return;
+			return ret;
 		old_pte = ptep_modify_prot_start(vma, addr, pte);
+		ret = pte_soft_dirty(old_pte);
 		ptent = pte_wrprotect(old_pte);
 		ptent = pte_clear_soft_dirty(ptent);
 		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
 	} else if (is_swap_pte(ptent)) {
+		ret = pte_swp_soft_dirty(ptent);
 		ptent = pte_swp_clear_soft_dirty(ptent);
 		set_pte_at(vma->vm_mm, addr, pte, ptent);
 	}
+	return ret;
 }
 #else
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *pte)
 {
+	return false;
 }
 #endif
 
 #if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmdp)
 {
 	pmd_t old, pmd = *pmdp;
+	bool ret = false;
 
 	if (pmd_present(pmd)) {
 		/* See comment in change_huge_pmd() */
 		old = pmdp_invalidate(vma, addr, pmdp);
+
+		ret = pmd_soft_dirty(old);
+
 		if (pmd_dirty(old))
 			pmd = pmd_mkdirty(pmd);
 		if (pmd_young(old))
@@ -1146,14 +1155,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
 
 		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
 	} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+		ret = pmd_swp_soft_dirty(pmd);
 		pmd = pmd_swp_clear_soft_dirty(pmd);
 		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
 	}
+	return ret;
 }
 #else
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmdp)
 {
+	return false;
 }
 #endif
 
@@ -1406,6 +1418,7 @@ struct pagemapread {
 	int pos, len;		/* units: PM_ENTRY_BYTES, not bytes */
 	pagemap_entry_t *buffer;
 	bool show_pfn;
+	bool reset;
 };
 
 #define PAGEMAP_WALK_SIZE	(PMD_SIZE)
@@ -1438,13 +1451,24 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
 	return 0;
 }
 
+static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm)
+{
+	((unsigned long *)pm->buffer)[pm->pos++] = addr;
+	if (pm->pos >= pm->len)
+		return PM_END_OF_BUFFER;
+	return 0;
+}
+
 static int pagemap_pte_hole(unsigned long start, unsigned long end,
 			    __always_unused int depth, struct mm_walk *walk)
 {
 	struct pagemapread *pm = walk->private;
 	unsigned long addr = start;
 	int err = 0;
 
+	if (pm->reset)
+		goto out;
+
 	while (addr < end) {
 		struct vm_area_struct *vma = find_vma(walk->mm, addr);
 		pagemap_entry_t pme = make_pme(0, 0);
@@ -1479,8 +1503,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
 }
 
 static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
-		struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+		struct vm_area_struct *vma, unsigned long addr, pte_t *pte_addr)
 {
+	pte_t pte = *pte_addr;
 	u64 frame = 0, flags = 0;
 	struct page *page = NULL;
 	bool migration = false;
@@ -1549,6 +1574,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
 		pmd_t pmd = *pmdp;
 		struct page *page = NULL;
 
+		if (pm->reset)
+		{
+			if (clear_soft_dirty_pmd(vma, addr, pmdp))
+			{
+				for (; addr != end; addr += PAGE_SIZE)
+				{
+					err = add_addr_to_pagemap(addr, pm);
+					if (err)
+						break;
+				}
+			}
+			goto trans_huge_done;
+		}
+
 		if (vma->vm_flags & VM_SOFTDIRTY)
 			flags |= PM_SOFT_DIRTY;
 
@@ -1606,6 +1645,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
 					frame += (1 << MAX_SWAPFILES_SHIFT);
 			}
 		}
+trans_huge_done:
 		spin_unlock(ptl);
 		return err;
 	}
@@ -1620,10 +1660,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
 	 */
 	orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
 	for (; addr < end; pte++, addr += PAGE_SIZE) {
-		pagemap_entry_t pme;
+		if (pm->reset)
+		{
+			if (clear_soft_dirty(vma, addr, pte))
+				err = add_addr_to_pagemap(addr, pm);
+		}
+		else
+		{
+			pagemap_entry_t pme;
 
-		pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
-		err = add_to_pagemap(addr, &pme, pm);
+			pme = pte_to_pagemap_entry(pm, vma, addr, pte);
+			err = add_to_pagemap(addr, &pme, pm);
+		}
 		if (err)
 			break;
 	}
@@ -1721,8 +1769,8 @@ static const struct mm_walk_ops pagemap_ops = {
  * determine which areas of memory are actually mapped and llseek to
  * skip over unmapped regions.
  */
-static ssize_t pagemap_read(struct file *file, char __user *buf,
-			    size_t count, loff_t *ppos)
+static ssize_t do_pagemap_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos, bool reset)
 {
 	struct mm_struct *mm = file->private_data;
 	struct pagemapread pm;
@@ -1731,6 +1779,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	unsigned long start_vaddr;
 	unsigned long end_vaddr;
 	int ret = 0, copied = 0;
+	struct mmu_notifier_range range;
+	size_t buffer_len;
 
 	if (!mm || !mmget_not_zero(mm))
 		goto out;
@@ -1746,19 +1796,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 
 	/* do not disclose physical addresses: attack vector */
 	pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
+	pm.reset = reset;
 
-	pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
-	pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
+	buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES);
+
+	pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL);
 	ret = -ENOMEM;
 	if (!pm.buffer)
 		goto out_mm;
 
 	src = *ppos;
 	svpfn = src / PM_ENTRY_BYTES;
-	end_vaddr = mm->task_size;
+
+	start_vaddr = svpfn << PAGE_SHIFT;
+
+	if (reset)
+	{
+		if (count < sizeof(end_vaddr))
+		{
+			ret = -EINVAL;
+			goto out_mm;
+		}
+		if (copy_from_user(&end_vaddr, buf, sizeof(end_vaddr)))
+			return -EFAULT;
+		end_vaddr = min(end_vaddr, mm->task_size);
+	}
+	else
+	{
+		end_vaddr = mm->task_size;
+		start_vaddr = end_vaddr;
+	}
 
 	/* watch out for wraparound */
-	start_vaddr = end_vaddr;
 	if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
 		start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
 
@@ -1778,25 +1847,44 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 		unsigned long end;
 
 		pm.pos = 0;
-		end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
+		pm.len = min(buffer_len, count / PM_ENTRY_BYTES);
+
+		end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT));
 		/* overflow ? */
 		if (end < start_vaddr || end > end_vaddr)
 			end = end_vaddr;
+
 		ret = mmap_read_lock_killable(mm);
 		if (ret)
 			goto out_free;
+
+		if (reset)
+		{
+			inc_tlb_flush_pending(mm);
+			mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
+						0, NULL, mm, start_vaddr, end);
+			mmu_notifier_invalidate_range_start(&range);
+		}
 		ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
+		if (reset)
+		{
+			mmu_notifier_invalidate_range_end(&range);
+			flush_tlb_mm(mm);
+			dec_tlb_flush_pending(mm);
+		}
 		mmap_read_unlock(mm);
-		start_vaddr = end;
 
 		len = min(count, PM_ENTRY_BYTES * pm.pos);
+		BUG_ON(ret && ret != PM_END_OF_BUFFER);
 		if (copy_to_user(buf, pm.buffer, len)) {
			ret = -EFAULT;
 			goto out_free;
 		}
 		copied += len;
 		buf += len;
 		count -= len;
+
+		start_vaddr = reset && pm.pos == pm.len ? ((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end;
 	}
 	*ppos += copied;
 	if (!ret || ret == PM_END_OF_BUFFER)
@@ -1810,6 +1898,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	return ret;
 }
 
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	return do_pagemap_read(file, buf, count, ppos, false);
+}
+
+static ssize_t pagemap_reset_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	return do_pagemap_read(file, buf, count, ppos, true);
+}
+
 static int pagemap_open(struct inode *inode, struct file *file)
 {
 	struct mm_struct *mm;
@@ -1836,6 +1936,14 @@ const struct file_operations proc_pagemap_operations = {
 	.open		= pagemap_open,
 	.release	= pagemap_release,
 };
+
+const struct file_operations proc_pagemap_reset_operations = {
+	.llseek		= mem_lseek, /* borrow this */
+	.read		= pagemap_reset_read,
+	.open		= pagemap_open,
+	.release	= pagemap_release,
+};
+
 #endif /* CONFIG_PROC_PAGE_MONITOR */
 
 #ifdef CONFIG_NUMA
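How userspace would drive the new reset read path, as implied by `do_pagemap_read(..., reset == true)`: the file offset selects the start virtual address (`svpfn = *ppos / PM_ENTRY_BYTES`, `start_vaddr = svpfn << PAGE_SHIFT`), the first `sizeof(unsigned long)` bytes of the read buffer carry the end address into the kernel via `copy_from_user()`, and the returned data is an array of `unsigned long` addresses of pages whose soft-dirty bit was set and has now been cleared. Below is a minimal sketch under stated assumptions: the proc entry name (`/proc/<pid>/pagemap_reset`) is not shown in this hunk and is assumed from `proc_pagemap_reset_operations`, and `PM_ENTRY_BYTES` is taken to be 8 as in the regular pagemap format.

```c
/* Hypothetical userspace sketch; not part of the patch. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

#define PM_ENTRY_BYTES	8	/* assumed, matches sizeof(pagemap_entry_t) */

/*
 * Clear soft-dirty bits in [start, end) of the target process and collect
 * the addresses of pages that were soft-dirty. Returns the number of
 * addresses written into addrs[], or -1 on error.
 */
static long reset_soft_dirty(pid_t pid, unsigned long start, unsigned long end,
			     unsigned long *addrs, size_t naddrs)
{
	long page_size = sysconf(_SC_PAGESIZE);
	char path[64];
	ssize_t len;
	int fd;

	/* assumed file name; registration is outside this hunk */
	snprintf(path, sizeof(path), "/proc/%d/pagemap_reset", (int)pid);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	/* *ppos encodes the start address: (start / PAGE_SIZE) * PM_ENTRY_BYTES */
	if (lseek(fd, (start / page_size) * PM_ENTRY_BYTES, SEEK_SET) < 0) {
		close(fd);
		return -1;
	}

	/* the kernel reads the end address from the first 8 bytes of the buffer */
	memcpy(addrs, &end, sizeof(end));
	len = read(fd, addrs, naddrs * PM_ENTRY_BYTES);
	close(fd);
	if (len < 0)
		return -1;

	/* buffer now holds addresses of formerly soft-dirty pages */
	return len / PM_ENTRY_BYTES;
}
```

Because a short read stops at the last reported page rather than at a fixed walk-size boundary (see how `start_vaddr` is advanced from the last buffered address), the caller can simply repeat the call with the same range until it returns fewer entries than the buffer holds.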