
Commit 99cb0db

liu-song-6 authored and torvalds committed
mm,thp: add read-only THP support for (non-shmem) FS
This patch is (hopefully) the first step to enable THP for non-shmem filesystems.

It enables an application to put part of its text section into THP via madvise, for example:

    madvise((void *)0x600000, 0x200000, MADV_HUGEPAGE);

We tried to reuse the logic for THP on tmpfs.

Currently, write is not supported for non-shmem THP. khugepaged will only process vmas with VM_DENYWRITE. sys_mmap() ignores VM_DENYWRITE requests (see ksys_mmap_pgoff()), so the only way to create a vma with VM_DENYWRITE is execve(). This requirement limits non-shmem THP to text sections.

The next patch will handle writes, which would only happen once all the vmas with VM_DENYWRITE are unmapped.

An EXPERIMENTAL config, READ_ONLY_THP_FOR_FS, is added to gate this feature.

[[email protected]: fix build without CONFIG_SHMEM]
Link: http://lkml.kernel.org/r/[email protected]
[[email protected]: fix double unlock in collapse_file()]
Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Song Liu <[email protected]>
Acked-by: Rik van Riel <[email protected]>
Acked-by: Kirill A. Shutemov <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Stephen Rothwell <[email protected]>
Cc: Dan Carpenter <[email protected]>
Cc: Hillf Danton <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: William Kucharski <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent: 579c571 · commit: 99cb0db
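To make the madvise hint above concrete, here is a minimal userspace sketch; it is not part of the patch. The 0x600000 address and 0x200000 length mirror the commit message's example and assume the binary's text segment actually occupies that 2MB-aligned range; on a kernel without CONFIG_READ_ONLY_THP_FOR_FS the call still succeeds, but file-backed text is never collapsed.

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* Hypothetical 2MB-aligned slice of this binary's text segment. */
	void *text = (void *)0x600000;
	size_t len = 0x200000;	/* one PMD-sized (2MB) extent */

	/*
	 * Register the hint; the actual collapse, if any, is done later
	 * and asynchronously by khugepaged.
	 */
	if (madvise(text, len, MADV_HUGEPAGE)) {
		perror("madvise(MADV_HUGEPAGE)");
		return 1;
	}
	puts("MADV_HUGEPAGE set; khugepaged may collapse the range");
	return 0;
}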

4 files changed, +128 −48 lines


mm/Kconfig

Lines changed: 11 additions & 0 deletions
@@ -712,6 +712,17 @@ config GUP_BENCHMARK
 config GUP_GET_PTE_LOW_HIGH
 	bool

+config READ_ONLY_THP_FOR_FS
+	bool "Read-only THP for filesystems (EXPERIMENTAL)"
+	depends on TRANSPARENT_HUGE_PAGECACHE && SHMEM
+
+	help
+	  Allow khugepaged to put read-only file-backed pages in THP.
+
+	  This is marked experimental because it is a new feature. Write
+	  support of file THPs will be developed in the next few release
+	  cycles.
+
 config ARCH_HAS_PTE_SPECIAL
 	bool

mm/filemap.c

Lines changed: 2 additions & 2 deletions
@@ -203,8 +203,8 @@ static void unaccount_page_cache_page(struct address_space *mapping,
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
 		if (PageTransHuge(page))
 			__dec_node_page_state(page, NR_SHMEM_THPS);
-	} else {
-		VM_BUG_ON_PAGE(PageTransHuge(page), page);
+	} else if (PageTransHuge(page)) {
+		__dec_node_page_state(page, NR_FILE_THPS);
 	}

 	/*

mm/khugepaged.c

Lines changed: 107 additions & 42 deletions
@@ -48,6 +48,7 @@ enum scan_result {
 	SCAN_CGROUP_CHARGE_FAIL,
 	SCAN_EXCEED_SWAP_PTE,
 	SCAN_TRUNCATED,
+	SCAN_PAGE_HAS_PRIVATE,
 };

 #define CREATE_TRACE_POINTS
@@ -404,7 +405,11 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
 	    (vm_flags & VM_NOHUGEPAGE) ||
 	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		return false;
-	if (shmem_file(vma->vm_file)) {
+
+	if (shmem_file(vma->vm_file) ||
+	    (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+	     vma->vm_file &&
+	     (vm_flags & VM_DENYWRITE))) {
 		if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
 			return false;
 		return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
@@ -456,8 +461,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 	unsigned long hstart, hend;

 	/*
-	 * khugepaged does not yet work on non-shmem files or special
-	 * mappings. And file-private shmem THP is not supported.
+	 * khugepaged only supports read-only files for non-shmem files.
+	 * khugepaged does not yet work on special mappings. And
+	 * file-private shmem THP is not supported.
 	 */
 	if (!hugepage_vma_check(vma, vm_flags))
 		return 0;
@@ -1287,12 +1293,12 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 }

 /**
- * collapse_file - collapse small tmpfs/shmem pages into huge one.
+ * collapse_file - collapse filemap/tmpfs/shmem pages into huge one.
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
- *    + swap in pages if necessary;
+ *    + swap/gup in pages if necessary;
  *    + fill in gaps;
  *    + keep old pages around in case rollback is required;
  *  - if replacing succeeds:
@@ -1316,7 +1322,9 @@ static void collapse_file(struct mm_struct *mm,
 	LIST_HEAD(pagelist);
 	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
 	int nr_none = 0, result = SCAN_SUCCEED;
+	bool is_shmem = shmem_file(file);

+	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));

 	/* Only allocate from the target node */
@@ -1348,7 +1356,8 @@ static void collapse_file(struct mm_struct *mm,
 	} while (1);

 	__SetPageLocked(new_page);
-	__SetPageSwapBacked(new_page);
+	if (is_shmem)
+		__SetPageSwapBacked(new_page);
 	new_page->index = start;
 	new_page->mapping = mapping;
@@ -1363,41 +1372,75 @@ static void collapse_file(struct mm_struct *mm,
 		struct page *page = xas_next(&xas);

 		VM_BUG_ON(index != xas.xa_index);
-		if (!page) {
-			/*
-			 * Stop if extent has been truncated or hole-punched,
-			 * and is now completely empty.
-			 */
-			if (index == start) {
-				if (!xas_next_entry(&xas, end - 1)) {
-					result = SCAN_TRUNCATED;
+		if (is_shmem) {
+			if (!page) {
+				/*
+				 * Stop if extent has been truncated or
+				 * hole-punched, and is now completely
+				 * empty.
+				 */
+				if (index == start) {
+					if (!xas_next_entry(&xas, end - 1)) {
+						result = SCAN_TRUNCATED;
+						goto xa_locked;
+					}
+					xas_set(&xas, index);
+				}
+				if (!shmem_charge(mapping->host, 1)) {
+					result = SCAN_FAIL;
 					goto xa_locked;
 				}
-				xas_set(&xas, index);
+				xas_store(&xas, new_page);
+				nr_none++;
+				continue;
 			}
-			if (!shmem_charge(mapping->host, 1)) {
-				result = SCAN_FAIL;
+
+			if (xa_is_value(page) || !PageUptodate(page)) {
+				xas_unlock_irq(&xas);
+				/* swap in or instantiate fallocated page */
+				if (shmem_getpage(mapping->host, index, &page,
+						  SGP_NOHUGE)) {
+					result = SCAN_FAIL;
+					goto xa_unlocked;
+				}
+			} else if (trylock_page(page)) {
+				get_page(page);
+				xas_unlock_irq(&xas);
+			} else {
+				result = SCAN_PAGE_LOCK;
 				goto xa_locked;
 			}
-			xas_store(&xas, new_page);
-			nr_none++;
-			continue;
-		}
-
-		if (xa_is_value(page) || !PageUptodate(page)) {
-			xas_unlock_irq(&xas);
-			/* swap in or instantiate fallocated page */
-			if (shmem_getpage(mapping->host, index, &page,
-					  SGP_NOHUGE)) {
+		} else {	/* !is_shmem */
+			if (!page || xa_is_value(page)) {
+				xas_unlock_irq(&xas);
+				page_cache_sync_readahead(mapping, &file->f_ra,
+							  file, index,
+							  PAGE_SIZE);
+				/* drain pagevecs to help isolate_lru_page() */
+				lru_add_drain();
+				page = find_lock_page(mapping, index);
+				if (unlikely(page == NULL)) {
+					result = SCAN_FAIL;
+					goto xa_unlocked;
+				}
+			} else if (!PageUptodate(page)) {
+				xas_unlock_irq(&xas);
+				wait_on_page_locked(page);
+				if (!trylock_page(page)) {
+					result = SCAN_PAGE_LOCK;
+					goto xa_unlocked;
+				}
+				get_page(page);
+			} else if (PageDirty(page)) {
 				result = SCAN_FAIL;
-				goto xa_unlocked;
+				goto xa_locked;
+			} else if (trylock_page(page)) {
+				get_page(page);
+				xas_unlock_irq(&xas);
+			} else {
+				result = SCAN_PAGE_LOCK;
+				goto xa_locked;
 			}
-		} else if (trylock_page(page)) {
-			get_page(page);
-			xas_unlock_irq(&xas);
-		} else {
-			result = SCAN_PAGE_LOCK;
-			goto xa_locked;
 		}

 		/*
@@ -1426,6 +1469,12 @@ static void collapse_file(struct mm_struct *mm,
 			goto out_unlock;
 		}

+		if (page_has_private(page) &&
+		    !try_to_release_page(page, GFP_KERNEL)) {
+			result = SCAN_PAGE_HAS_PRIVATE;
+			goto out_unlock;
+		}
+
 		if (page_mapped(page))
 			unmap_mapping_pages(mapping, index, 1, false);

@@ -1463,12 +1512,18 @@ static void collapse_file(struct mm_struct *mm,
 		goto xa_unlocked;
 	}

-	__inc_node_page_state(new_page, NR_SHMEM_THPS);
+	if (is_shmem)
+		__inc_node_page_state(new_page, NR_SHMEM_THPS);
+	else
+		__inc_node_page_state(new_page, NR_FILE_THPS);
+
 	if (nr_none) {
 		struct zone *zone = page_zone(new_page);

 		__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
-		__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+		if (is_shmem)
+			__mod_node_page_state(zone->zone_pgdat,
+					      NR_SHMEM, nr_none);
 	}

 xa_locked:
@@ -1506,10 +1561,15 @@ static void collapse_file(struct mm_struct *mm,

 	SetPageUptodate(new_page);
 	page_ref_add(new_page, HPAGE_PMD_NR - 1);
-	set_page_dirty(new_page);
 	mem_cgroup_commit_charge(new_page, memcg, false, true);
+
+	if (is_shmem) {
+		set_page_dirty(new_page);
+		lru_cache_add_anon(new_page);
+	} else {
+		lru_cache_add_file(new_page);
+	}
 	count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
-	lru_cache_add_anon(new_page);

 	/*
 	 * Remove pte page tables, so we can re-fault the page as huge.
@@ -1524,7 +1584,9 @@ static void collapse_file(struct mm_struct *mm,
 	/* Something went wrong: roll back page cache changes */
 	xas_lock_irq(&xas);
 	mapping->nrpages -= nr_none;
-	shmem_uncharge(mapping->host, nr_none);
+
+	if (is_shmem)
+		shmem_uncharge(mapping->host, nr_none);

 	xas_set(&xas, start);
 	xas_for_each(&xas, page, end - 1) {
@@ -1607,7 +1669,8 @@ static void khugepaged_scan_file(struct mm_struct *mm,
 			break;
 		}

-		if (page_count(page) != 1 + page_mapcount(page)) {
+		if (page_count(page) !=
+		    1 + page_mapcount(page) + page_has_private(page)) {
 			result = SCAN_PAGE_COUNT;
 			break;
 		}
@@ -1713,11 +1776,13 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			VM_BUG_ON(khugepaged_scan.address < hstart ||
 				  khugepaged_scan.address + HPAGE_PMD_SIZE >
 				  hend);
-			if (shmem_file(vma->vm_file)) {
+			if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
 				struct file *file;
 				pgoff_t pgoff = linear_page_index(vma,
						khugepaged_scan.address);
+
+				if (shmem_file(vma->vm_file)
+				    && !shmem_huge_enabled(vma))
 					goto skip;
 				file = get_file(vma->vm_file);
 				up_read(&mm->mmap_sem);

mm/rmap.c

Lines changed: 8 additions & 4 deletions
@@ -1189,8 +1189,10 @@ void page_add_file_rmap(struct page *page, bool compound)
 		}
 		if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
 			goto out;
-		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
-		__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+		if (PageSwapBacked(page))
+			__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+		else
+			__inc_node_page_state(page, NR_FILE_PMDMAPPED);
 	} else {
 		if (PageTransCompound(page) && page_mapping(page)) {
 			VM_WARN_ON_ONCE(!PageLocked(page));
@@ -1229,8 +1231,10 @@ static void page_remove_file_rmap(struct page *page, bool compound)
 		}
 		if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
 			goto out;
-		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
-		__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+		if (PageSwapBacked(page))
+			__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+		else
+			__dec_node_page_state(page, NR_FILE_PMDMAPPED);
 	} else {
 		if (!atomic_add_negative(-1, &page->_mapcount))
 			goto out;

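A note on observability: this commit adds the NR_FILE_THPS and NR_FILE_PMDMAPPED counters but does not itself export them to userspace; a follow-up patch in the same series ("mm,thp: stats for file backed THP") surfaces them as FileHugePages and FilePmdMapped in /proc/meminfo. Assuming a kernel with that follow-up applied, here is a small sketch for checking whether any file-backed collapse has happened:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/meminfo", "r");
	char line[256];

	if (!f) {
		perror("fopen(/proc/meminfo)");
		return 1;
	}
	/*
	 * Print only the file-THP counters backed by NR_FILE_THPS and
	 * NR_FILE_PMDMAPPED; both read 0 until a collapse succeeds.
	 */
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "FileHugePages:", 14) ||
		    !strncmp(line, "FilePmdMapped:", 14))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}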