Skip to content

Commit ead07f6

Browse files
Naoya Horiguchi, torvalds
authored and committed
mm/memory-failure: introduce get_hwpoison_page() for consistent refcount handling
memory_failure() can run in 2 different modes (specified by MF_COUNT_INCREASED) from a page refcount perspective. When MF_COUNT_INCREASED is set, memory_failure() assumes that the caller takes a refcount of the target page. And if cleared, memory_failure() takes it on its own. In the current code, however, refcounting is done differently in each caller. For example, madvise_hwpoison() uses get_user_pages_fast() and hwpoison_inject() uses get_page_unless_zero(). So this inconsistent refcounting causes refcount failure especially for thp tail pages. Typical user-visible effects are memory leaks or VM_BUG_ON_PAGE(!page_count(page)) in isolate_lru_page(). To fix this refcounting issue, this patch introduces get_hwpoison_page() to handle thp tail pages in the same manner for each caller of hwpoison code. memory_failure() might fail to split thp and in such a case it returns without completing page isolation. This is not good because PageHWPoison on the thp is still set and there's no easy way to unpoison such thps. So this patch tries to roll back any action to the thp in the "non anonymous thp" case and the "thp split failed" case, expecting that an MCE(SRAR) generated by a later access will properly free such thps. [[email protected]: fix CONFIG_HWPOISON_INJECT=m] Signed-off-by: Naoya Horiguchi <[email protected]> Cc: Andi Kleen <[email protected]> Cc: Tony Luck <[email protected]> Cc: "Kirill A. Shutemov" <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 415c64c commit ead07f6

File tree

3 files changed

+48
-7
lines changed

3 files changed

+48
-7
lines changed

include/linux/mm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,6 +2146,7 @@ enum mf_flags {
21462146
extern int memory_failure(unsigned long pfn, int trapno, int flags);
21472147
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
21482148
extern int unpoison_memory(unsigned long pfn);
2149+
extern int get_hwpoison_page(struct page *page);
21492150
extern int sysctl_memory_failure_early_kill;
21502151
extern int sysctl_memory_failure_recovery;
21512152
extern void shake_page(struct page *p, int access);

mm/hwpoison-inject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ static int hwpoison_inject(void *data, u64 val)
2828
/*
2929
* This implies unable to support free buddy pages.
3030
*/
31-
if (!get_page_unless_zero(hpage))
31+
if (!get_hwpoison_page(p))
3232
return 0;
3333

3434
if (!hwpoison_filter_enable)
@@ -58,7 +58,7 @@ static int hwpoison_inject(void *data, u64 val)
5858
pr_info("Injecting memory failure at pfn %#lx\n", pfn);
5959
return memory_failure(pfn, 18, MF_COUNT_INCREASED);
6060
put_out:
61-
put_page(hpage);
61+
put_page(p);
6262
return 0;
6363
}
6464

mm/memory-failure.c

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,39 @@ static int page_action(struct page_state *ps, struct page *p,
915915
return (result == RECOVERED || result == DELAYED) ? 0 : -EBUSY;
916916
}
917917

918+
/**
919+
* get_hwpoison_page() - Get refcount for memory error handling:
920+
* @page: raw error page (hit by memory error)
921+
*
922+
* Return: return 0 if failed to grab the refcount, otherwise true (some
923+
* non-zero value.)
924+
*/
925+
int get_hwpoison_page(struct page *page)
926+
{
927+
struct page *head = compound_head(page);
928+
929+
if (PageHuge(head))
930+
return get_page_unless_zero(head);
931+
932+
/*
933+
* Thp tail page has special refcounting rule (refcount of tail pages
934+
* is stored in ->_mapcount,) so we can't call get_page_unless_zero()
935+
* directly for tail pages.
936+
*/
937+
if (PageTransHuge(head)) {
938+
if (get_page_unless_zero(head)) {
939+
if (PageTail(page))
940+
get_page(page);
941+
return 1;
942+
} else {
943+
return 0;
944+
}
945+
}
946+
947+
return get_page_unless_zero(page);
948+
}
949+
EXPORT_SYMBOL_GPL(get_hwpoison_page);
950+
918951
/*
919952
* Do all that is necessary to remove user space mappings. Unmap
920953
* the pages and send SIGBUS to the processes if the data was dirty.
@@ -1097,8 +1130,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
10971130
* In fact it's dangerous to directly bump up page count from 0,
10981131
* that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
10991132
*/
1100-
if (!(flags & MF_COUNT_INCREASED) &&
1101-
!get_page_unless_zero(hpage)) {
1133+
if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) {
11021134
if (is_free_buddy_page(p)) {
11031135
action_result(pfn, MSG_BUDDY, DELAYED);
11041136
return 0;
@@ -1130,12 +1162,20 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
11301162
if (!PageHuge(p) && PageTransHuge(hpage)) {
11311163
if (!PageAnon(hpage)) {
11321164
pr_err("MCE: %#lx: non anonymous thp\n", pfn);
1165+
if (TestClearPageHWPoison(p))
1166+
atomic_long_sub(nr_pages, &num_poisoned_pages);
11331167
put_page(p);
1168+
if (p != hpage)
1169+
put_page(hpage);
11341170
return -EBUSY;
11351171
}
11361172
if (unlikely(split_huge_page(hpage))) {
11371173
pr_err("MCE: %#lx: thp split failed\n", pfn);
1174+
if (TestClearPageHWPoison(p))
1175+
atomic_long_sub(nr_pages, &num_poisoned_pages);
11381176
put_page(p);
1177+
if (p != hpage)
1178+
put_page(hpage);
11391179
return -EBUSY;
11401180
}
11411181
VM_BUG_ON_PAGE(!page_count(p), p);
@@ -1413,12 +1453,12 @@ int unpoison_memory(unsigned long pfn)
14131453
*/
14141454
if (!PageHuge(page) && PageTransHuge(page)) {
14151455
pr_info("MCE: Memory failure is now running on %#lx\n", pfn);
1416-
return 0;
1456+
return 0;
14171457
}
14181458

14191459
nr_pages = 1 << compound_order(page);
14201460

1421-
if (!get_page_unless_zero(page)) {
1461+
if (!get_hwpoison_page(p)) {
14221462
/*
14231463
* Since HWPoisoned hugepage should have non-zero refcount,
14241464
* race between memory failure and unpoison seems to happen.
@@ -1486,7 +1526,7 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags)
14861526
* When the target page is a free hugepage, just remove it
14871527
* from free hugepage list.
14881528
*/
1489-
if (!get_page_unless_zero(compound_head(p))) {
1529+
if (!get_hwpoison_page(p)) {
14901530
if (PageHuge(p)) {
14911531
pr_info("%s: %#lx free huge page\n", __func__, pfn);
14921532
ret = 0;

0 commit comments

Comments
 (0)