 #include <linux/security.h>
 #include <linux/memcontrol.h>
 #include <linux/syscalls.h>
+#include <linux/hugetlb.h>
 #include <linux/gfp.h>
 
 #include "internal.h"
@@ -95,26 +96,34 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
 
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
+	if (unlikely(PageHuge(new))) {
+		ptep = huge_pte_offset(mm, addr);
+		if (!ptep)
+			goto out;
+		ptl = &mm->page_table_lock;
+	} else {
+		pgd = pgd_offset(mm, addr);
+		if (!pgd_present(*pgd))
+			goto out;
 
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
+		pud = pud_offset(pgd, addr);
+		if (!pud_present(*pud))
+			goto out;
 
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
+		pmd = pmd_offset(pud, addr);
+		if (!pmd_present(*pmd))
+			goto out;
 
-	ptep = pte_offset_map(pmd, addr);
+		ptep = pte_offset_map(pmd, addr);
 
-	if (!is_swap_pte(*ptep)) {
-		pte_unmap(ptep);
-		goto out;
-	}
+		if (!is_swap_pte(*ptep)) {
+			pte_unmap(ptep);
+			goto out;
+		}
+
+		ptl = pte_lockptr(mm, pmd);
+	}
 
-	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
@@ -130,10 +139,17 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
+	if (PageHuge(new))
+		pte = pte_mkhuge(pte);
 	flush_cache_page(vma, addr, pte_pfn(pte));
 	set_pte_at(mm, addr, ptep, pte);
 
-	if (PageAnon(new))
+	if (PageHuge(new)) {
+		if (PageAnon(new))
+			hugepage_add_anon_rmap(new, vma, addr);
+		else
+			page_dup_rmap(new);
+	} else if (PageAnon(new))
 		page_add_anon_rmap(new, vma, addr);
 	else
 		page_add_file_rmap(new);
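
The two hunks above teach remove_migration_pte() to restore a hugepage mapping: the page-table walk stops at the huge PTE via huge_pte_offset(), the lock switches to mm->page_table_lock (hugetlb page tables are not covered by the split PTE locks), and the rmap is rebuilt through the hugepage variants. For illustration only (this helper is not part of the patch), the condition both branches test before rewriting anything amounts to the following sketch; pte_is_migration_of() is a hypothetical name, assuming only the swapops.h migration-entry accessors this code already relies on.

/*
 * Hypothetical helper, not in this patch: does @pte encode a migration
 * swap entry that refers to @page?  This is the condition under which
 * remove_migration_pte() rewrites the entry with a real PTE.
 */
static bool pte_is_migration_of(pte_t pte, struct page *page)
{
	swp_entry_t entry;

	if (!is_swap_pte(pte))		/* present or empty: not a migration entry */
		return false;
	entry = pte_to_swp_entry(pte);
	return is_migration_entry(entry) &&
	       migration_entry_to_page(entry) == page;
}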
@@ -275,12 +291,60 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	return 0;
 }
 
+/*
+ * The expected number of remaining references is the same as that
+ * of migrate_page_move_mapping().
+ */
+int migrate_huge_page_move_mapping(struct address_space *mapping,
+				   struct page *newpage, struct page *page)
+{
+	int expected_count;
+	void **pslot;
+
+	if (!mapping) {
+		if (page_count(page) != 1)
+			return -EAGAIN;
+		return 0;
+	}
+
+	spin_lock_irq(&mapping->tree_lock);
+
+	pslot = radix_tree_lookup_slot(&mapping->page_tree,
+					page_index(page));
+
+	expected_count = 2 + page_has_private(page);
+	if (page_count(page) != expected_count ||
+		(struct page *)radix_tree_deref_slot(pslot) != page) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	if (!page_freeze_refs(page, expected_count)) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	get_page(newpage);
+
+	radix_tree_replace_slot(pslot, newpage);
+
+	page_unfreeze_refs(page, expected_count);
+
+	__put_page(page);
+
+	spin_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
 /*
  * Copy the page to its new location
  */
-static void migrate_page_copy(struct page *newpage, struct page *page)
+void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	copy_highpage(newpage, page);
+	if (PageHuge(page))
+		copy_huge_page(newpage, page);
+	else
+		copy_highpage(newpage, page);
 
 	if (PageError(page))
 		SetPageError(newpage);
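
migrate_huge_page_move_mapping() mirrors migrate_page_move_mapping(): with a backing mapping, the expected reference count is 2 + page_has_private(page) (the radix-tree slot plus the reference held by the migration caller), the refcount is frozen, and the slot is swapped under mapping->tree_lock. For illustration only, a minimal sketch of how it composes with the now-exported migrate_page_copy() for a hugepage that carries no buffers; migrate_huge_page_nobuf() is a hypothetical name, roughly the shape of a hugetlbfs ->migratepage handler.

/*
 * Hypothetical composition of the two helpers above (not part of this
 * patch): move the page-cache slot first, then copy data and page state.
 */
static int migrate_huge_page_nobuf(struct address_space *mapping,
				   struct page *newpage, struct page *page)
{
	int rc;

	rc = migrate_huge_page_move_mapping(mapping, newpage, page);
	if (rc)
		return rc;

	migrate_page_copy(newpage, page);
	return 0;
}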
@@ -723,6 +787,92 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	return rc;
 }
 
+/*
+ * Counterpart of unmap_and_move_page() for hugepage migration.
+ *
+ * This function doesn't wait the completion of hugepage I/O
+ * because there is no race between I/O and migration for hugepage.
+ * Note that currently hugepage I/O occurs only in direct I/O
+ * where no lock is held and PG_writeback is irrelevant,
+ * and writeback status of all subpages are counted in the reference
+ * count of the head page (i.e. if all subpages of a 2MB hugepage are
+ * under direct I/O, the reference of the head page is 512 and a bit more.)
+ * This means that when we try to migrate hugepage whose subpages are
+ * doing direct I/O, some references remain after try_to_unmap() and
+ * hugepage migration fails without data corruption.
+ *
+ * There is also no race when direct I/O is issued on the page under migration,
+ * because then pte is replaced with migration swap entry and direct I/O code
+ * will wait in the page fault for migration to complete.
+ */
+static int unmap_and_move_huge_page(new_page_t get_new_page,
+				unsigned long private, struct page *hpage,
+				int force, int offlining)
+{
+	int rc = 0;
+	int *result = NULL;
+	struct page *new_hpage = get_new_page(hpage, private, &result);
+	int rcu_locked = 0;
+	struct anon_vma *anon_vma = NULL;
+
+	if (!new_hpage)
+		return -ENOMEM;
+
+	rc = -EAGAIN;
+
+	if (!trylock_page(hpage)) {
+		if (!force)
+			goto out;
+		lock_page(hpage);
+	}
+
+	if (PageAnon(hpage)) {
+		rcu_read_lock();
+		rcu_locked = 1;
+
+		if (page_mapped(hpage)) {
+			anon_vma = page_anon_vma(hpage);
+			atomic_inc(&anon_vma->external_refcount);
+		}
+	}
+
+	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+	if (!page_mapped(hpage))
+		rc = move_to_new_page(new_hpage, hpage, 1);
+
+	if (rc)
+		remove_migration_ptes(hpage, hpage);
+
+	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
+					    &anon_vma->lock)) {
+		int empty = list_empty(&anon_vma->head);
+		spin_unlock(&anon_vma->lock);
+		if (empty)
+			anon_vma_free(anon_vma);
+	}
+
+	if (rcu_locked)
+		rcu_read_unlock();
+out:
+	unlock_page(hpage);
+
+	if (rc != -EAGAIN) {
+		list_del(&hpage->lru);
+		put_page(hpage);
+	}
+
+	put_page(new_hpage);
+
+	if (result) {
+		if (rc)
+			*result = rc;
+		else
+			*result = page_to_nid(new_hpage);
+	}
+	return rc;
+}
+
 /*
  * migrate_pages
  *
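
unmap_and_move_huge_page() takes the same new_page_t callback as the base-page path, so the caller chooses where the destination hugepage comes from. For illustration, a sketch of such a callback that keeps the destination on the source node; new_hugepage() is a hypothetical name, and it assumes alloc_huge_page_node(), the per-node hugepage allocator added elsewhere in this series.

/*
 * Hypothetical new_page_t callback (not in this patch): allocate the
 * destination hugepage on the same node as the source hugepage.
 * Assumes alloc_huge_page_node(), added elsewhere in this series.
 */
static struct page *new_hugepage(struct page *page, unsigned long private,
				 int **result)
{
	int nid = page_to_nid(compound_head(page));

	return alloc_huge_page_node(page_hstate(compound_head(page)), nid);
}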
@@ -788,6 +938,52 @@ int migrate_pages(struct list_head *from,
 	return nr_failed + retry;
 }
 
+int migrate_huge_pages(struct list_head *from,
+		new_page_t get_new_page, unsigned long private, int offlining)
+{
+	int retry = 1;
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page;
+	struct page *page2;
+	int rc;
+
+	for (pass = 0; pass < 10 && retry; pass++) {
+		retry = 0;
+
+		list_for_each_entry_safe(page, page2, from, lru) {
+			cond_resched();
+
+			rc = unmap_and_move_huge_page(get_new_page,
+					private, page, pass > 2, offlining);
+
+			switch (rc) {
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case 0:
+				break;
+			default:
+				/* Permanent failure */
+				nr_failed++;
+				break;
+			}
+		}
+	}
+	rc = 0;
+out:
+
+	list_for_each_entry_safe(page, page2, from, lru)
+		put_page(page);
+
+	if (rc)
+		return rc;
+
+	return nr_failed + retry;
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Move a list of individual pages
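
migrate_huge_pages() reuses the retry loop of migrate_pages(), but unlike that path it drops the source-page references itself before returning, so callers only isolate pages onto a list and supply an allocator callback. A hedged caller sketch, assuming the hypothetical new_hugepage() callback above and that the caller already holds a reference on the hugepage (e.g. via get_page_unless_zero()):

/*
 * Hypothetical caller (not in this patch): isolate a single in-use
 * hugepage onto a private list and hand it to migrate_huge_pages().
 * The reference taken by the caller is dropped by migrate_huge_pages()
 * itself on completion or failure.
 */
static int migrate_one_hugepage(struct page *hpage)
{
	LIST_HEAD(pagelist);

	list_add(&hpage->lru, &pagelist);
	return migrate_huge_pages(&pagelist, new_hugepage, 0, 0);
}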