Skip to content

Commit 49624ef

Browse files
committed
Merge tag 'denywrite-for-5.15' of git://github.com/davidhildenbrand/linux
Pull MAP_DENYWRITE removal from David Hildenbrand: "Remove all in-tree usage of MAP_DENYWRITE from the kernel and remove VM_DENYWRITE. There are some (minor) user-visible changes: - We no longer deny write access to shared libraries loaded via legacy uselib(); this behavior matches modern user space, e.g., dlopen(). - We no longer deny write access to the ELF interpreter after exec has completed, treating it just like shared libraries (which it often is). - We always deny write access to the file linked via /proc/pid/exe: sys_prctl(PR_SET_MM_MAP/EXE_FILE) will fail if write access to the file cannot be denied, and write access to the file will remain denied until the link is effectively gone (exec, termination, sys_prctl(PR_SET_MM_MAP/EXE_FILE)) -- just as if exec'ing the file. Cross-compiled for a bunch of architectures (alpha, microblaze, i386, s390x, ...) and verified via LTP that especially the relevant tests (i.e., creat07 and execve04) continue working as expected" * tag 'denywrite-for-5.15' of git://github.com/davidhildenbrand/linux: fs: update documentation of get_write_access() and friends mm: ignore MAP_DENYWRITE in ksys_mmap_pgoff() mm: remove VM_DENYWRITE binfmt: remove in-tree usage of MAP_DENYWRITE kernel/fork: always deny write access to current MM exe_file kernel/fork: factor out replacing the current MM exe_file binfmt: don't use MAP_DENYWRITE when loading shared libraries via uselib()
2 parents f746406 + 592ca09 commit 49624ef

File tree

16 files changed

+119
-103
lines changed

16 files changed

+119
-103
lines changed

arch/x86/ia32/ia32_aout.c

+3-5
Original file line numberDiff line numberDiff line change
@@ -202,17 +202,15 @@ static int load_aout_binary(struct linux_binprm *bprm)
202202

203203
error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
204204
PROT_READ | PROT_EXEC,
205-
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
206-
MAP_32BIT,
205+
MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
207206
fd_offset);
208207

209208
if (error != N_TXTADDR(ex))
210209
return error;
211210

212211
error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
213212
PROT_READ | PROT_WRITE | PROT_EXEC,
214-
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
215-
MAP_32BIT,
213+
MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
216214
fd_offset + ex.a_text);
217215
if (error != N_DATADDR(ex))
218216
return error;
@@ -293,7 +291,7 @@ static int load_aout_library(struct file *file)
293291
/* Now use mmap to map the library into memory. */
294292
error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
295293
PROT_READ | PROT_WRITE | PROT_EXEC,
296-
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT,
294+
MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
297295
N_TXTOFF(ex));
298296
retval = error;
299297
if (error != start_addr)

fs/binfmt_aout.c

+3-4
Original file line numberDiff line numberDiff line change
@@ -221,16 +221,15 @@ static int load_aout_binary(struct linux_binprm * bprm)
221221
}
222222

223223
error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
224-
PROT_READ | PROT_EXEC,
225-
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
224+
PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE,
226225
fd_offset);
227226

228227
if (error != N_TXTADDR(ex))
229228
return error;
230229

231230
error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
232231
PROT_READ | PROT_WRITE | PROT_EXEC,
233-
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
232+
MAP_FIXED | MAP_PRIVATE,
234233
fd_offset + ex.a_text);
235234
if (error != N_DATADDR(ex))
236235
return error;
@@ -309,7 +308,7 @@ static int load_aout_library(struct file *file)
309308
/* Now use mmap to map the library into memory. */
310309
error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
311310
PROT_READ | PROT_WRITE | PROT_EXEC,
312-
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
311+
MAP_FIXED | MAP_PRIVATE,
313312
N_TXTOFF(ex));
314313
retval = error;
315314
if (error != start_addr)

fs/binfmt_elf.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -622,7 +622,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
622622
eppnt = interp_elf_phdata;
623623
for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
624624
if (eppnt->p_type == PT_LOAD) {
625-
int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
625+
int elf_type = MAP_PRIVATE;
626626
int elf_prot = make_prot(eppnt->p_flags, arch_state,
627627
true, true);
628628
unsigned long vaddr = 0;
@@ -1070,7 +1070,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
10701070
elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
10711071
!!interpreter, false);
10721072

1073-
elf_flags = MAP_PRIVATE | MAP_DENYWRITE;
1073+
elf_flags = MAP_PRIVATE;
10741074

10751075
vaddr = elf_ppnt->p_vaddr;
10761076
/*
@@ -1384,7 +1384,7 @@ static int load_elf_library(struct file *file)
13841384
(eppnt->p_filesz +
13851385
ELF_PAGEOFFSET(eppnt->p_vaddr)),
13861386
PROT_READ | PROT_WRITE | PROT_EXEC,
1387-
MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
1387+
MAP_FIXED_NOREPLACE | MAP_PRIVATE,
13881388
(eppnt->p_offset -
13891389
ELF_PAGEOFFSET(eppnt->p_vaddr)));
13901390
if (error != ELF_PAGESTART(eppnt->p_vaddr))

fs/binfmt_elf_fdpic.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
10411041
if (phdr->p_flags & PF_W) prot |= PROT_WRITE;
10421042
if (phdr->p_flags & PF_X) prot |= PROT_EXEC;
10431043

1044-
flags = MAP_PRIVATE | MAP_DENYWRITE;
1044+
flags = MAP_PRIVATE;
10451045
maddr = 0;
10461046

10471047
switch (params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) {

fs/exec.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -1272,7 +1272,9 @@ int begin_new_exec(struct linux_binprm * bprm)
12721272
* not visible until then. This also enables the update
12731273
* to be lockless.
12741274
*/
1275-
set_mm_exe_file(bprm->mm, bprm->file);
1275+
retval = set_mm_exe_file(bprm->mm, bprm->file);
1276+
if (retval)
1277+
goto out;
12761278

12771279
/* If the binary is not readable then enforce mm->dumpable=0 */
12781280
would_dump(bprm, bprm->file);

fs/proc/task_mmu.c

-1
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
619619
[ilog2(VM_MAYSHARE)] = "ms",
620620
[ilog2(VM_GROWSDOWN)] = "gd",
621621
[ilog2(VM_PFNMAP)] = "pf",
622-
[ilog2(VM_DENYWRITE)] = "dw",
623622
[ilog2(VM_LOCKED)] = "lo",
624623
[ilog2(VM_IO)] = "io",
625624
[ilog2(VM_SEQ_READ)] = "sr",

include/linux/fs.h

+12-7
Original file line numberDiff line numberDiff line change
@@ -3023,15 +3023,20 @@ static inline void file_end_write(struct file *file)
30233023
}
30243024

30253025
/*
3026+
* This is used for regular files where some users -- especially the
3027+
* currently executed binary in a process, previously handled via
3028+
* VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap
3029+
* read-write shared) accesses.
3030+
*
30263031
* get_write_access() gets write permission for a file.
30273032
* put_write_access() releases this write permission.
3028-
* This is used for regular files.
3029-
* We cannot support write (and maybe mmap read-write shared) accesses and
3030-
* MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
3031-
* can have the following values:
3032-
* 0: no writers, no VM_DENYWRITE mappings
3033-
* < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
3034-
* > 0: (i_writecount) users are writing to the file.
3033+
* deny_write_access() denies write access to a file.
3034+
* allow_write_access() re-enables write access to a file.
3035+
*
3036+
* The i_writecount field of an inode can have the following values:
3037+
* 0: no write access, no denied write access
3038+
* < 0: (-i_writecount) users that denied write access to the file.
3039+
* > 0: (i_writecount) users that have write access to the file.
30353040
*
30363041
* Normally we operate on that counter with atomic_{inc,dec} and it's safe
30373042
* except for the cases where we don't hold i_writecount yet. Then we need to

include/linux/mm.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,6 @@ extern unsigned int kobjsize(const void *objp);
281281
#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
282282
#define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */
283283
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
284-
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
285284
#define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */
286285

287286
#define VM_LOCKED 0x00002000
@@ -2573,7 +2572,8 @@ static inline int check_data_rlimit(unsigned long rlim,
25732572
extern int mm_take_all_locks(struct mm_struct *mm);
25742573
extern void mm_drop_all_locks(struct mm_struct *mm);
25752574

2576-
extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
2575+
extern int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
2576+
extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
25772577
extern struct file *get_mm_exe_file(struct mm_struct *mm);
25782578
extern struct file *get_task_exe_file(struct task_struct *task);
25792579

include/linux/mman.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
* The historical set of flags that all mmap implementations implicitly
3333
* support when a ->mmap_validate() op is not provided in file_operations.
3434
*
35-
* MAP_EXECUTABLE is completely ignored throughout the kernel.
35+
* MAP_EXECUTABLE and MAP_DENYWRITE are completely ignored throughout the
36+
* kernel.
3637
*/
3738
#define LEGACY_MAP_MASK (MAP_SHARED \
3839
| MAP_PRIVATE \
@@ -153,7 +154,6 @@ static inline unsigned long
153154
calc_vm_flag_bits(unsigned long flags)
154155
{
155156
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
156-
_calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
157157
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
158158
_calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
159159
arch_calc_vm_flag_bits(flags);

include/trace/events/mmflags.h

-1
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,6 @@ IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
165165
{VM_UFFD_MISSING, "uffd_missing" }, \
166166
IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR, "uffd_minor" ) \
167167
{VM_PFNMAP, "pfnmap" }, \
168-
{VM_DENYWRITE, "denywrite" }, \
169168
{VM_UFFD_WP, "uffd_wp" }, \
170169
{VM_LOCKED, "locked" }, \
171170
{VM_IO, "io" }, \

kernel/events/core.c

-2
Original file line numberDiff line numberDiff line change
@@ -8320,8 +8320,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
83208320
else
83218321
flags = MAP_PRIVATE;
83228322

8323-
if (vma->vm_flags & VM_DENYWRITE)
8324-
flags |= MAP_DENYWRITE;
83258323
if (vma->vm_flags & VM_LOCKED)
83268324
flags |= MAP_LOCKED;
83278325
if (is_vm_hugetlb_page(vma))

kernel/fork.c

+84-11
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,20 @@ void free_task(struct task_struct *tsk)
471471
}
472472
EXPORT_SYMBOL(free_task);
473473

474+
static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
475+
{
476+
struct file *exe_file;
477+
478+
exe_file = get_mm_exe_file(oldmm);
479+
RCU_INIT_POINTER(mm->exe_file, exe_file);
480+
/*
481+
* We depend on the oldmm having properly denied write access to the
482+
* exe_file already.
483+
*/
484+
if (exe_file && deny_write_access(exe_file))
485+
pr_warn_once("deny_write_access() failed in %s\n", __func__);
486+
}
487+
474488
#ifdef CONFIG_MMU
475489
static __latent_entropy int dup_mmap(struct mm_struct *mm,
476490
struct mm_struct *oldmm)
@@ -494,7 +508,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
494508
mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);
495509

496510
/* No ordering required: file already has been exposed. */
497-
RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
511+
dup_mm_exe_file(mm, oldmm);
498512

499513
mm->total_vm = oldmm->total_vm;
500514
mm->data_vm = oldmm->data_vm;
@@ -557,12 +571,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
557571
tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
558572
file = tmp->vm_file;
559573
if (file) {
560-
struct inode *inode = file_inode(file);
561574
struct address_space *mapping = file->f_mapping;
562575

563576
get_file(file);
564-
if (tmp->vm_flags & VM_DENYWRITE)
565-
put_write_access(inode);
566577
i_mmap_lock_write(mapping);
567578
if (tmp->vm_flags & VM_SHARED)
568579
mapping_allow_writable(mapping);
@@ -640,7 +651,7 @@ static inline void mm_free_pgd(struct mm_struct *mm)
640651
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
641652
{
642653
mmap_write_lock(oldmm);
643-
RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
654+
dup_mm_exe_file(mm, oldmm);
644655
mmap_write_unlock(oldmm);
645656
return 0;
646657
}
@@ -1150,11 +1161,11 @@ void mmput_async(struct mm_struct *mm)
11501161
*
11511162
* Main users are mmput() and sys_execve(). Callers prevent concurrent
11521163
* invocations: in mmput() nobody alive left, in execve task is single
1153-
* threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
1154-
* mm->exe_file, but does so without using set_mm_exe_file() in order
1155-
* to avoid the need for any locks.
1164+
* threaded.
1165+
*
1166+
* Can only fail if new_exe_file != NULL.
11561167
*/
1157-
void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1168+
int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
11581169
{
11591170
struct file *old_exe_file;
11601171

@@ -1165,11 +1176,73 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
11651176
*/
11661177
old_exe_file = rcu_dereference_raw(mm->exe_file);
11671178

1168-
if (new_exe_file)
1179+
if (new_exe_file) {
1180+
/*
1181+
* We expect the caller (i.e., sys_execve) to have already denied
1182+
* write access, so this is unlikely to fail.
1183+
*/
1184+
if (unlikely(deny_write_access(new_exe_file)))
1185+
return -EACCES;
11691186
get_file(new_exe_file);
1187+
}
11701188
rcu_assign_pointer(mm->exe_file, new_exe_file);
1171-
if (old_exe_file)
1189+
if (old_exe_file) {
1190+
allow_write_access(old_exe_file);
11721191
fput(old_exe_file);
1192+
}
1193+
return 0;
1194+
}
1195+
1196+
/**
1197+
* replace_mm_exe_file - replace a reference to the mm's executable file
1198+
*
1199+
* This changes mm's executable file (shown as symlink /proc/[pid]/exe),
1200+
* dealing with concurrent invocation and without grabbing the mmap lock in
1201+
* write mode.
1202+
*
1203+
* Main user is sys_prctl(PR_SET_MM_MAP/EXE_FILE).
1204+
*/
1205+
int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1206+
{
1207+
struct vm_area_struct *vma;
1208+
struct file *old_exe_file;
1209+
int ret = 0;
1210+
1211+
/* Forbid mm->exe_file change if old file still mapped. */
1212+
old_exe_file = get_mm_exe_file(mm);
1213+
if (old_exe_file) {
1214+
mmap_read_lock(mm);
1215+
for (vma = mm->mmap; vma && !ret; vma = vma->vm_next) {
1216+
if (!vma->vm_file)
1217+
continue;
1218+
if (path_equal(&vma->vm_file->f_path,
1219+
&old_exe_file->f_path))
1220+
ret = -EBUSY;
1221+
}
1222+
mmap_read_unlock(mm);
1223+
fput(old_exe_file);
1224+
if (ret)
1225+
return ret;
1226+
}
1227+
1228+
/* set the new file, lockless */
1229+
ret = deny_write_access(new_exe_file);
1230+
if (ret)
1231+
return -EACCES;
1232+
get_file(new_exe_file);
1233+
1234+
old_exe_file = xchg(&mm->exe_file, new_exe_file);
1235+
if (old_exe_file) {
1236+
/*
1237+
* Don't race with dup_mmap() getting the file and disallowing
1238+
* write access while someone might open the file writable.
1239+
*/
1240+
mmap_read_lock(mm);
1241+
allow_write_access(old_exe_file);
1242+
fput(old_exe_file);
1243+
mmap_read_unlock(mm);
1244+
}
1245+
return 0;
11731246
}
11741247

11751248
/**

kernel/sys.c

+1-32
Original file line numberDiff line numberDiff line change
@@ -1847,7 +1847,6 @@ SYSCALL_DEFINE1(umask, int, mask)
18471847
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
18481848
{
18491849
struct fd exe;
1850-
struct file *old_exe, *exe_file;
18511850
struct inode *inode;
18521851
int err;
18531852

@@ -1870,40 +1869,10 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
18701869
if (err)
18711870
goto exit;
18721871

1873-
/*
1874-
* Forbid mm->exe_file change if old file still mapped.
1875-
*/
1876-
exe_file = get_mm_exe_file(mm);
1877-
err = -EBUSY;
1878-
if (exe_file) {
1879-
struct vm_area_struct *vma;
1880-
1881-
mmap_read_lock(mm);
1882-
for (vma = mm->mmap; vma; vma = vma->vm_next) {
1883-
if (!vma->vm_file)
1884-
continue;
1885-
if (path_equal(&vma->vm_file->f_path,
1886-
&exe_file->f_path))
1887-
goto exit_err;
1888-
}
1889-
1890-
mmap_read_unlock(mm);
1891-
fput(exe_file);
1892-
}
1893-
1894-
err = 0;
1895-
/* set the new file, lockless */
1896-
get_file(exe.file);
1897-
old_exe = xchg(&mm->exe_file, exe.file);
1898-
if (old_exe)
1899-
fput(old_exe);
1872+
err = replace_mm_exe_file(mm, exe.file);
19001873
exit:
19011874
fdput(exe);
19021875
return err;
1903-
exit_err:
1904-
mmap_read_unlock(mm);
1905-
fput(exe_file);
1906-
goto exit;
19071876
}
19081877

19091878
/*

lib/test_printf.c

+2-3
Original file line numberDiff line numberDiff line change
@@ -675,9 +675,8 @@ flags(void)
675675
"uptodate|dirty|lru|active|swapbacked",
676676
cmp_buffer);
677677

678-
flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC
679-
| VM_DENYWRITE;
680-
test("read|exec|mayread|maywrite|mayexec|denywrite", "%pGv", &flags);
678+
flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
679+
test("read|exec|mayread|maywrite|mayexec", "%pGv", &flags);
681680

682681
gfp = GFP_TRANSHUGE;
683682
test("GFP_TRANSHUGE", "%pGg", &gfp);

0 commit comments

Comments
 (0)