Skip to content

Commit 749df87

Browse files
mjkravetz and torvalds
authored and committed
mm/shmem: add hugetlbfs support to memfd_create()
This patch came out of discussions in this e-mail thread:
http://lkml.kernel.org/r/1499357846-7481-1-git-send-email-mike.kravetz%40oracle.com

The Oracle JVM team is developing a new garbage collection model. This new model requires multiple mappings of the same anonymous memory. One straightforward way to accomplish this is with memfd_create. They can use the returned fd to create multiple mappings of the same memory.

The JVM today has an option to use (static hugetlb) huge pages. If this option is specified, they would like to use the same garbage collection model requiring multiple mappings to the same memory. Using hugetlbfs, it is possible to explicitly mount a filesystem and specify file paths in order to get an fd that can be used for multiple mappings. However, this introduces additional system admin work and coordination.

Ideally they would like to get a hugetlbfs fd without requiring explicit mounting of a filesystem. Today, mmap and shmget can make use of hugetlbfs without explicitly mounting a filesystem. The patch adds this functionality to memfd_create.

Add a new flag MFD_HUGETLB to memfd_create() that specifies that the file to be created resides in the hugetlbfs filesystem. This is the generic hugetlbfs filesystem not associated with any specific mount point. As with other system calls that request hugetlbfs backed pages, there is the ability to encode huge page size in the flag arguments.

hugetlbfs does not support sealing operations, therefore specifying MFD_ALLOW_SEALING with MFD_HUGETLB will result in EINVAL.

Of course, the memfd_create(2) man page would need updating if this type of functionality moves forward.

Link: http://lkml.kernel.org/r/1502149672-7759-2-git-send-email-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent ab1b597 commit 749df87

File tree

2 files changed

+55
-6
lines changed

2 files changed

+55
-6
lines changed

include/uapi/linux/memfd.h

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,32 @@
11
#ifndef _UAPI_LINUX_MEMFD_H
22
#define _UAPI_LINUX_MEMFD_H
33

4+
#include <asm-generic/hugetlb_encode.h>
5+
46
/* flags for memfd_create(2) (unsigned int) */
57
#define MFD_CLOEXEC 0x0001U
68
#define MFD_ALLOW_SEALING 0x0002U
9+
#define MFD_HUGETLB 0x0004U
10+
11+
/*
12+
* Huge page size encoding when MFD_HUGETLB is specified, and a huge page
13+
* size other than the default is desired. See hugetlb_encode.h.
14+
* All known huge page size encodings are provided here. It is the
15+
* responsibility of the application to know which sizes are supported on
16+
* the running system. See mmap(2) man page for details.
17+
*/
18+
#define MFD_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
19+
#define MFD_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
20+
21+
#define MFD_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
22+
#define MFD_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
23+
#define MFD_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
24+
#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
25+
#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
26+
#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
27+
#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
28+
#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
29+
#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
30+
#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
731

832
#endif /* _UAPI_LINUX_MEMFD_H */

mm/shmem.c

Lines changed: 31 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
#include <linux/swap.h>
3535
#include <linux/uio.h>
3636
#include <linux/khugepaged.h>
37+
#include <linux/hugetlb.h>
3738

3839
#include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
3940

@@ -3652,7 +3653,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
36523653
#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
36533654
#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
36543655

3655-
#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
3656+
#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB)
36563657

36573658
SYSCALL_DEFINE2(memfd_create,
36583659
const char __user *, uname,
@@ -3664,8 +3665,18 @@ SYSCALL_DEFINE2(memfd_create,
36643665
char *name;
36653666
long len;
36663667

3667-
if (flags & ~(unsigned int)MFD_ALL_FLAGS)
3668-
return -EINVAL;
3668+
if (!(flags & MFD_HUGETLB)) {
3669+
if (flags & ~(unsigned int)MFD_ALL_FLAGS)
3670+
return -EINVAL;
3671+
} else {
3672+
/* Sealing not supported in hugetlbfs (MFD_HUGETLB) */
3673+
if (flags & MFD_ALLOW_SEALING)
3674+
return -EINVAL;
3675+
/* Allow huge page size encoding in flags. */
3676+
if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
3677+
(MFD_HUGE_MASK << MFD_HUGE_SHIFT)))
3678+
return -EINVAL;
3679+
}
36693680

36703681
/* length includes terminating zero */
36713682
len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
@@ -3696,16 +3707,30 @@ SYSCALL_DEFINE2(memfd_create,
36963707
goto err_name;
36973708
}
36983709

3699-
file = shmem_file_setup(name, 0, VM_NORESERVE);
3710+
if (flags & MFD_HUGETLB) {
3711+
struct user_struct *user = NULL;
3712+
3713+
file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
3714+
HUGETLB_ANONHUGE_INODE,
3715+
(flags >> MFD_HUGE_SHIFT) &
3716+
MFD_HUGE_MASK);
3717+
} else
3718+
file = shmem_file_setup(name, 0, VM_NORESERVE);
37003719
if (IS_ERR(file)) {
37013720
error = PTR_ERR(file);
37023721
goto err_fd;
37033722
}
3704-
info = SHMEM_I(file_inode(file));
37053723
file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
37063724
file->f_flags |= O_RDWR | O_LARGEFILE;
3707-
if (flags & MFD_ALLOW_SEALING)
3725+
3726+
if (flags & MFD_ALLOW_SEALING) {
3727+
/*
3728+
* flags check at beginning of function ensures
3729+
* this is not a hugetlbfs (MFD_HUGETLB) file.
3730+
*/
3731+
info = SHMEM_I(file_inode(file));
37083732
info->seals &= ~F_SEAL_SEAL;
3733+
}
37093734

37103735
fd_install(fd, file);
37113736
kfree(name);

0 commit comments

Comments
 (0)