#include <linux/export.h>
#include <linux/swap.h>
#include <linux/uio.h>
+#include <linux/khugepaged.h>
static struct vfsmount *shm_mnt;
pgoff_t nr_unswapped; /* how often writepage refused to swap out */
};
-/* Flag allocation requirements to shmem_getpage */
-enum sgp_type {
- SGP_READ, /* don't exceed i_size, don't allocate page */
- SGP_CACHE, /* don't exceed i_size, may allocate page */
- SGP_NOHUGE, /* like SGP_CACHE, but no huge pages */
- SGP_HUGE, /* like SGP_CACHE, huge pages preferred */
- SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
- SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
-};
-
#ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void)
{
struct page **pagep, enum sgp_type sgp,
gfp_t gfp, struct mm_struct *fault_mm, int *fault_type);
-static inline int shmem_getpage(struct inode *inode, pgoff_t index,
+int shmem_getpage(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp)
{
return shmem_getpage_gfp(inode, index, pagep, sgp,
static const struct inode_operations shmem_dir_inode_operations;
static const struct inode_operations shmem_special_inode_operations;
static const struct vm_operations_struct shmem_vm_ops;
+static struct file_system_type shmem_fs_type;
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);
#define SHMEM_HUGE_DENY (-1)
#define SHMEM_HUGE_FORCE (-2)
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
/* ifdef here to avoid bloating shmem.o when not necessary */
int shmem_huge __read_mostly;
}
}
-#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
+static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
+ struct shrink_control *sc, unsigned long nr_to_split)
+{
+ LIST_HEAD(list), *pos, *next;
+ struct inode *inode;
+ struct shmem_inode_info *info;
+ struct page *page;
+ unsigned long batch = sc ? sc->nr_to_scan : 128;
+ int removed = 0, split = 0;
+
+ if (list_empty(&sbinfo->shrinklist))
+ return SHRINK_STOP;
+
+ spin_lock(&sbinfo->shrinklist_lock);
+ list_for_each_safe(pos, next, &sbinfo->shrinklist) {
+ info = list_entry(pos, struct shmem_inode_info, shrinklist);
+
+ /* pin the inode */
+ inode = igrab(&info->vfs_inode);
+
+ /* inode is about to be evicted */
+ if (!inode) {
+ list_del_init(&info->shrinklist);
+ removed++;
+ goto next;
+ }
+
+ /* Check if there's anything to gain */
+ if (round_up(inode->i_size, PAGE_SIZE) ==
+ round_up(inode->i_size, HPAGE_PMD_SIZE)) {
+ list_del_init(&info->shrinklist);
+ removed++;
+ iput(inode);
+ goto next;
+ }
+
+ list_move(&info->shrinklist, &list);
+next:
+ if (!--batch)
+ break;
+ }
+ spin_unlock(&sbinfo->shrinklist_lock);
+
+ list_for_each_safe(pos, next, &list) {
+ int ret;
+
+ info = list_entry(pos, struct shmem_inode_info, shrinklist);
+ inode = &info->vfs_inode;
+
+ if (nr_to_split && split >= nr_to_split) {
+ iput(inode);
+ continue;
+ }
+
+ page = find_lock_page(inode->i_mapping,
+ (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
+ if (!page)
+ goto drop;
+
+ if (!PageTransHuge(page)) {
+ unlock_page(page);
+ put_page(page);
+ goto drop;
+ }
+
+ ret = split_huge_page(page);
+ unlock_page(page);
+ put_page(page);
+
+ if (ret) {
+ /* split failed: leave it on the list */
+ iput(inode);
+ continue;
+ }
+
+ split++;
+drop:
+ list_del_init(&info->shrinklist);
+ removed++;
+ iput(inode);
+ }
+
+ spin_lock(&sbinfo->shrinklist_lock);
+ list_splice_tail(&list, &sbinfo->shrinklist);
+ sbinfo->shrinklist_len -= removed;
+ spin_unlock(&sbinfo->shrinklist_lock);
+
+ return split;
+}
+
+static long shmem_unused_huge_scan(struct super_block *sb,
+ struct shrink_control *sc)
+{
+ struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+
+ if (!READ_ONCE(sbinfo->shrinklist_len))
+ return SHRINK_STOP;
+
+ return shmem_unused_huge_shrink(sbinfo, sc, 0);
+}
+
+static long shmem_unused_huge_count(struct super_block *sb,
+ struct shrink_control *sc)
+{
+ struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+ return READ_ONCE(sbinfo->shrinklist_len);
+}
+#else /* !CONFIG_TRANSPARENT_HUGE_PAGECACHE */
#define shmem_huge SHMEM_HUGE_DENY
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
+ struct shrink_control *sc, unsigned long nr_to_split)
+{
+ return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
/*
* Like add_to_page_cache_locked, but error if expected item has gone.
if (!error) {
mapping->nrpages += nr;
if (PageTransHuge(page))
- __inc_zone_page_state(page, NR_SHMEM_THPS);
- __mod_zone_page_state(page_zone(page), NR_FILE_PAGES, nr);
- __mod_zone_page_state(page_zone(page), NR_SHMEM, nr);
+ __inc_node_page_state(page, NR_SHMEM_THPS);
+ __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
+ __mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
spin_unlock_irq(&mapping->tree_lock);
} else {
page->mapping = NULL;
error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
page->mapping = NULL;
mapping->nrpages--;
- __dec_zone_page_state(page, NR_FILE_PAGES);
- __dec_zone_page_state(page, NR_SHMEM);
+ __dec_node_page_state(page, NR_FILE_PAGES);
+ __dec_node_page_state(page, NR_SHMEM);
spin_unlock_irq(&mapping->tree_lock);
put_page(page);
BUG_ON(error);
{
struct inode *inode = d_inode(dentry);
struct shmem_inode_info *info = SHMEM_I(inode);
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
int error;
error = inode_change_ok(inode, attr);
if (oldsize > holebegin)
unmap_mapping_range(inode->i_mapping,
holebegin, 0, 1);
+
+ /*
+ * Part of the huge page can be beyond i_size: subject
+ * to shrink under memory pressure.
+ */
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
+ spin_lock(&sbinfo->shrinklist_lock);
+ if (list_empty(&info->shrinklist)) {
+ list_add_tail(&info->shrinklist,
+ &sbinfo->shrinklist);
+ sbinfo->shrinklist_len++;
+ }
+ spin_unlock(&sbinfo->shrinklist_lock);
+ }
}
}
static void shmem_evict_inode(struct inode *inode)
{
struct shmem_inode_info *info = SHMEM_I(inode);
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
if (inode->i_mapping->a_ops == &shmem_aops) {
shmem_unacct_size(info->flags, inode->i_size);
inode->i_size = 0;
shmem_truncate_range(inode, 0, (loff_t)-1);
+ if (!list_empty(&info->shrinklist)) {
+ spin_lock(&sbinfo->shrinklist_lock);
+ if (!list_empty(&info->shrinklist)) {
+ list_del_init(&info->shrinklist);
+ sbinfo->shrinklist_len--;
+ }
+ spin_unlock(&sbinfo->shrinklist_lock);
+ }
if (!list_empty(&info->swaplist)) {
mutex_lock(&shmem_swaplist_mutex);
list_del_init(&info->swaplist);
void __rcu **results;
struct page *page;
- if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
return NULL;
rcu_read_lock();
int nr;
int err = -ENOSPC;
- if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
huge = false;
nr = huge ? HPAGE_PMD_NR : 1;
error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
newpage);
if (!error) {
- __inc_zone_page_state(newpage, NR_FILE_PAGES);
- __dec_zone_page_state(oldpage, NR_FILE_PAGES);
+ __inc_node_page_state(newpage, NR_FILE_PAGES);
+ __dec_node_page_state(oldpage, NR_FILE_PAGES);
}
spin_unlock_irq(&swap_mapping->tree_lock);
index, false);
}
if (IS_ERR(page)) {
+ int retry = 5;
error = PTR_ERR(page);
page = NULL;
+ if (error != -ENOSPC)
+ goto failed;
+ /*
+ * Try to reclaim some spece by splitting a huge page
+ * beyond i_size on the filesystem.
+ */
+ while (retry--) {
+ int ret;
+ ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
+ if (ret == SHRINK_STOP)
+ break;
+ if (ret)
+ goto alloc_nohuge;
+ }
goto failed;
}
spin_unlock_irq(&info->lock);
alloced = true;
+ if (PageTransHuge(page) &&
+ DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
+ hindex + HPAGE_PMD_NR - 1) {
+ /*
+ * Part of the huge page is beyond i_size: subject
+ * to shrink under memory pressure.
+ */
+ spin_lock(&sbinfo->shrinklist_lock);
+ if (list_empty(&info->shrinklist)) {
+ list_add_tail(&info->shrinklist,
+ &sbinfo->shrinklist);
+ sbinfo->shrinklist_len++;
+ }
+ spin_unlock(&sbinfo->shrinklist_lock);
+ }
+
/*
* Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
*/
get_area = current->mm->get_unmapped_area;
addr = get_area(file, uaddr, len, pgoff, flags);
- if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
return addr;
if (IS_ERR_VALUE(addr))
return addr;
{
file_accessed(file);
vma->vm_ops = &shmem_vm_ops;
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
+ ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
+ (vma->vm_end & HPAGE_PMD_MASK)) {
+ khugepaged_enter(vma, vma->vm_flags);
+ }
return 0;
}
spin_lock_init(&info->lock);
info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
+ INIT_LIST_HEAD(&info->shrinklist);
INIT_LIST_HEAD(&info->swaplist);
simple_xattrs_init(&info->xattrs);
cache_no_acl(inode);
sbinfo->gid = make_kgid(current_user_ns(), gid);
if (!gid_valid(sbinfo->gid))
goto bad_val;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
} else if (!strcmp(this_char, "huge")) {
int huge;
huge = shmem_parse_huge(value);
if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
seq_printf(seq, ",gid=%u",
from_kgid_munged(&init_user_ns, sbinfo->gid));
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
/* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
if (sbinfo->huge)
seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
goto failed;
sbinfo->free_inodes = sbinfo->max_inodes;
+ spin_lock_init(&sbinfo->shrinklist_lock);
+ INIT_LIST_HEAD(&sbinfo->shrinklist);
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
.evict_inode = shmem_evict_inode,
.drop_inode = generic_delete_inode,
.put_super = shmem_put_super,
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
+ .nr_cached_objects = shmem_unused_huge_count,
+ .free_cached_objects = shmem_unused_huge_scan,
+#endif
};
static const struct vm_operations_struct shmem_vm_ops = {
goto out1;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
if (has_transparent_hugepage() && shmem_huge < SHMEM_HUGE_DENY)
SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
else
return error;
}
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS)
+#if defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && defined(CONFIG_SYSFS)
static ssize_t shmem_enabled_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct kobj_attribute shmem_enabled_attr =
__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
+
+bool shmem_huge_enabled(struct vm_area_struct *vma)
+{
+ struct inode *inode = file_inode(vma->vm_file);
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ loff_t i_size;
+ pgoff_t off;
+
+ if (shmem_huge == SHMEM_HUGE_FORCE)
+ return true;
+ if (shmem_huge == SHMEM_HUGE_DENY)
+ return false;
+ switch (sbinfo->huge) {
+ case SHMEM_HUGE_NEVER:
+ return false;
+ case SHMEM_HUGE_ALWAYS:
+ return true;
+ case SHMEM_HUGE_WITHIN_SIZE:
+ off = round_up(vma->vm_pgoff, HPAGE_PMD_NR);
+ i_size = round_up(i_size_read(inode), PAGE_SIZE);
+ if (i_size >= HPAGE_PMD_SIZE &&
+ i_size >> PAGE_SHIFT >= off)
+ return true;
+ case SHMEM_HUGE_ADVISE:
+ /* TODO: implement fadvise() hints */
+ return (vma->vm_flags & VM_HUGEPAGE);
+ default:
+ VM_BUG_ON(1);
+ return false;
+ }
+}
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
#else /* !CONFIG_SHMEM */
fput(vma->vm_file);
vma->vm_file = file;
vma->vm_ops = &shmem_vm_ops;
+
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
+ ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
+ (vma->vm_end & HPAGE_PMD_MASK)) {
+ khugepaged_enter(vma, vma->vm_flags);
+ }
+
return 0;
}