diff --git a/mm/shmem.c b/mm/shmem.c
index 03eb915c82eb8472086a54fa771c2cc6388b1c6a..2ac19a61d5655b82d6aa6b01a37eb6d05ea3a72a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -32,6 +32,7 @@
 #include <linux/export.h>
 #include <linux/swap.h>
 #include <linux/uio.h>
+#include <linux/khugepaged.h>
 
 static struct vfsmount *shm_mnt;
 
@@ -97,16 +98,6 @@ struct shmem_falloc {
        pgoff_t nr_unswapped;   /* how often writepage refused to swap out */
 };
 
-/* Flag allocation requirements to shmem_getpage */
-enum sgp_type {
-       SGP_READ,       /* don't exceed i_size, don't allocate page */
-       SGP_CACHE,      /* don't exceed i_size, may allocate page */
-       SGP_NOHUGE,     /* like SGP_CACHE, but no huge pages */
-       SGP_HUGE,       /* like SGP_CACHE, huge pages preferred */
-       SGP_WRITE,      /* may exceed i_size, may allocate !Uptodate page */
-       SGP_FALLOC,     /* like SGP_WRITE, but make existing page Uptodate */
-};
-
 #ifdef CONFIG_TMPFS
 static unsigned long shmem_default_max_blocks(void)
 {
@@ -126,7 +117,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
                struct page **pagep, enum sgp_type sgp,
                gfp_t gfp, struct mm_struct *fault_mm, int *fault_type);
 
-static inline int shmem_getpage(struct inode *inode, pgoff_t index,
+int shmem_getpage(struct inode *inode, pgoff_t index,
                struct page **pagep, enum sgp_type sgp)
 {
        return shmem_getpage_gfp(inode, index, pagep, sgp,
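Un-inlining shmem_getpage() (with the SGP_* enum above presumably moved to
include/linux/shmem_fs.h) makes it callable from outside shmem.c; khugepaged
is presumably the intended user. A hedged sketch of an external caller,
assuming the page comes back locked and Uptodate on success:

	struct page *page = NULL;

	/* hypothetical caller: fill a hole with a small page only */
	if (!shmem_getpage(inode, index, &page, SGP_NOHUGE)) {
		/* assumption: page is returned locked and Uptodate */
		unlock_page(page);
		put_page(page);
	}
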
@@ -197,6 +188,7 @@ static const struct inode_operations shmem_inode_operations;
 static const struct inode_operations shmem_dir_inode_operations;
 static const struct inode_operations shmem_special_inode_operations;
 static const struct vm_operations_struct shmem_vm_ops;
+static struct file_system_type shmem_fs_type;
 
 static LIST_HEAD(shmem_swaplist);
 static DEFINE_MUTEX(shmem_swaplist_mutex);
@@ -372,7 +364,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 #define SHMEM_HUGE_DENY                (-1)
 #define SHMEM_HUGE_FORCE       (-2)
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
 /* ifdef here to avoid bloating shmem.o when not necessary */
 
 int shmem_huge __read_mostly;
@@ -415,11 +407,123 @@ static const char *shmem_format_huge(int huge)
        }
 }
 
-#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
+static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
+               struct shrink_control *sc, unsigned long nr_to_split)
+{
+       LIST_HEAD(list), *pos, *next;
+       struct inode *inode;
+       struct shmem_inode_info *info;
+       struct page *page;
+       unsigned long batch = sc ? sc->nr_to_scan : 128;
+       int removed = 0, split = 0;
+
+       if (list_empty(&sbinfo->shrinklist))
+               return SHRINK_STOP;
+
+       spin_lock(&sbinfo->shrinklist_lock);
+       list_for_each_safe(pos, next, &sbinfo->shrinklist) {
+               info = list_entry(pos, struct shmem_inode_info, shrinklist);
+
+               /* pin the inode */
+               inode = igrab(&info->vfs_inode);
+
+               /* inode is about to be evicted */
+               if (!inode) {
+                       list_del_init(&info->shrinklist);
+                       removed++;
+                       goto next;
+               }
+
+               /* Check if there's anything to gain */
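+               /*
+                * A sketch with 4K base pages and 2M PMDs: i_size of
+                * 5 MiB rounds to 5 MiB by PAGE_SIZE but to 6 MiB by
+                * HPAGE_PMD_SIZE, so up to 1 MiB past EOF may sit in a
+                * huge page; equal roundings mean no reclaimable tail.
+                */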
+               if (round_up(inode->i_size, PAGE_SIZE) ==
+                               round_up(inode->i_size, HPAGE_PMD_SIZE)) {
+                       list_del_init(&info->shrinklist);
+                       removed++;
+                       iput(inode);
+                       goto next;
+               }
+
+               list_move(&info->shrinklist, &list);
+next:
+               if (!--batch)
+                       break;
+       }
+       spin_unlock(&sbinfo->shrinklist_lock);
+
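+       /*
+        * Phase two runs without shrinklist_lock held: find_lock_page()
+        * and split_huge_page() both take the page lock and can sleep,
+        * which is why candidates were moved to a private list above.
+        */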
+       list_for_each_safe(pos, next, &list) {
+               int ret;
+
+               info = list_entry(pos, struct shmem_inode_info, shrinklist);
+               inode = &info->vfs_inode;
+
+               if (nr_to_split && split >= nr_to_split) {
+                       iput(inode);
+                       continue;
+               }
+
+               page = find_lock_page(inode->i_mapping,
+                               (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
+               if (!page)
+                       goto drop;
+
+               if (!PageTransHuge(page)) {
+                       unlock_page(page);
+                       put_page(page);
+                       goto drop;
+               }
+
+               ret = split_huge_page(page);
+               unlock_page(page);
+               put_page(page);
+
+               if (ret) {
+                       /* split failed: leave it on the list */
+                       iput(inode);
+                       continue;
+               }
+
+               split++;
+drop:
+               list_del_init(&info->shrinklist);
+               removed++;
+               iput(inode);
+       }
+
+       spin_lock(&sbinfo->shrinklist_lock);
+       list_splice_tail(&list, &sbinfo->shrinklist);
+       sbinfo->shrinklist_len -= removed;
+       spin_unlock(&sbinfo->shrinklist_lock);
+
+       return split;
+}
+
+static long shmem_unused_huge_scan(struct super_block *sb,
+               struct shrink_control *sc)
+{
+       struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+
+       if (!READ_ONCE(sbinfo->shrinklist_len))
+               return SHRINK_STOP;
+
+       return shmem_unused_huge_shrink(sbinfo, sc, 0);
+}
+
+static long shmem_unused_huge_count(struct super_block *sb,
+               struct shrink_control *sc)
+{
+       struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+       return READ_ONCE(sbinfo->shrinklist_len);
+}
+#else /* !CONFIG_TRANSPARENT_HUGE_PAGECACHE */
 
 #define shmem_huge SHMEM_HUGE_DENY
 
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
+               struct shrink_control *sc, unsigned long nr_to_split)
+{
+       return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
 
 /*
  * Like add_to_page_cache_locked, but error if expected item has gone.
@@ -471,9 +575,9 @@ static int shmem_add_to_page_cache(struct page *page,
        if (!error) {
                mapping->nrpages += nr;
                if (PageTransHuge(page))
-                       __inc_zone_page_state(page, NR_SHMEM_THPS);
-               __mod_zone_page_state(page_zone(page), NR_FILE_PAGES, nr);
-               __mod_zone_page_state(page_zone(page), NR_SHMEM, nr);
+                       __inc_node_page_state(page, NR_SHMEM_THPS);
+               __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
+               __mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
                spin_unlock_irq(&mapping->tree_lock);
        } else {
                page->mapping = NULL;
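Note the accounting switch from per-zone to per-node counters (page_pgdat()
replacing page_zone()); NR_SHMEM_THPS is new. For one huge page the
arithmetic works out as below, assuming 4K base pages and 2M THPs:

	/* nr = HPAGE_PMD_NR = 512 for a THP, 1 for a small page */
	NR_SHMEM_THPS += 1;	/* one count per huge page */
	NR_FILE_PAGES += 512;	/* every subpage is a file page */
	NR_SHMEM      += 512;	/* every subpage is shmem */
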
@@ -497,8 +601,8 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
        error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
        page->mapping = NULL;
        mapping->nrpages--;
-       __dec_zone_page_state(page, NR_FILE_PAGES);
-       __dec_zone_page_state(page, NR_SHMEM);
+       __dec_node_page_state(page, NR_FILE_PAGES);
+       __dec_node_page_state(page, NR_SHMEM);
        spin_unlock_irq(&mapping->tree_lock);
        put_page(page);
        BUG_ON(error);
@@ -852,6 +956,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
        int error;
 
        error = inode_change_ok(inode, attr);
@@ -887,6 +992,20 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
                        if (oldsize > holebegin)
                                unmap_mapping_range(inode->i_mapping,
                                                        holebegin, 0, 1);
+
+                       /*
+                        * Part of the huge page can be beyond i_size: subject
+                        * to shrink under memory pressure.
+                        */
+                       if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
+                               spin_lock(&sbinfo->shrinklist_lock);
+                               if (list_empty(&info->shrinklist)) {
+                                       list_add_tail(&info->shrinklist,
+                                                       &sbinfo->shrinklist);
+                                       sbinfo->shrinklist_len++;
+                               }
+                               spin_unlock(&sbinfo->shrinklist_lock);
+                       }
                }
        }
 
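Why a truncation queues the inode: shrinking i_size can leave a huge page
straddling the new end of file. A sketch, assuming 2M huge pages:

	/*
	 * truncate a 4 MiB file to 3 MiB: the huge page covering
	 * [2 MiB, 4 MiB) stays in the page cache, but its last 1 MiB
	 * is now beyond i_size and only reclaimable after a split.
	 */
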
@@ -899,11 +1018,20 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 static void shmem_evict_inode(struct inode *inode)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 
        if (inode->i_mapping->a_ops == &shmem_aops) {
                shmem_unacct_size(info->flags, inode->i_size);
                inode->i_size = 0;
                shmem_truncate_range(inode, 0, (loff_t)-1);
+               if (!list_empty(&info->shrinklist)) {
+                       spin_lock(&sbinfo->shrinklist_lock);
+                       if (!list_empty(&info->shrinklist)) {
+                               list_del_init(&info->shrinklist);
+                               sbinfo->shrinklist_len--;
+                       }
+                       spin_unlock(&sbinfo->shrinklist_lock);
+               }
                if (!list_empty(&info->swaplist)) {
                        mutex_lock(&shmem_swaplist_mutex);
                        list_del_init(&info->swaplist);
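The eviction path peeks at list_empty() without the lock first: most inodes
never join the shrinklist, and since the test is repeated under
shrinklist_lock before unlinking, losing a race against the shrinker is
harmless.
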
@@ -1238,7 +1366,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
        void __rcu **results;
        struct page *page;
 
-       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
                return NULL;
 
        rcu_read_lock();
@@ -1279,7 +1407,7 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
        int nr;
        int err = -ENOSPC;
 
-       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
                huge = false;
        nr = huge ? HPAGE_PMD_NR : 1;
 
@@ -1365,8 +1493,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
        error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
                                                                   newpage);
        if (!error) {
-               __inc_zone_page_state(newpage, NR_FILE_PAGES);
-               __dec_zone_page_state(oldpage, NR_FILE_PAGES);
+               __inc_node_page_state(newpage, NR_FILE_PAGES);
+               __dec_node_page_state(oldpage, NR_FILE_PAGES);
        }
        spin_unlock_irq(&swap_mapping->tree_lock);
 
@@ -1572,8 +1700,23 @@ alloc_nohuge:            page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
                                        index, false);
                }
                if (IS_ERR(page)) {
+                       int retry = 5;
                        error = PTR_ERR(page);
                        page = NULL;
+                       if (error != -ENOSPC)
+                               goto failed;
+                       /*
+                        * Try to reclaim some space by splitting a huge page
+                        * beyond i_size on the filesystem.
+                        */
+                       while (retry--) {
+                               int ret;
+                               ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
+                               if (ret == SHRINK_STOP)
+                                       break;
+                               if (ret)
+                                       goto alloc_nohuge;
+                       }
                        goto failed;
                }
 
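The -ENOSPC handling above leans on the shrinker directly: each retry asks
shmem_unused_huge_shrink() to split at most one huge page straddling EOF
(nr_to_split == 1), jumps back to alloc_nohuge as soon as a split succeeded,
and gives up early if SHRINK_STOP reports an empty shrinklist. Five rounds
bound the work on a hopelessly full filesystem.
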
@@ -1612,6 +1755,22 @@ alloc_nohuge:            page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
                spin_unlock_irq(&info->lock);
                alloced = true;
 
+               if (PageTransHuge(page) &&
+                               DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
+                               hindex + HPAGE_PMD_NR - 1) {
+                       /*
+                        * Part of the huge page is beyond i_size: subject
+                        * to shrink under memory pressure.
+                        */
+                       spin_lock(&sbinfo->shrinklist_lock);
+                       if (list_empty(&info->shrinklist)) {
+                               list_add_tail(&info->shrinklist,
+                                               &sbinfo->shrinklist);
+                               sbinfo->shrinklist_len++;
+                       }
+                       spin_unlock(&sbinfo->shrinklist_lock);
+               }
+
                /*
                 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
                 */
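A worked instance of the beyond-EOF test, assuming 4K pages and 2M THPs
(HPAGE_PMD_NR == 512): with i_size = 5 MiB and a huge page allocated at
hindex = 1024 (the 4 MiB boundary),

	/* DIV_ROUND_UP(5 MiB, 4K) = 1280  <  1024 + 512 - 1 = 1535 */

so roughly the last 1 MiB of the huge page lies past EOF and the inode joins
the shrinklist, just as in the truncation path above.
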
@@ -1782,7 +1941,7 @@ unsigned long shmem_get_unmapped_area(struct file *file,
        get_area = current->mm->get_unmapped_area;
        addr = get_area(file, uaddr, len, pgoff, flags);
 
-       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
                return addr;
        if (IS_ERR_VALUE(addr))
                return addr;
@@ -1899,6 +2058,11 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
        file_accessed(file);
        vma->vm_ops = &shmem_vm_ops;
+       if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
+                       ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
+                       (vma->vm_end & HPAGE_PMD_MASK)) {
+               khugepaged_enter(vma, vma->vm_flags);
+       }
        return 0;
 }
 
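The alignment test rounds vm_start up and vm_end down to PMD boundaries and
registers the mm with khugepaged only if a whole huge page fits in between.
A numeric sketch, assuming 2M PMDs (HPAGE_PMD_MASK == ~0x1fffff):

	/* vm_start = 0x2ff000: (0x2ff000 + 0x1fffff) & ~0x1fffff = 0x400000 */
	/* vm_end   = 0x700000:  0x700000 & ~0x1fffff             = 0x600000 */
	/* 0x400000 < 0x600000 -> [4M, 6M) can hold a THP: enter khugepaged  */
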
@@ -1924,6 +2088,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
                spin_lock_init(&info->lock);
                info->seals = F_SEAL_SEAL;
                info->flags = flags & VM_NORESERVE;
+               INIT_LIST_HEAD(&info->shrinklist);
                INIT_LIST_HEAD(&info->swaplist);
                simple_xattrs_init(&info->xattrs);
                cache_no_acl(inode);
@@ -3291,7 +3456,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
                        sbinfo->gid = make_kgid(current_user_ns(), gid);
                        if (!gid_valid(sbinfo->gid))
                                goto bad_val;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
                } else if (!strcmp(this_char, "huge")) {
                        int huge;
                        huge = shmem_parse_huge(value);
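This wires up a "huge=" tmpfs mount option, e.g. "mount -t tmpfs -o
huge=within_size tmpfs /mnt"; shmem_parse_huge() maps the string values
(presumably never, always, within_size and advise, with deny and force
reserved for the sysfs knob) onto the SHMEM_HUGE_* constants defined earlier.
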
@@ -3388,7 +3553,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
        if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
                seq_printf(seq, ",gid=%u",
                                from_kgid_munged(&init_user_ns, sbinfo->gid));
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
        /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
        if (sbinfo->huge)
                seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
@@ -3522,6 +3687,8 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
        if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
                goto failed;
        sbinfo->free_inodes = sbinfo->max_inodes;
+       spin_lock_init(&sbinfo->shrinklist_lock);
+       INIT_LIST_HEAD(&sbinfo->shrinklist);
 
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_blocksize = PAGE_SIZE;
@@ -3684,6 +3851,10 @@ static const struct super_operations shmem_ops = {
        .evict_inode    = shmem_evict_inode,
        .drop_inode     = generic_delete_inode,
        .put_super      = shmem_put_super,
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
+       .nr_cached_objects      = shmem_unused_huge_count,
+       .free_cached_objects    = shmem_unused_huge_scan,
+#endif
 };
 
 static const struct vm_operations_struct shmem_vm_ops = {
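The two new callbacks hook shmem into the generic per-superblock shrinker.
A sketch of the VFS side of the call path (fs/super.c):

	/* shrink_slab()
	 *   -> super_cache_count() -> s_op->nr_cached_objects()
	 *   -> super_cache_scan()  -> s_op->free_cached_objects()
	 */

so under memory pressure the sb shrinker reads shrinklist_len and then asks
shmem to split huge pages straddling EOF.
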
@@ -3734,7 +3905,7 @@ int __init shmem_init(void)
                goto out1;
        }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
        if (has_transparent_hugepage() && shmem_huge < SHMEM_HUGE_DENY)
                SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
        else
@@ -3751,7 +3922,7 @@ out3:
        return error;
 }
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS)
+#if defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && defined(CONFIG_SYSFS)
 static ssize_t shmem_enabled_show(struct kobject *kobj,
                struct kobj_attribute *attr, char *buf)
 {
@@ -3803,7 +3974,38 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
 
 struct kobj_attribute shmem_enabled_attr =
        __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
+
+bool shmem_huge_enabled(struct vm_area_struct *vma)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+       loff_t i_size;
+       pgoff_t off;
+
+       if (shmem_huge == SHMEM_HUGE_FORCE)
+               return true;
+       if (shmem_huge == SHMEM_HUGE_DENY)
+               return false;
+       switch (sbinfo->huge) {
+               case SHMEM_HUGE_NEVER:
+                       return false;
+               case SHMEM_HUGE_ALWAYS:
+                       return true;
+               case SHMEM_HUGE_WITHIN_SIZE:
+                       off = round_up(vma->vm_pgoff, HPAGE_PMD_NR);
+                       i_size = round_up(i_size_read(inode), PAGE_SIZE);
+                       if (i_size >= HPAGE_PMD_SIZE &&
+                                       i_size >> PAGE_SHIFT >= off)
+                               return true;
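+                       /* fall through to ADVISE: honour VM_HUGEPAGE */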
+               case SHMEM_HUGE_ADVISE:
+                       /* TODO: implement fadvise() hints */
+                       return (vma->vm_flags & VM_HUGEPAGE);
+               default:
+                       VM_BUG_ON(1);
+                       return false;
+       }
+}
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
 
 #else /* !CONFIG_SHMEM */
 
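shmem_huge_enabled() mirrors the allocation-time policy for khugepaged, which
needs to know whether collapsing small pages in a shmem VMA is worthwhile.
A worked SHMEM_HUGE_WITHIN_SIZE case, assuming 2M THPs: vm_pgoff = 0 rounds
up to off = 0, and a 4 MiB file gives i_size = 4 MiB >= HPAGE_PMD_SIZE with
i_size >> PAGE_SHIFT = 1024 >= 0, so huge pages are allowed; a file smaller
than one huge page falls through to the VM_HUGEPAGE test instead.
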
@@ -3982,6 +4184,13 @@ int shmem_zero_setup(struct vm_area_struct *vma)
                fput(vma->vm_file);
        vma->vm_file = file;
        vma->vm_ops = &shmem_vm_ops;
+
+       if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
+                       ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
+                       (vma->vm_end & HPAGE_PMD_MASK)) {
+               khugepaged_enter(vma, vma->vm_flags);
+       }
+
        return 0;
 }
 
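shmem_zero_setup() gets the same PMD-fit test as shmem_mmap() above, so
shared anonymous mappings (which are backed by a shmem file) are also
registered with khugepaged when at least one huge page fits in the VMA.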