X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=fs%2Fdcache.c;h=d6847d7b123d4ca0fe4a9100b751b673d8801cd9;hb=f91e5c0c5f39b2a5c0f1a66462665eddd7021d83;hp=d5ecc6e477daa3ef63d630a42954ea57cb6d1e5b;hpb=6ea24cf79e055f0a62a64baa8587e2254a493c7b;p=deliverable%2Flinux.git diff --git a/fs/dcache.c b/fs/dcache.c index d5ecc6e477da..d6847d7b123d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -111,6 +111,17 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, return dentry_hashtable + hash_32(hash, d_hash_shift); } +#define IN_LOOKUP_SHIFT 10 +static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT]; + +static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent, + unsigned int hash) +{ + hash += (unsigned long) parent / L1_CACHE_BYTES; + return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT); +} + + /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { .age_limit = 45, @@ -496,6 +507,44 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent) +{ + struct dentry *next; + /* + * Inform d_walk() and shrink_dentry_list() that we are no longer + * attached to the dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (unlikely(list_empty(&dentry->d_child))) + return; + __list_del_entry(&dentry->d_child); + /* + * Cursors can move around the list of children. While we'd been + * a normal list member, it didn't matter - ->d_child.next would've + * been updated. However, from now on it won't be and for the + * things like d_walk() it might end up with a nasty surprise. + * Normally d_walk() doesn't care about cursors moving around - + * ->d_lock on parent prevents that and since a cursor has no children + * of its own, we get through it without ever unlocking the parent. + * There is one exception, though - if we ascend from a child that + * gets killed as soon as we unlock it, the next sibling is found + * using the value left in its ->d_child.next. And if _that_ + * pointed to a cursor, and cursor got moved (e.g. by lseek()) + * before d_walk() regains parent->d_lock, we'll end up skipping + * everything the cursor had been moved past. + * + * Solution: make sure that the pointer left behind in ->d_child.next + * points to something that won't be moving around. I.e. skip the + * cursors. + */ + while (dentry->d_child.next != &parent->d_subdirs) { + next = list_entry(dentry->d_child.next, struct dentry, d_child); + if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) + break; + dentry->d_child.next = next->d_child.next; + } +} + static void __dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; @@ -521,12 +570,7 @@ static void __dentry_kill(struct dentry *dentry) } /* if it was on the hash then remove it */ __d_drop(dentry); - __list_del_entry(&dentry->d_child); - /* - * Inform d_walk() that we are no longer attached to the - * dentry tree - */ - dentry->d_flags |= DCACHE_DENTRY_KILLED; + dentry_unlist(dentry, parent); if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -761,6 +805,8 @@ repeat: /* Slow case: now with the dentry lock held */ rcu_read_unlock(); + WARN_ON(d_in_lookup(dentry)); + /* Unreachable? Get rid of it */ if (unlikely(d_unhashed(dentry))) goto kill_it; @@ -1190,6 +1236,9 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; + if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) + continue; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ret = enter(data, dentry); @@ -1558,7 +1607,11 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) * be overwriting an internal NUL character */ dentry->d_iname[DNAME_INLINE_LEN-1] = 0; - if (name->len > DNAME_INLINE_LEN-1) { + if (unlikely(!name)) { + static const struct qstr anon = QSTR_INIT("/", 1); + name = &anon; + dname = dentry->d_iname; + } else if (name->len > DNAME_INLINE_LEN-1) { size_t size = offsetof(struct external_name, name[1]); struct external_name *p = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT); @@ -1619,7 +1672,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; - + dentry->d_flags |= DCACHE_RCUACCESS; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject @@ -1634,6 +1687,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_cursor(struct dentry * parent) +{ + struct dentry *dentry = __d_alloc(parent->d_sb, NULL); + if (dentry) { + dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; + dentry->d_parent = dget(parent); + } + return dentry; +} + /** * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) * @sb: the superblock @@ -1653,8 +1716,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) struct qstr q; q.name = name; - q.len = strlen(name); - q.hash = full_name_hash(q.name, q.len); + q.hash_len = hashlen_string(name); return d_alloc(parent, &q); } EXPORT_SYMBOL(d_alloc_name); @@ -1746,6 +1808,7 @@ type_determined: static void __d_instantiate(struct dentry *dentry, struct inode *inode) { unsigned add_flags = d_flags_for_inode(inode); + WARN_ON(d_in_lookup(dentry)); spin_lock(&dentry->d_lock); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); @@ -1775,11 +1838,11 @@ void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) { + security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); __d_instantiate(entry, inode); spin_unlock(&inode->i_lock); } - security_d_instantiate(entry, inode); } EXPORT_SYMBOL(d_instantiate); @@ -1796,6 +1859,7 @@ int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) { BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); + security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { spin_unlock(&inode->i_lock); @@ -1804,7 +1868,6 @@ int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) } __d_instantiate(entry, inode); spin_unlock(&inode->i_lock); - security_d_instantiate(entry, inode); return 0; } @@ -1815,9 +1878,7 @@ struct dentry *d_make_root(struct inode *root_inode) struct dentry *res = NULL; if (root_inode) { - static const struct qstr name = QSTR_INIT("/", 1); - - res = __d_alloc(root_inode->i_sb, &name); + res = __d_alloc(root_inode->i_sb, NULL); if (res) d_instantiate(res, root_inode); else @@ -1858,7 +1919,6 @@ EXPORT_SYMBOL(d_find_any_alias); static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) { - static const struct qstr anonstring = QSTR_INIT("/", 1); struct dentry *tmp; struct dentry *res; unsigned add_flags; @@ -1872,12 +1932,13 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) if (res) goto out_iput; - tmp = __d_alloc(inode->i_sb, &anonstring); + tmp = __d_alloc(inode->i_sb, NULL); if (!tmp) { res = ERR_PTR(-ENOMEM); goto out_iput; } + security_d_instantiate(tmp, inode); spin_lock(&inode->i_lock); res = __d_find_any_alias(inode); if (res) { @@ -1900,13 +1961,10 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) hlist_bl_unlock(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); - security_d_instantiate(tmp, inode); return tmp; out_iput: - if (res && !IS_ERR(res)) - security_d_instantiate(res, inode); iput(inode); return res; } @@ -1975,28 +2033,36 @@ EXPORT_SYMBOL(d_obtain_root); struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, struct qstr *name) { - struct dentry *found; - struct dentry *new; + struct dentry *found, *res; /* * First check if a dentry matching the name already exists, * if not go ahead and create it now. */ found = d_hash_and_lookup(dentry->d_parent, name); - if (!found) { - new = d_alloc(dentry->d_parent, name); - if (!new) { - found = ERR_PTR(-ENOMEM); - } else { - found = d_splice_alias(inode, new); - if (found) { - dput(new); - return found; - } - return new; + if (found) { + iput(inode); + return found; + } + if (d_in_lookup(dentry)) { + found = d_alloc_parallel(dentry->d_parent, name, + dentry->d_wait); + if (IS_ERR(found) || !d_in_lookup(found)) { + iput(inode); + return found; } + } else { + found = d_alloc(dentry->d_parent, name); + if (!found) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + } + res = d_splice_alias(inode, found); + if (res) { + dput(found); + return res; } - iput(inode); return found; } EXPORT_SYMBOL(d_add_ci); @@ -2338,7 +2404,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); hlist_bl_lock(b); - entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } @@ -2363,17 +2428,201 @@ void d_rehash(struct dentry * entry) } EXPORT_SYMBOL(d_rehash); +static inline unsigned start_dir_add(struct inode *dir) +{ + + for (;;) { + unsigned n = dir->i_dir_seq; + if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) + return n; + cpu_relax(); + } +} + +static inline void end_dir_add(struct inode *dir, unsigned n) +{ + smp_store_release(&dir->i_dir_seq, n + 2); +} + +static void d_wait_lookup(struct dentry *dentry) +{ + if (d_in_lookup(dentry)) { + DECLARE_WAITQUEUE(wait, current); + add_wait_queue(dentry->d_wait, &wait); + do { + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&dentry->d_lock); + schedule(); + spin_lock(&dentry->d_lock); + } while (d_in_lookup(dentry)); + } +} + +struct dentry *d_alloc_parallel(struct dentry *parent, + const struct qstr *name, + wait_queue_head_t *wq) +{ + unsigned int len = name->len; + unsigned int hash = name->hash; + const unsigned char *str = name->name; + struct hlist_bl_head *b = in_lookup_hash(parent, hash); + struct hlist_bl_node *node; + struct dentry *new = d_alloc(parent, name); + struct dentry *dentry; + unsigned seq, r_seq, d_seq; + + if (unlikely(!new)) + return ERR_PTR(-ENOMEM); + +retry: + rcu_read_lock(); + seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1; + r_seq = read_seqbegin(&rename_lock); + dentry = __d_lookup_rcu(parent, name, &d_seq); + if (unlikely(dentry)) { + if (!lockref_get_not_dead(&dentry->d_lockref)) { + rcu_read_unlock(); + goto retry; + } + if (read_seqcount_retry(&dentry->d_seq, d_seq)) { + rcu_read_unlock(); + dput(dentry); + goto retry; + } + rcu_read_unlock(); + dput(new); + return dentry; + } + if (unlikely(read_seqretry(&rename_lock, r_seq))) { + rcu_read_unlock(); + goto retry; + } + hlist_bl_lock(b); + if (unlikely(parent->d_inode->i_dir_seq != seq)) { + hlist_bl_unlock(b); + rcu_read_unlock(); + goto retry; + } + /* + * No changes for the parent since the beginning of d_lookup(). + * Since all removals from the chain happen with hlist_bl_lock(), + * any potential in-lookup matches are going to stay here until + * we unlock the chain. All fields are stable in everything + * we encounter. + */ + hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) { + if (dentry->d_name.hash != hash) + continue; + if (dentry->d_parent != parent) + continue; + if (parent->d_flags & DCACHE_OP_COMPARE) { + int tlen = dentry->d_name.len; + const char *tname = dentry->d_name.name; + if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) + continue; + } else { + if (dentry->d_name.len != len) + continue; + if (dentry_cmp(dentry, str, len)) + continue; + } + hlist_bl_unlock(b); + /* now we can try to grab a reference */ + if (!lockref_get_not_dead(&dentry->d_lockref)) { + rcu_read_unlock(); + goto retry; + } + + rcu_read_unlock(); + /* + * somebody is likely to be still doing lookup for it; + * wait for them to finish + */ + spin_lock(&dentry->d_lock); + d_wait_lookup(dentry); + /* + * it's not in-lookup anymore; in principle we should repeat + * everything from dcache lookup, but it's likely to be what + * d_lookup() would've found anyway. If it is, just return it; + * otherwise we really have to repeat the whole thing. + */ + if (unlikely(dentry->d_name.hash != hash)) + goto mismatch; + if (unlikely(dentry->d_parent != parent)) + goto mismatch; + if (unlikely(d_unhashed(dentry))) + goto mismatch; + if (parent->d_flags & DCACHE_OP_COMPARE) { + int tlen = dentry->d_name.len; + const char *tname = dentry->d_name.name; + if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) + goto mismatch; + } else { + if (unlikely(dentry->d_name.len != len)) + goto mismatch; + if (unlikely(dentry_cmp(dentry, str, len))) + goto mismatch; + } + /* OK, it *is* a hashed match; return it */ + spin_unlock(&dentry->d_lock); + dput(new); + return dentry; + } + rcu_read_unlock(); + /* we can't take ->d_lock here; it's OK, though. */ + new->d_flags |= DCACHE_PAR_LOOKUP; + new->d_wait = wq; + hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b); + hlist_bl_unlock(b); + return new; +mismatch: + spin_unlock(&dentry->d_lock); + dput(dentry); + goto retry; +} +EXPORT_SYMBOL(d_alloc_parallel); + +void __d_lookup_done(struct dentry *dentry) +{ + struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent, + dentry->d_name.hash); + hlist_bl_lock(b); + dentry->d_flags &= ~DCACHE_PAR_LOOKUP; + __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); + wake_up_all(dentry->d_wait); + dentry->d_wait = NULL; + hlist_bl_unlock(b); + INIT_HLIST_NODE(&dentry->d_u.d_alias); + INIT_LIST_HEAD(&dentry->d_lru); +} +EXPORT_SYMBOL(__d_lookup_done); /* inode->i_lock held if inode is non-NULL */ static inline void __d_add(struct dentry *dentry, struct inode *inode) { + struct inode *dir = NULL; + unsigned n; + spin_lock(&dentry->d_lock); + if (unlikely(d_in_lookup(dentry))) { + dir = dentry->d_parent->d_inode; + n = start_dir_add(dir); + __d_lookup_done(dentry); + } if (inode) { - __d_instantiate(dentry, inode); + unsigned add_flags = d_flags_for_inode(inode); + hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); + raw_write_seqcount_begin(&dentry->d_seq); + __d_set_inode_and_type(dentry, inode, add_flags); + raw_write_seqcount_end(&dentry->d_seq); + __fsnotify_d_instantiate(dentry); + } + _d_rehash(dentry); + if (dir) + end_dir_add(dir, n); + spin_unlock(&dentry->d_lock); + if (inode) spin_unlock(&inode->i_lock); - } - security_d_instantiate(dentry, inode); - d_rehash(dentry); } /** @@ -2387,8 +2636,10 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode) void d_add(struct dentry *entry, struct inode *inode) { - if (inode) + if (inode) { + security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); + } __d_add(entry, inode); } EXPORT_SYMBOL(d_add); @@ -2598,6 +2849,8 @@ static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target) static void __d_move(struct dentry *dentry, struct dentry *target, bool exchange) { + struct inode *dir = NULL; + unsigned n; if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2605,6 +2858,11 @@ static void __d_move(struct dentry *dentry, struct dentry *target, BUG_ON(d_ancestor(target, dentry)); dentry_lock_for_move(dentry, target); + if (unlikely(d_in_lookup(target))) { + dir = target->d_parent->d_inode; + n = start_dir_add(dir); + __d_lookup_done(target); + } write_seqcount_begin(&dentry->d_seq); write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); @@ -2637,6 +2895,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { /* splicing a tree */ + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_parent = target->d_parent; target->d_parent = target; list_del_init(&target->d_child); @@ -2654,6 +2913,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target, write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); + if (dir) + end_dir_add(dir, n); dentry_unlock_for_move(dentry, target); } @@ -2724,7 +2985,8 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) static int __d_unalias(struct inode *inode, struct dentry *dentry, struct dentry *alias) { - struct mutex *m1 = NULL, *m2 = NULL; + struct mutex *m1 = NULL; + struct rw_semaphore *m2 = NULL; int ret = -ESTALE; /* If alias and dentry share a parent, then no extra locks required */ @@ -2735,15 +2997,15 @@ static int __d_unalias(struct inode *inode, if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) goto out_err; m1 = &dentry->d_sb->s_vfs_rename_mutex; - if (!inode_trylock(alias->d_parent->d_inode)) + if (!inode_trylock_shared(alias->d_parent->d_inode)) goto out_err; - m2 = &alias->d_parent->d_inode->i_mutex; + m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: __d_move(alias, dentry, false); ret = 0; out_err: if (m2) - mutex_unlock(m2); + up_read(m2); if (m1) mutex_unlock(m1); return ret; @@ -2782,6 +3044,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (!inode) goto out; + security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode)) { struct dentry *new = __d_find_any_alias(inode); @@ -2809,7 +3072,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) } else { __d_move(new, dentry, false); write_sequnlock(&rename_lock); - security_d_instantiate(new, inode); } iput(inode); return new;