Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2011, 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | */ | |
36 | ||
37 | #include <linux/fs.h> | |
38 | #include <linux/sched.h> | |
39 | #include <linux/quotaops.h> | |
40 | ||
41 | #define DEBUG_SUBSYSTEM S_LLITE | |
42 | ||
43 | #include <obd_support.h> | |
44 | #include <lustre_lite.h> | |
45 | #include <lustre/lustre_idl.h> | |
46 | #include <lustre_dlm.h> | |
47 | ||
48 | #include "llite_internal.h" | |
49 | ||
50 | static void free_dentry_data(struct rcu_head *head) | |
51 | { | |
52 | struct ll_dentry_data *lld; | |
53 | ||
54 | lld = container_of(head, struct ll_dentry_data, lld_rcu_head); | |
55 | OBD_FREE_PTR(lld); | |
56 | } | |
57 | ||
58 | /* should NOT be called with the dcache lock, see fs/dcache.c */ | |
59 | static void ll_release(struct dentry *de) | |
60 | { | |
61 | struct ll_dentry_data *lld; | |
62 | ENTRY; | |
63 | LASSERT(de != NULL); | |
64 | lld = ll_d2d(de); | |
65 | if (lld == NULL) /* NFS copies the de->d_op methods (bug 4655) */ | |
66 | RETURN_EXIT; | |
67 | ||
68 | if (lld->lld_it) { | |
69 | ll_intent_release(lld->lld_it); | |
70 | OBD_FREE(lld->lld_it, sizeof(*lld->lld_it)); | |
71 | } | |
72 | LASSERT(lld->lld_cwd_count == 0); | |
73 | LASSERT(lld->lld_mnt_count == 0); | |
74 | de->d_fsdata = NULL; | |
75 | call_rcu(&lld->lld_rcu_head, free_dentry_data); | |
76 | ||
77 | EXIT; | |
78 | } | |
79 | ||
80 | /* Compare if two dentries are the same. Don't match if the existing dentry | |
81 | * is marked invalid. Returns 1 if different, 0 if the same. | |
82 | * | |
83 | * This avoids a race where ll_lookup_it() instantiates a dentry, but we get | |
84 | * an AST before calling d_revalidate_it(). The dentry still exists (marked | |
85 | * INVALID) so d_lookup() matches it, but we have no lock on it (so | |
86 | * lock_match() fails) and we spin around real_lookup(). */ | |
87 | int ll_dcompare(const struct dentry *parent, const struct inode *pinode, | |
88 | const struct dentry *dentry, const struct inode *inode, | |
89 | unsigned int len, const char *str, const struct qstr *name) | |
90 | { | |
91 | ENTRY; | |
92 | ||
93 | if (len != name->len) | |
94 | RETURN(1); | |
95 | ||
96 | if (memcmp(str, name->name, len)) | |
97 | RETURN(1); | |
98 | ||
99 | CDEBUG(D_DENTRY, "found name %.*s(%p) flags %#x refc %d\n", | |
100 | name->len, name->name, dentry, dentry->d_flags, | |
101 | d_refcount(dentry)); | |
102 | ||
103 | /* mountpoint is always valid */ | |
104 | if (d_mountpoint((struct dentry *)dentry)) | |
105 | RETURN(0); | |
106 | ||
107 | if (d_lustre_invalid(dentry)) | |
108 | RETURN(1); | |
109 | ||
110 | RETURN(0); | |
111 | } | |
112 | ||
113 | static inline int return_if_equal(struct ldlm_lock *lock, void *data) | |
114 | { | |
115 | if ((lock->l_flags & | |
116 | (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) == | |
117 | (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) | |
118 | return LDLM_ITER_CONTINUE; | |
119 | return LDLM_ITER_STOP; | |
120 | } | |
121 | ||
122 | /* find any ldlm lock of the inode in mdc and lov | |
123 | * return 0 not find | |
124 | * 1 find one | |
125 | * < 0 error */ | |
126 | static int find_cbdata(struct inode *inode) | |
127 | { | |
128 | struct ll_sb_info *sbi = ll_i2sbi(inode); | |
129 | struct lov_stripe_md *lsm; | |
130 | int rc = 0; | |
131 | ENTRY; | |
132 | ||
133 | LASSERT(inode); | |
134 | rc = md_find_cbdata(sbi->ll_md_exp, ll_inode2fid(inode), | |
135 | return_if_equal, NULL); | |
136 | if (rc != 0) | |
137 | RETURN(rc); | |
138 | ||
139 | lsm = ccc_inode_lsm_get(inode); | |
140 | if (lsm == NULL) | |
141 | RETURN(rc); | |
142 | ||
143 | rc = obd_find_cbdata(sbi->ll_dt_exp, lsm, return_if_equal, NULL); | |
144 | ccc_inode_lsm_put(inode, lsm); | |
145 | ||
146 | RETURN(rc); | |
147 | } | |
148 | ||
149 | /** | |
150 | * Called when last reference to a dentry is dropped and dcache wants to know | |
151 | * whether or not it should cache it: | |
152 | * - return 1 to delete the dentry immediately | |
153 | * - return 0 to cache the dentry | |
154 | * Should NOT be called with the dcache lock, see fs/dcache.c | |
155 | */ | |
156 | static int ll_ddelete(const struct dentry *de) | |
157 | { | |
158 | ENTRY; | |
159 | LASSERT(de); | |
160 | ||
161 | CDEBUG(D_DENTRY, "%s dentry %.*s (%p, parent %p, inode %p) %s%s\n", | |
162 | d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping", | |
163 | de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode, | |
164 | d_unhashed((struct dentry *)de) ? "" : "hashed,", | |
165 | list_empty(&de->d_subdirs) ? "" : "subdirs"); | |
166 | ||
167 | /* kernel >= 2.6.38 last refcount is decreased after this function. */ | |
168 | LASSERT(d_refcount(de) == 1); | |
169 | ||
170 | /* Disable this piece of code temproarily because this is called | |
171 | * inside dcache_lock so it's not appropriate to do lots of work | |
172 | * here. ATTENTION: Before this piece of code enabling, LU-2487 must be | |
173 | * resolved. */ | |
174 | #if 0 | |
175 | /* if not ldlm lock for this inode, set i_nlink to 0 so that | |
176 | * this inode can be recycled later b=20433 */ | |
177 | if (de->d_inode && !find_cbdata(de->d_inode)) | |
178 | clear_nlink(de->d_inode); | |
179 | #endif | |
180 | ||
181 | if (d_lustre_invalid((struct dentry *)de)) | |
182 | RETURN(1); | |
183 | RETURN(0); | |
184 | } | |
185 | ||
186 | static int ll_set_dd(struct dentry *de) | |
187 | { | |
188 | ENTRY; | |
189 | LASSERT(de != NULL); | |
190 | ||
191 | CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n", | |
192 | de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode, | |
193 | d_refcount(de)); | |
194 | ||
195 | if (de->d_fsdata == NULL) { | |
196 | struct ll_dentry_data *lld; | |
197 | ||
198 | OBD_ALLOC_PTR(lld); | |
199 | if (likely(lld != NULL)) { | |
200 | spin_lock(&de->d_lock); | |
201 | if (likely(de->d_fsdata == NULL)) | |
202 | de->d_fsdata = lld; | |
203 | else | |
204 | OBD_FREE_PTR(lld); | |
205 | spin_unlock(&de->d_lock); | |
206 | } else { | |
207 | RETURN(-ENOMEM); | |
208 | } | |
209 | } | |
210 | ||
211 | RETURN(0); | |
212 | } | |
213 | ||
214 | int ll_dops_init(struct dentry *de, int block, int init_sa) | |
215 | { | |
216 | struct ll_dentry_data *lld = ll_d2d(de); | |
217 | int rc = 0; | |
218 | ||
219 | if (lld == NULL && block != 0) { | |
220 | rc = ll_set_dd(de); | |
221 | if (rc) | |
222 | return rc; | |
223 | ||
224 | lld = ll_d2d(de); | |
225 | } | |
226 | ||
227 | if (lld != NULL && init_sa != 0) | |
228 | lld->lld_sa_generation = 0; | |
229 | ||
230 | /* kernel >= 2.6.38 d_op is set in d_alloc() */ | |
231 | LASSERT(de->d_op == &ll_d_ops); | |
232 | return rc; | |
233 | } | |
234 | ||
235 | void ll_intent_drop_lock(struct lookup_intent *it) | |
236 | { | |
237 | if (it->it_op && it->d.lustre.it_lock_mode) { | |
238 | struct lustre_handle handle; | |
239 | ||
240 | handle.cookie = it->d.lustre.it_lock_handle; | |
241 | ||
242 | CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64 | |
243 | " from it %p\n", handle.cookie, it); | |
244 | ldlm_lock_decref(&handle, it->d.lustre.it_lock_mode); | |
245 | ||
246 | /* bug 494: intent_release may be called multiple times, from | |
247 | * this thread and we don't want to double-decref this lock */ | |
248 | it->d.lustre.it_lock_mode = 0; | |
249 | if (it->d.lustre.it_remote_lock_mode != 0) { | |
250 | handle.cookie = it->d.lustre.it_remote_lock_handle; | |
251 | ||
252 | CDEBUG(D_DLMTRACE, "releasing remote lock with cookie" | |
253 | LPX64" from it %p\n", handle.cookie, it); | |
254 | ldlm_lock_decref(&handle, | |
255 | it->d.lustre.it_remote_lock_mode); | |
256 | it->d.lustre.it_remote_lock_mode = 0; | |
257 | } | |
258 | } | |
259 | } | |
260 | ||
261 | void ll_intent_release(struct lookup_intent *it) | |
262 | { | |
263 | ENTRY; | |
264 | ||
265 | CDEBUG(D_INFO, "intent %p released\n", it); | |
266 | ll_intent_drop_lock(it); | |
267 | /* We are still holding extra reference on a request, need to free it */ | |
268 | if (it_disposition(it, DISP_ENQ_OPEN_REF)) | |
269 | ptlrpc_req_finished(it->d.lustre.it_data); /* ll_file_open */ | |
270 | if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */ | |
271 | ptlrpc_req_finished(it->d.lustre.it_data); | |
272 | if (it_disposition(it, DISP_ENQ_COMPLETE)) /* saved req from revalidate | |
273 | * to lookup */ | |
274 | ptlrpc_req_finished(it->d.lustre.it_data); | |
275 | ||
276 | it->d.lustre.it_disposition = 0; | |
277 | it->d.lustre.it_data = NULL; | |
278 | EXIT; | |
279 | } | |
280 | ||
281 | void ll_invalidate_aliases(struct inode *inode) | |
282 | { | |
283 | struct dentry *dentry; | |
284 | struct ll_d_hlist_node *p; | |
285 | ENTRY; | |
286 | ||
287 | LASSERT(inode != NULL); | |
288 | ||
289 | CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n", | |
290 | inode->i_ino, inode->i_generation, inode); | |
291 | ||
292 | ll_lock_dcache(inode); | |
293 | ll_d_hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { | |
294 | CDEBUG(D_DENTRY, "dentry in drop %.*s (%p) parent %p " | |
295 | "inode %p flags %d\n", dentry->d_name.len, | |
296 | dentry->d_name.name, dentry, dentry->d_parent, | |
297 | dentry->d_inode, dentry->d_flags); | |
298 | ||
299 | if (dentry->d_name.len == 1 && dentry->d_name.name[0] == '/') { | |
300 | CERROR("called on root (?) dentry=%p, inode=%p " | |
301 | "ino=%lu\n", dentry, inode, inode->i_ino); | |
302 | lustre_dump_dentry(dentry, 1); | |
303 | libcfs_debug_dumpstack(NULL); | |
304 | } | |
305 | ||
b1d2a127 | 306 | d_lustre_invalidate(dentry, 0); |
d7e09d03 PT |
307 | } |
308 | ll_unlock_dcache(inode); | |
309 | ||
310 | EXIT; | |
311 | } | |
312 | ||
313 | int ll_revalidate_it_finish(struct ptlrpc_request *request, | |
314 | struct lookup_intent *it, | |
315 | struct dentry *de) | |
316 | { | |
317 | int rc = 0; | |
318 | ENTRY; | |
319 | ||
320 | if (!request) | |
321 | RETURN(0); | |
322 | ||
323 | if (it_disposition(it, DISP_LOOKUP_NEG)) | |
324 | RETURN(-ENOENT); | |
325 | ||
326 | rc = ll_prep_inode(&de->d_inode, request, NULL, it); | |
327 | ||
328 | RETURN(rc); | |
329 | } | |
330 | ||
331 | void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry) | |
332 | { | |
333 | LASSERT(it != NULL); | |
334 | LASSERT(dentry != NULL); | |
335 | ||
336 | if (it->d.lustre.it_lock_mode && dentry->d_inode != NULL) { | |
337 | struct inode *inode = dentry->d_inode; | |
338 | struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); | |
339 | ||
340 | CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", | |
341 | inode, inode->i_ino, inode->i_generation); | |
342 | ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL); | |
343 | } | |
344 | ||
345 | /* drop lookup or getattr locks immediately */ | |
346 | if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) { | |
347 | /* on 2.6 there are situation when several lookups and | |
348 | * revalidations may be requested during single operation. | |
349 | * therefore, we don't release intent here -bzzz */ | |
350 | ll_intent_drop_lock(it); | |
351 | } | |
352 | } | |
353 | ||
354 | void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft) | |
355 | { | |
356 | struct lookup_intent *it = *itp; | |
357 | ||
358 | if (!it || it->it_op == IT_GETXATTR) | |
359 | it = *itp = deft; | |
360 | ||
361 | } | |
362 | ||
363 | int ll_revalidate_it(struct dentry *de, int lookup_flags, | |
364 | struct lookup_intent *it) | |
365 | { | |
366 | struct md_op_data *op_data; | |
367 | struct ptlrpc_request *req = NULL; | |
368 | struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; | |
369 | struct obd_export *exp; | |
370 | struct inode *parent = de->d_parent->d_inode; | |
371 | int rc; | |
372 | ||
373 | ENTRY; | |
374 | CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name, | |
375 | LL_IT2STR(it)); | |
376 | ||
377 | if (de->d_inode == NULL) { | |
378 | __u64 ibits; | |
379 | ||
380 | /* We can only use negative dentries if this is stat or lookup, | |
381 | for opens and stuff we do need to query server. */ | |
382 | /* If there is IT_CREAT in intent op set, then we must throw | |
383 | away this negative dentry and actually do the request to | |
384 | kernel to create whatever needs to be created (if possible)*/ | |
385 | if (it && (it->it_op & IT_CREAT)) | |
386 | RETURN(0); | |
387 | ||
388 | if (d_lustre_invalid(de)) | |
389 | RETURN(0); | |
390 | ||
391 | ibits = MDS_INODELOCK_UPDATE; | |
392 | rc = ll_have_md_lock(parent, &ibits, LCK_MINMODE); | |
393 | GOTO(out_sa, rc); | |
394 | } | |
395 | ||
396 | /* Never execute intents for mount points. | |
397 | * Attributes will be fixed up in ll_inode_revalidate_it */ | |
398 | if (d_mountpoint(de)) | |
399 | GOTO(out_sa, rc = 1); | |
400 | ||
401 | /* need to get attributes in case root got changed from other client */ | |
402 | if (de == de->d_sb->s_root) { | |
403 | rc = __ll_inode_revalidate_it(de, it, MDS_INODELOCK_LOOKUP); | |
404 | if (rc == 0) | |
405 | rc = 1; | |
406 | GOTO(out_sa, rc); | |
407 | } | |
408 | ||
409 | exp = ll_i2mdexp(de->d_inode); | |
410 | ||
411 | OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5); | |
412 | ll_frob_intent(&it, &lookup_it); | |
413 | LASSERT(it); | |
414 | ||
415 | if (it->it_op == IT_LOOKUP && !d_lustre_invalid(de)) | |
416 | RETURN(1); | |
417 | ||
418 | if (it->it_op == IT_OPEN) { | |
419 | struct inode *inode = de->d_inode; | |
420 | struct ll_inode_info *lli = ll_i2info(inode); | |
421 | struct obd_client_handle **och_p; | |
422 | __u64 *och_usecount; | |
423 | __u64 ibits; | |
424 | ||
425 | /* | |
426 | * We used to check for MDS_INODELOCK_OPEN here, but in fact | |
427 | * just having LOOKUP lock is enough to justify inode is the | |
428 | * same. And if inode is the same and we have suitable | |
429 | * openhandle, then there is no point in doing another OPEN RPC | |
430 | * just to throw away newly received openhandle. There are no | |
431 | * security implications too, if file owner or access mode is | |
432 | * change, LOOKUP lock is revoked. | |
433 | */ | |
434 | ||
435 | ||
436 | if (it->it_flags & FMODE_WRITE) { | |
437 | och_p = &lli->lli_mds_write_och; | |
438 | och_usecount = &lli->lli_open_fd_write_count; | |
439 | } else if (it->it_flags & FMODE_EXEC) { | |
440 | och_p = &lli->lli_mds_exec_och; | |
441 | och_usecount = &lli->lli_open_fd_exec_count; | |
442 | } else { | |
443 | och_p = &lli->lli_mds_read_och; | |
444 | och_usecount = &lli->lli_open_fd_read_count; | |
445 | } | |
446 | /* Check for the proper lock. */ | |
447 | ibits = MDS_INODELOCK_LOOKUP; | |
448 | if (!ll_have_md_lock(inode, &ibits, LCK_MINMODE)) | |
449 | goto do_lock; | |
450 | mutex_lock(&lli->lli_och_mutex); | |
451 | if (*och_p) { /* Everything is open already, do nothing */ | |
452 | /*(*och_usecount)++; Do not let them steal our open | |
453 | handle from under us */ | |
454 | SET_BUT_UNUSED(och_usecount); | |
455 | /* XXX The code above was my original idea, but in case | |
456 | we have the handle, but we cannot use it due to later | |
457 | checks (e.g. O_CREAT|O_EXCL flags set), nobody | |
458 | would decrement counter increased here. So we just | |
459 | hope the lock won't be invalidated in between. But | |
460 | if it would be, we'll reopen the open request to | |
461 | MDS later during file open path */ | |
462 | mutex_unlock(&lli->lli_och_mutex); | |
463 | RETURN(1); | |
464 | } else { | |
465 | mutex_unlock(&lli->lli_och_mutex); | |
466 | } | |
467 | } | |
468 | ||
469 | if (it->it_op == IT_GETATTR) { | |
470 | rc = ll_statahead_enter(parent, &de, 0); | |
471 | if (rc == 1) | |
472 | goto mark; | |
473 | else if (rc != -EAGAIN && rc != 0) | |
474 | GOTO(out, rc = 0); | |
475 | } | |
476 | ||
477 | do_lock: | |
478 | op_data = ll_prep_md_op_data(NULL, parent, de->d_inode, | |
479 | de->d_name.name, de->d_name.len, | |
480 | 0, LUSTRE_OPC_ANY, NULL); | |
481 | if (IS_ERR(op_data)) | |
482 | RETURN(PTR_ERR(op_data)); | |
483 | ||
484 | if (!IS_POSIXACL(parent) || !exp_connect_umask(exp)) | |
485 | it->it_create_mode &= ~current_umask(); | |
486 | it->it_create_mode |= M_CHECK_STALE; | |
487 | rc = md_intent_lock(exp, op_data, NULL, 0, it, | |
488 | lookup_flags, | |
489 | &req, ll_md_blocking_ast, 0); | |
490 | it->it_create_mode &= ~M_CHECK_STALE; | |
491 | ll_finish_md_op_data(op_data); | |
492 | ||
493 | /* If req is NULL, then md_intent_lock only tried to do a lock match; | |
494 | * if all was well, it will return 1 if it found locks, 0 otherwise. */ | |
495 | if (req == NULL && rc >= 0) { | |
496 | if (!rc) | |
497 | goto do_lookup; | |
498 | GOTO(out, rc); | |
499 | } | |
500 | ||
501 | if (rc < 0) { | |
502 | if (rc != -ESTALE) { | |
503 | CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status " | |
504 | "%d\n", rc, it->d.lustre.it_status); | |
505 | } | |
506 | GOTO(out, rc = 0); | |
507 | } | |
508 | ||
509 | revalidate_finish: | |
510 | rc = ll_revalidate_it_finish(req, it, de); | |
511 | if (rc != 0) { | |
512 | if (rc != -ESTALE && rc != -ENOENT) | |
513 | ll_intent_release(it); | |
514 | GOTO(out, rc = 0); | |
515 | } | |
516 | ||
517 | if ((it->it_op & IT_OPEN) && de->d_inode && | |
518 | !S_ISREG(de->d_inode->i_mode) && | |
519 | !S_ISDIR(de->d_inode->i_mode)) { | |
520 | ll_release_openhandle(de, it); | |
521 | } | |
522 | rc = 1; | |
523 | ||
524 | out: | |
525 | /* We do not free request as it may be reused during following lookup | |
526 | * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will | |
527 | * be freed in ll_lookup_it or in ll_intent_release. But if | |
528 | * request was not completed, we need to free it. (bug 5154, 9903) */ | |
529 | if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE)) | |
530 | ptlrpc_req_finished(req); | |
531 | if (rc == 0) { | |
532 | /* mdt may grant layout lock for the newly created file, so | |
533 | * release the lock to avoid leaking */ | |
534 | ll_intent_drop_lock(it); | |
535 | ll_invalidate_aliases(de->d_inode); | |
536 | } else { | |
537 | __u64 bits = 0; | |
538 | __u64 matched_bits = 0; | |
539 | ||
540 | CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p " | |
541 | "inode %p refc %d\n", de->d_name.len, | |
542 | de->d_name.name, de, de->d_parent, de->d_inode, | |
543 | d_refcount(de)); | |
544 | ||
545 | ll_set_lock_data(exp, de->d_inode, it, &bits); | |
546 | ||
547 | /* Note: We have to match both LOOKUP and PERM lock | |
548 | * here to make sure the dentry is valid and no one | |
549 | * changing the permission. | |
550 | * But if the client connects < 2.4 server, which will | |
551 | * only grant LOOKUP lock, so we can only Match LOOKUP | |
552 | * lock for old server */ | |
553 | if (exp_connect_flags(ll_i2mdexp(de->d_inode)) && | |
554 | OBD_CONNECT_LVB_TYPE) | |
555 | matched_bits = | |
556 | MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM; | |
557 | else | |
558 | matched_bits = MDS_INODELOCK_LOOKUP; | |
559 | ||
560 | if (((bits & matched_bits) == matched_bits) && | |
561 | d_lustre_invalid(de)) | |
562 | d_lustre_revalidate(de); | |
563 | ll_lookup_finish_locks(it, de); | |
564 | } | |
565 | ||
566 | mark: | |
567 | if (it != NULL && it->it_op == IT_GETATTR && rc > 0) | |
568 | ll_statahead_mark(parent, de); | |
569 | RETURN(rc); | |
570 | ||
571 | /* | |
572 | * This part is here to combat evil-evil race in real_lookup on 2.6 | |
573 | * kernels. The race details are: We enter do_lookup() looking for some | |
574 | * name, there is nothing in dcache for this name yet and d_lookup() | |
575 | * returns NULL. We proceed to real_lookup(), and while we do this, | |
576 | * another process does open on the same file we looking up (most simple | |
577 | * reproducer), open succeeds and the dentry is added. Now back to | |
578 | * us. In real_lookup() we do d_lookup() again and suddenly find the | |
579 | * dentry, so we call d_revalidate on it, but there is no lock, so | |
580 | * without this code we would return 0, but unpatched real_lookup just | |
581 | * returns -ENOENT in such a case instead of retrying the lookup. Once | |
582 | * this is dealt with in real_lookup(), all of this ugly mess can go and | |
583 | * we can just check locks in ->d_revalidate without doing any RPCs | |
584 | * ever. | |
585 | */ | |
586 | do_lookup: | |
587 | if (it != &lookup_it) { | |
588 | /* MDS_INODELOCK_UPDATE needed for IT_GETATTR case. */ | |
589 | if (it->it_op == IT_GETATTR) | |
590 | lookup_it.it_op = IT_GETATTR; | |
591 | ll_lookup_finish_locks(it, de); | |
592 | it = &lookup_it; | |
593 | } | |
594 | ||
595 | /* Do real lookup here. */ | |
596 | op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name, | |
597 | de->d_name.len, 0, (it->it_op & IT_CREAT ? | |
598 | LUSTRE_OPC_CREATE : | |
599 | LUSTRE_OPC_ANY), NULL); | |
600 | if (IS_ERR(op_data)) | |
601 | RETURN(PTR_ERR(op_data)); | |
602 | ||
603 | rc = md_intent_lock(exp, op_data, NULL, 0, it, 0, &req, | |
604 | ll_md_blocking_ast, 0); | |
605 | if (rc >= 0) { | |
606 | struct mdt_body *mdt_body; | |
607 | struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0}; | |
608 | mdt_body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); | |
609 | ||
610 | if (de->d_inode) | |
611 | fid = *ll_inode2fid(de->d_inode); | |
612 | ||
613 | /* see if we got same inode, if not - return error */ | |
614 | if (lu_fid_eq(&fid, &mdt_body->fid1)) { | |
615 | ll_finish_md_op_data(op_data); | |
616 | op_data = NULL; | |
617 | goto revalidate_finish; | |
618 | } | |
619 | ll_intent_release(it); | |
620 | } | |
621 | ll_finish_md_op_data(op_data); | |
622 | GOTO(out, rc = 0); | |
623 | ||
624 | out_sa: | |
625 | /* | |
626 | * For rc == 1 case, should not return directly to prevent losing | |
627 | * statahead windows; for rc == 0 case, the "lookup" will be done later. | |
628 | */ | |
629 | if (it != NULL && it->it_op == IT_GETATTR && rc == 1) | |
630 | ll_statahead_enter(parent, &de, 1); | |
631 | goto mark; | |
632 | } | |
633 | ||
634 | /* | |
635 | * Always trust cached dentries. Update statahead window if necessary. | |
636 | */ | |
637 | int ll_revalidate_nd(struct dentry *dentry, unsigned int flags) | |
638 | { | |
639 | struct inode *parent = dentry->d_parent->d_inode; | |
640 | int unplug = 0; | |
641 | ||
642 | ENTRY; | |
643 | CDEBUG(D_VFSTRACE, "VFS Op:name=%s,flags=%u\n", | |
644 | dentry->d_name.name, flags); | |
645 | ||
646 | if (!(flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE)) && | |
647 | ll_need_statahead(parent, dentry) > 0) { | |
648 | if (flags & LOOKUP_RCU) | |
649 | RETURN(-ECHILD); | |
650 | ||
651 | if (dentry->d_inode == NULL) | |
652 | unplug = 1; | |
653 | do_statahead_enter(parent, &dentry, unplug); | |
654 | ll_statahead_mark(parent, dentry); | |
655 | } | |
656 | ||
657 | RETURN(1); | |
658 | } | |
659 | ||
660 | ||
/*
 * ->d_iput method: release the dentry's reference on its inode.
 * When find_cbdata() reports no lock for the inode (returns 0), the link
 * count is cleared first.  The dentry argument is not used.
 */
void ll_d_iput(struct dentry *de, struct inode *inode)
{
	LASSERT(inode);

	if (find_cbdata(inode) == 0)
		clear_nlink(inode);

	iput(inode);
}
668 | ||
669 | struct dentry_operations ll_d_ops = { | |
670 | .d_revalidate = ll_revalidate_nd, | |
671 | .d_release = ll_release, | |
672 | .d_delete = ll_ddelete, | |
673 | .d_iput = ll_d_iput, | |
674 | .d_compare = ll_dcompare, | |
675 | }; |