Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2011, 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | */ | |
36 | ||
37 | #include <linux/fs.h> | |
38 | #include <linux/sched.h> | |
39 | #include <linux/mm.h> | |
40 | #include <linux/highmem.h> | |
41 | #include <linux/pagemap.h> | |
42 | ||
43 | #define DEBUG_SUBSYSTEM S_LLITE | |
44 | ||
45 | #include <obd_support.h> | |
46 | #include <lustre_lite.h> | |
47 | #include <lustre_dlm.h> | |
48 | #include "llite_internal.h" | |
49 | ||
/* Extra slack, in entries, that is_omitted_entry() adds on top of sai_max before it treats an index as old. */
50 | #define SA_OMITTED_ENTRY_MAX 8ULL | |
51 | ||
/*
 * States of a statahead entry.  Negative values are reserved for error
 * cases.
 */
typedef enum {
	SA_ENTRY_INIT = 0,	/* entry has just been initialized */
	SA_ENTRY_SUCC = 1,	/* stat succeeded */
	SA_ENTRY_INVA = 2,	/* entry is invalid */
	SA_ENTRY_DEST = 3,	/* entry is about to be destroyed */
} se_stat_t;
59 | ||
60 | struct ll_sa_entry { | |
61 | /* link into sai->sai_entries */ | |
62 | struct list_head se_link; | |
63 | /* link into sai->sai_entries_{received,stated} */ | |
64 | struct list_head se_list; | |
65 | /* link into sai hash table locally */ | |
66 | struct list_head se_hash; | |
67 | /* entry reference count */ | |
68 | atomic_t se_refcount; | |
69 | /* entry index in the sai */ | |
70 | __u64 se_index; | |
71 | /* low layer ldlm lock handle */ | |
72 | __u64 se_handle; | |
73 | /* entry status */ | |
74 | se_stat_t se_stat; | |
75 | /* entry size, contains name */ | |
76 | int se_size; | |
77 | /* pointer to async getattr enqueue info */ | |
78 | struct md_enqueue_info *se_minfo; | |
79 | /* pointer to the async getattr request */ | |
80 | struct ptlrpc_request *se_req; | |
81 | /* pointer to the target inode */ | |
82 | struct inode *se_inode; | |
83 | /* entry name */ | |
84 | struct qstr se_qstr; | |
85 | }; | |
86 | ||
/*
 * Generation counter handed out to new statahead infos; updated under
 * sai_generation_lock.  Static storage starts out as zero.
 */
static unsigned int sai_generation;
static DEFINE_SPINLOCK(sai_generation_lock);
89 | ||
90 | static inline int ll_sa_entry_unhashed(struct ll_sa_entry *entry) | |
91 | { | |
92 | return list_empty(&entry->se_hash); | |
93 | } | |
94 | ||
95 | /* | |
96 | * The entry only can be released by the caller, it is necessary to hold lock. | |
97 | */ | |
98 | static inline int ll_sa_entry_stated(struct ll_sa_entry *entry) | |
99 | { | |
100 | smp_rmb(); | |
101 | return (entry->se_stat != SA_ENTRY_INIT); | |
102 | } | |
103 | ||
104 | static inline int ll_sa_entry_hash(int val) | |
105 | { | |
106 | return val & LL_SA_CACHE_MASK; | |
107 | } | |
108 | ||
109 | /* | |
110 | * Insert entry to hash SA table. | |
111 | */ | |
112 | static inline void | |
113 | ll_sa_entry_enhash(struct ll_statahead_info *sai, struct ll_sa_entry *entry) | |
114 | { | |
115 | int i = ll_sa_entry_hash(entry->se_qstr.hash); | |
116 | ||
117 | spin_lock(&sai->sai_cache_lock[i]); | |
118 | list_add_tail(&entry->se_hash, &sai->sai_cache[i]); | |
119 | spin_unlock(&sai->sai_cache_lock[i]); | |
120 | } | |
121 | ||
122 | /* | |
123 | * Remove entry from SA table. | |
124 | */ | |
125 | static inline void | |
126 | ll_sa_entry_unhash(struct ll_statahead_info *sai, struct ll_sa_entry *entry) | |
127 | { | |
128 | int i = ll_sa_entry_hash(entry->se_qstr.hash); | |
129 | ||
130 | spin_lock(&sai->sai_cache_lock[i]); | |
131 | list_del_init(&entry->se_hash); | |
132 | spin_unlock(&sai->sai_cache_lock[i]); | |
133 | } | |
134 | ||
135 | static inline int agl_should_run(struct ll_statahead_info *sai, | |
136 | struct inode *inode) | |
137 | { | |
138 | return (inode != NULL && S_ISREG(inode->i_mode) && sai->sai_agl_valid); | |
139 | } | |
140 | ||
141 | static inline struct ll_sa_entry * | |
142 | sa_first_received_entry(struct ll_statahead_info *sai) | |
143 | { | |
144 | return list_entry(sai->sai_entries_received.next, | |
145 | struct ll_sa_entry, se_list); | |
146 | } | |
147 | ||
148 | static inline struct ll_inode_info * | |
149 | agl_first_entry(struct ll_statahead_info *sai) | |
150 | { | |
151 | return list_entry(sai->sai_entries_agl.next, | |
152 | struct ll_inode_info, lli_agl_list); | |
153 | } | |
154 | ||
155 | static inline int sa_sent_full(struct ll_statahead_info *sai) | |
156 | { | |
157 | return atomic_read(&sai->sai_cache_count) >= sai->sai_max; | |
158 | } | |
159 | ||
160 | static inline int sa_received_empty(struct ll_statahead_info *sai) | |
161 | { | |
162 | return list_empty(&sai->sai_entries_received); | |
163 | } | |
164 | ||
165 | static inline int agl_list_empty(struct ll_statahead_info *sai) | |
166 | { | |
167 | return list_empty(&sai->sai_entries_agl); | |
168 | } | |
169 | ||
170 | /** | |
171 | * (1) hit ratio less than 80% | |
172 | * or | |
173 | * (2) consecutive miss more than 8 | |
174 | * then means low hit. | |
175 | */ | |
176 | static inline int sa_low_hit(struct ll_statahead_info *sai) | |
177 | { | |
178 | return ((sai->sai_hit > 7 && sai->sai_hit < 4 * sai->sai_miss) || | |
179 | (sai->sai_consecutive_miss > 8)); | |
180 | } | |
181 | ||
182 | /* | |
183 | * If the given index is behind of statahead window more than | |
184 | * SA_OMITTED_ENTRY_MAX, then it is old. | |
185 | */ | |
186 | static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index) | |
187 | { | |
188 | return ((__u64)sai->sai_max + index + SA_OMITTED_ENTRY_MAX < | |
189 | sai->sai_index); | |
190 | } | |
191 | ||
192 | /* | |
193 | * Insert it into sai_entries tail when init. | |
194 | */ | |
195 | static struct ll_sa_entry * | |
196 | ll_sa_entry_alloc(struct ll_statahead_info *sai, __u64 index, | |
197 | const char *name, int len) | |
198 | { | |
199 | struct ll_inode_info *lli; | |
200 | struct ll_sa_entry *entry; | |
201 | int entry_size; | |
202 | char *dname; | |
d7e09d03 PT |
203 | |
204 | entry_size = sizeof(struct ll_sa_entry) + (len & ~3) + 4; | |
205 | OBD_ALLOC(entry, entry_size); | |
206 | if (unlikely(entry == NULL)) | |
0a3bdb00 | 207 | return ERR_PTR(-ENOMEM); |
d7e09d03 PT |
208 | |
209 | CDEBUG(D_READA, "alloc sa entry %.*s(%p) index "LPU64"\n", | |
210 | len, name, entry, index); | |
211 | ||
212 | entry->se_index = index; | |
213 | ||
214 | /* | |
215 | * Statahead entry reference rules: | |
216 | * | |
217 | * 1) When statahead entry is initialized, its reference is set as 2. | |
218 | * One reference is used by the directory scanner. When the scanner | |
219 | * searches the statahead cache for the given name, it can perform | |
220 | * lockless hash lookup (only the scanner can remove entry from hash | |
221 | * list), and once found, it needn't to call "atomic_inc()" for the | |
222 | * entry reference. So the performance is improved. After using the | |
223 | * statahead entry, the scanner will call "atomic_dec()" to drop the | |
224 | * reference held when initialization. If it is the last reference, | |
225 | * the statahead entry will be freed. | |
226 | * | |
227 | * 2) All other threads, including statahead thread and ptlrpcd thread, | |
228 | * when they process the statahead entry, the reference for target | |
229 | * should be held to guarantee the entry will not be released by the | |
230 | * directory scanner. After processing the entry, these threads will | |
231 | * drop the entry reference. If it is the last reference, the entry | |
232 | * will be freed. | |
233 | * | |
234 | * The second reference when initializes the statahead entry is used | |
235 | * by the statahead thread, following the rule 2). | |
236 | */ | |
237 | atomic_set(&entry->se_refcount, 2); | |
238 | entry->se_stat = SA_ENTRY_INIT; | |
239 | entry->se_size = entry_size; | |
240 | dname = (char *)entry + sizeof(struct ll_sa_entry); | |
241 | memcpy(dname, name, len); | |
242 | dname[len] = 0; | |
243 | entry->se_qstr.hash = full_name_hash(name, len); | |
244 | entry->se_qstr.len = len; | |
245 | entry->se_qstr.name = dname; | |
246 | ||
247 | lli = ll_i2info(sai->sai_inode); | |
248 | spin_lock(&lli->lli_sa_lock); | |
249 | list_add_tail(&entry->se_link, &sai->sai_entries); | |
250 | INIT_LIST_HEAD(&entry->se_list); | |
251 | ll_sa_entry_enhash(sai, entry); | |
252 | spin_unlock(&lli->lli_sa_lock); | |
253 | ||
254 | atomic_inc(&sai->sai_cache_count); | |
255 | ||
0a3bdb00 | 256 | return entry; |
d7e09d03 PT |
257 | } |
258 | ||
259 | /* | |
260 | * Used by the directory scanner to search entry with name. | |
261 | * | |
262 | * Only the caller can remove the entry from hash, so it is unnecessary to hold | |
263 | * hash lock. It is caller's duty to release the init refcount on the entry, so | |
264 | * it is also unnecessary to increase refcount on the entry. | |
265 | */ | |
266 | static struct ll_sa_entry * | |
267 | ll_sa_entry_get_byname(struct ll_statahead_info *sai, const struct qstr *qstr) | |
268 | { | |
269 | struct ll_sa_entry *entry; | |
270 | int i = ll_sa_entry_hash(qstr->hash); | |
271 | ||
272 | list_for_each_entry(entry, &sai->sai_cache[i], se_hash) { | |
273 | if (entry->se_qstr.hash == qstr->hash && | |
274 | entry->se_qstr.len == qstr->len && | |
275 | memcmp(entry->se_qstr.name, qstr->name, qstr->len) == 0) | |
276 | return entry; | |
277 | } | |
278 | return NULL; | |
279 | } | |
280 | ||
281 | /* | |
282 | * Used by the async getattr request callback to find entry with index. | |
283 | * | |
284 | * Inside lli_sa_lock to prevent others to change the list during the search. | |
285 | * It needs to increase entry refcount before returning to guarantee that the | |
286 | * entry cannot be freed by others. | |
287 | */ | |
288 | static struct ll_sa_entry * | |
289 | ll_sa_entry_get_byindex(struct ll_statahead_info *sai, __u64 index) | |
290 | { | |
291 | struct ll_sa_entry *entry; | |
292 | ||
293 | list_for_each_entry(entry, &sai->sai_entries, se_link) { | |
294 | if (entry->se_index == index) { | |
295 | LASSERT(atomic_read(&entry->se_refcount) > 0); | |
296 | atomic_inc(&entry->se_refcount); | |
297 | return entry; | |
298 | } | |
299 | if (entry->se_index > index) | |
300 | break; | |
301 | } | |
302 | return NULL; | |
303 | } | |
304 | ||
305 | static void ll_sa_entry_cleanup(struct ll_statahead_info *sai, | |
306 | struct ll_sa_entry *entry) | |
307 | { | |
308 | struct md_enqueue_info *minfo = entry->se_minfo; | |
309 | struct ptlrpc_request *req = entry->se_req; | |
310 | ||
311 | if (minfo) { | |
312 | entry->se_minfo = NULL; | |
313 | ll_intent_release(&minfo->mi_it); | |
314 | iput(minfo->mi_dir); | |
315 | OBD_FREE_PTR(minfo); | |
316 | } | |
317 | ||
318 | if (req) { | |
319 | entry->se_req = NULL; | |
320 | ptlrpc_req_finished(req); | |
321 | } | |
322 | } | |
323 | ||
324 | static void ll_sa_entry_put(struct ll_statahead_info *sai, | |
325 | struct ll_sa_entry *entry) | |
326 | { | |
327 | if (atomic_dec_and_test(&entry->se_refcount)) { | |
328 | CDEBUG(D_READA, "free sa entry %.*s(%p) index "LPU64"\n", | |
329 | entry->se_qstr.len, entry->se_qstr.name, entry, | |
330 | entry->se_index); | |
331 | ||
332 | LASSERT(list_empty(&entry->se_link)); | |
333 | LASSERT(list_empty(&entry->se_list)); | |
334 | LASSERT(ll_sa_entry_unhashed(entry)); | |
335 | ||
336 | ll_sa_entry_cleanup(sai, entry); | |
337 | if (entry->se_inode) | |
338 | iput(entry->se_inode); | |
339 | ||
340 | OBD_FREE(entry, entry->se_size); | |
341 | atomic_dec(&sai->sai_cache_count); | |
342 | } | |
343 | } | |
344 | ||
345 | static inline void | |
346 | do_sa_entry_fini(struct ll_statahead_info *sai, struct ll_sa_entry *entry) | |
347 | { | |
348 | struct ll_inode_info *lli = ll_i2info(sai->sai_inode); | |
349 | ||
350 | LASSERT(!ll_sa_entry_unhashed(entry)); | |
351 | LASSERT(!list_empty(&entry->se_link)); | |
352 | ||
353 | ll_sa_entry_unhash(sai, entry); | |
354 | ||
355 | spin_lock(&lli->lli_sa_lock); | |
356 | entry->se_stat = SA_ENTRY_DEST; | |
357 | list_del_init(&entry->se_link); | |
358 | if (likely(!list_empty(&entry->se_list))) | |
359 | list_del_init(&entry->se_list); | |
360 | spin_unlock(&lli->lli_sa_lock); | |
361 | ||
362 | ll_sa_entry_put(sai, entry); | |
363 | } | |
364 | ||
365 | /* | |
366 | * Delete it from sai_entries_stated list when fini. | |
367 | */ | |
368 | static void | |
369 | ll_sa_entry_fini(struct ll_statahead_info *sai, struct ll_sa_entry *entry) | |
370 | { | |
371 | struct ll_sa_entry *pos, *next; | |
372 | ||
373 | if (entry) | |
374 | do_sa_entry_fini(sai, entry); | |
375 | ||
376 | /* drop old entry, only 'scanner' process does this, no need to lock */ | |
377 | list_for_each_entry_safe(pos, next, &sai->sai_entries, se_link) { | |
378 | if (!is_omitted_entry(sai, pos->se_index)) | |
379 | break; | |
380 | do_sa_entry_fini(sai, pos); | |
381 | } | |
382 | } | |
383 | ||
384 | /* | |
385 | * Inside lli_sa_lock. | |
386 | */ | |
387 | static void | |
388 | do_sa_entry_to_stated(struct ll_statahead_info *sai, | |
389 | struct ll_sa_entry *entry, se_stat_t stat) | |
390 | { | |
391 | struct ll_sa_entry *se; | |
392 | struct list_head *pos = &sai->sai_entries_stated; | |
393 | ||
394 | if (!list_empty(&entry->se_list)) | |
395 | list_del_init(&entry->se_list); | |
396 | ||
397 | list_for_each_entry_reverse(se, &sai->sai_entries_stated, se_list) { | |
398 | if (se->se_index < entry->se_index) { | |
399 | pos = &se->se_list; | |
400 | break; | |
401 | } | |
402 | } | |
403 | ||
404 | list_add(&entry->se_list, pos); | |
405 | entry->se_stat = stat; | |
406 | } | |
407 | ||
408 | /* | |
409 | * Move entry to sai_entries_stated and sort with the index. | |
410 | * \retval 1 -- entry to be destroyed. | |
411 | * \retval 0 -- entry is inserted into stated list. | |
412 | */ | |
413 | static int | |
414 | ll_sa_entry_to_stated(struct ll_statahead_info *sai, | |
415 | struct ll_sa_entry *entry, se_stat_t stat) | |
416 | { | |
417 | struct ll_inode_info *lli = ll_i2info(sai->sai_inode); | |
418 | int ret = 1; | |
419 | ||
420 | ll_sa_entry_cleanup(sai, entry); | |
421 | ||
422 | spin_lock(&lli->lli_sa_lock); | |
423 | if (likely(entry->se_stat != SA_ENTRY_DEST)) { | |
424 | do_sa_entry_to_stated(sai, entry, stat); | |
425 | ret = 0; | |
426 | } | |
427 | spin_unlock(&lli->lli_sa_lock); | |
428 | ||
429 | return ret; | |
430 | } | |
431 | ||
432 | /* | |
433 | * Insert inode into the list of sai_entries_agl. | |
434 | */ | |
435 | static void ll_agl_add(struct ll_statahead_info *sai, | |
436 | struct inode *inode, int index) | |
437 | { | |
438 | struct ll_inode_info *child = ll_i2info(inode); | |
439 | struct ll_inode_info *parent = ll_i2info(sai->sai_inode); | |
440 | int added = 0; | |
441 | ||
442 | spin_lock(&child->lli_agl_lock); | |
443 | if (child->lli_agl_index == 0) { | |
444 | child->lli_agl_index = index; | |
445 | spin_unlock(&child->lli_agl_lock); | |
446 | ||
447 | LASSERT(list_empty(&child->lli_agl_list)); | |
448 | ||
449 | igrab(inode); | |
450 | spin_lock(&parent->lli_agl_lock); | |
451 | if (agl_list_empty(sai)) | |
452 | added = 1; | |
453 | list_add_tail(&child->lli_agl_list, &sai->sai_entries_agl); | |
454 | spin_unlock(&parent->lli_agl_lock); | |
455 | } else { | |
456 | spin_unlock(&child->lli_agl_lock); | |
457 | } | |
458 | ||
459 | if (added > 0) | |
460 | wake_up(&sai->sai_agl_thread.t_ctl_waitq); | |
461 | } | |
462 | ||
463 | static struct ll_statahead_info *ll_sai_alloc(void) | |
464 | { | |
465 | struct ll_statahead_info *sai; | |
466 | int i; | |
d7e09d03 PT |
467 | |
468 | OBD_ALLOC_PTR(sai); | |
469 | if (!sai) | |
0a3bdb00 | 470 | return NULL; |
d7e09d03 PT |
471 | |
472 | atomic_set(&sai->sai_refcount, 1); | |
473 | ||
474 | spin_lock(&sai_generation_lock); | |
475 | sai->sai_generation = ++sai_generation; | |
476 | if (unlikely(sai_generation == 0)) | |
477 | sai->sai_generation = ++sai_generation; | |
478 | spin_unlock(&sai_generation_lock); | |
479 | ||
480 | sai->sai_max = LL_SA_RPC_MIN; | |
481 | sai->sai_index = 1; | |
482 | init_waitqueue_head(&sai->sai_waitq); | |
483 | init_waitqueue_head(&sai->sai_thread.t_ctl_waitq); | |
484 | init_waitqueue_head(&sai->sai_agl_thread.t_ctl_waitq); | |
485 | ||
486 | INIT_LIST_HEAD(&sai->sai_entries); | |
487 | INIT_LIST_HEAD(&sai->sai_entries_received); | |
488 | INIT_LIST_HEAD(&sai->sai_entries_stated); | |
489 | INIT_LIST_HEAD(&sai->sai_entries_agl); | |
490 | ||
491 | for (i = 0; i < LL_SA_CACHE_SIZE; i++) { | |
492 | INIT_LIST_HEAD(&sai->sai_cache[i]); | |
493 | spin_lock_init(&sai->sai_cache_lock[i]); | |
494 | } | |
495 | atomic_set(&sai->sai_cache_count, 0); | |
496 | ||
0a3bdb00 | 497 | return sai; |
d7e09d03 PT |
498 | } |
499 | ||
500 | static inline struct ll_statahead_info * | |
501 | ll_sai_get(struct ll_statahead_info *sai) | |
502 | { | |
503 | atomic_inc(&sai->sai_refcount); | |
504 | return sai; | |
505 | } | |
506 | ||
/*
 * Drop one reference on @sai.
 *
 * When the count reaches zero, atomic_dec_and_lock() returns with
 * lli_sa_lock held.  The sai is then detached from the directory inode
 * (lli_sai and lli_opendir_pid are reset), every entry still linked on
 * sai_entries is finalized, the lists and counters are asserted to be
 * empty, the directory inode reference is dropped and the sai is freed.
 */
507 | static void ll_sai_put(struct ll_statahead_info *sai) | |
508 | { | |
509 | struct inode *inode = sai->sai_inode; | |
510 | struct ll_inode_info *lli = ll_i2info(inode); | |
d7e09d03 PT |
511 | |
512 | if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_sa_lock)) { | |
513 | struct ll_sa_entry *entry, *next; | |
514 | ||
/* Re-check the count under the lock: a new reference may have appeared. */
515 | if (unlikely(atomic_read(&sai->sai_refcount) > 0)) { | |
516 | /* It is race case, the interpret callback just hold | |
517 | * a reference count */ | |
518 | spin_unlock(&lli->lli_sa_lock); | |
e05e02e4 | 519 | return; |
d7e09d03 PT |
520 | } | |
521 | ||
522 | LASSERT(lli->lli_opendir_key == NULL); | |
523 | LASSERT(thread_is_stopped(&sai->sai_thread)); | |
524 | LASSERT(thread_is_stopped(&sai->sai_agl_thread)); | |
525 | ||
/* Detach the sai from the directory while lli_sa_lock is still held. */
526 | lli->lli_sai = NULL; | |
527 | lli->lli_opendir_pid = 0; | |
528 | spin_unlock(&lli->lli_sa_lock); | |
529 | ||
/* Only a debug message: more requests were sent than replies counted. */
530 | if (sai->sai_sent > sai->sai_replied) | |
531 | CDEBUG(D_READA,"statahead for dir "DFID" does not " | |
532 | "finish: [sent:"LPU64"] [replied:"LPU64"]\n", | |
533 | PFID(&lli->lli_fid), | |
534 | sai->sai_sent, sai->sai_replied); | |
535 | ||
/* do_sa_entry_fini() unlinks the entry, hence the _safe iterator. */
536 | list_for_each_entry_safe(entry, next, | |
537 | &sai->sai_entries, se_link) | |
538 | do_sa_entry_fini(sai, entry); | |
539 | ||
540 | LASSERT(list_empty(&sai->sai_entries)); | |
541 | LASSERT(sa_received_empty(sai)); | |
542 | LASSERT(list_empty(&sai->sai_entries_stated)); | |
543 | ||
544 | LASSERT(atomic_read(&sai->sai_cache_count) == 0); | |
545 | LASSERT(agl_list_empty(sai)); | |
546 | ||
547 | iput(inode); | |
548 | OBD_FREE_PTR(sai); | |
549 | } | |
d7e09d03 PT |
550 | } |
551 | ||
552 | /* Do NOT forget to drop inode refcount when into sai_entries_agl. */ | |
553 | static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai) | |
554 | { | |
555 | struct ll_inode_info *lli = ll_i2info(inode); | |
556 | __u64 index = lli->lli_agl_index; | |
557 | int rc; | |
d7e09d03 PT |
558 | |
559 | LASSERT(list_empty(&lli->lli_agl_list)); | |
560 | ||
561 | /* AGL maybe fall behind statahead with one entry */ | |
562 | if (is_omitted_entry(sai, index + 1)) { | |
563 | lli->lli_agl_index = 0; | |
564 | iput(inode); | |
e05e02e4 | 565 | return; |
d7e09d03 PT |
566 | } |
567 | ||
568 | /* Someone is in glimpse (sync or async), do nothing. */ | |
569 | rc = down_write_trylock(&lli->lli_glimpse_sem); | |
570 | if (rc == 0) { | |
571 | lli->lli_agl_index = 0; | |
572 | iput(inode); | |
e05e02e4 | 573 | return; |
d7e09d03 PT |
574 | } |
575 | ||
576 | /* | |
577 | * Someone triggered glimpse within 1 sec before. | |
578 | * 1) The former glimpse succeeded with glimpse lock granted by OST, and | |
579 | * if the lock is still cached on client, AGL needs to do nothing. If | |
580 | * it is cancelled by other client, AGL maybe cannot obtaion new lock | |
581 | * for no glimpse callback triggered by AGL. | |
582 | * 2) The former glimpse succeeded, but OST did not grant glimpse lock. | |
583 | * Under such case, it is quite possible that the OST will not grant | |
584 | * glimpse lock for AGL also. | |
585 | * 3) The former glimpse failed, compared with other two cases, it is | |
586 | * relative rare. AGL can ignore such case, and it will not muchly | |
587 | * affect the performance. | |
588 | */ | |
589 | if (lli->lli_glimpse_time != 0 && | |
590 | cfs_time_before(cfs_time_shift(-1), lli->lli_glimpse_time)) { | |
591 | up_write(&lli->lli_glimpse_sem); | |
592 | lli->lli_agl_index = 0; | |
593 | iput(inode); | |
e05e02e4 | 594 | return; |
d7e09d03 PT |
595 | } |
596 | ||
597 | CDEBUG(D_READA, "Handling (init) async glimpse: inode = " | |
598 | DFID", idx = "LPU64"\n", PFID(&lli->lli_fid), index); | |
599 | ||
600 | cl_agl(inode); | |
601 | lli->lli_agl_index = 0; | |
602 | lli->lli_glimpse_time = cfs_time_current(); | |
603 | up_write(&lli->lli_glimpse_sem); | |
604 | ||
605 | CDEBUG(D_READA, "Handled (init) async glimpse: inode= " | |
606 | DFID", idx = "LPU64", rc = %d\n", | |
607 | PFID(&lli->lli_fid), index, rc); | |
608 | ||
609 | iput(inode); | |
d7e09d03 PT |
610 | } |
611 | ||
/*
 * Process the first entry queued on sai_entries_received.
 *
 * The entry is taken off the list under lli_sa_lock with an extra
 * reference.  Its reply is then checked, the lock saved in se_handle is
 * revalidated and the child inode is prepared from the reply.  Whatever
 * the outcome, the entry is moved to the stated list at "out" (as
 * SA_ENTRY_INVA when rc is negative, SA_ENTRY_SUCC otherwise), a waiter on
 * sai_waitq is woken up if this is the index being waited for, and the
 * extra reference is dropped.
 */
612 | static void ll_post_statahead(struct ll_statahead_info *sai) | |
613 | { | |
614 | struct inode *dir = sai->sai_inode; | |
615 | struct inode *child; | |
616 | struct ll_inode_info *lli = ll_i2info(dir); | |
617 | struct ll_sa_entry *entry; | |
618 | struct md_enqueue_info *minfo; | |
619 | struct lookup_intent *it; | |
620 | struct ptlrpc_request *req; | |
621 | struct mdt_body *body; | |
622 | int rc = 0; | |
d7e09d03 PT |
623 | |
624 | spin_lock(&lli->lli_sa_lock); | |
625 | if (unlikely(sa_received_empty(sai))) { | |
626 | spin_unlock(&lli->lli_sa_lock); | |
e05e02e4 | 627 | return; |
d7e09d03 PT |
628 | } | |
629 | entry = sa_first_received_entry(sai); | |
/* Hold a reference for the duration of this function; dropped at the end. */
630 | atomic_inc(&entry->se_refcount); | |
631 | list_del_init(&entry->se_list); | |
632 | spin_unlock(&lli->lli_sa_lock); | |
633 | ||
634 | LASSERT(entry->se_handle != 0); | |
635 | ||
636 | minfo = entry->se_minfo; | |
637 | it = &minfo->mi_it; | |
638 | req = entry->se_req; | |
639 | body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); | |
640 | if (body == NULL) | |
641 | GOTO(out, rc = -EFAULT); | |
642 | ||
643 | child = entry->se_inode; | |
644 | if (child == NULL) { | |
645 | /* | |
646 | * lookup. | |
647 | */ | |
648 | LASSERT(fid_is_zero(&minfo->mi_data.op_fid2)); | |
649 | ||
bef31c78 | 650 | /* XXX: No fid in reply, this is probably cross-ref case.
d7e09d03 PT |
651 | * SA can't handle it yet. */ |
652 | if (body->valid & OBD_MD_MDS) | |
653 | GOTO(out, rc = -EAGAIN); | |
654 | } else { | |
655 | /* | |
656 | * revalidate. | |
657 | */ | |
658 | /* unlinked and re-created with the same name */ | |
659 | if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->fid1))){ | |
660 | entry->se_inode = NULL; | |
661 | iput(child); | |
662 | child = NULL; | |
663 | } | |
664 | } | |
665 | ||
/* Put the lock handle saved by the interpret callback back into the intent. */
666 | it->d.lustre.it_lock_handle = entry->se_handle; | |
667 | rc = md_revalidate_lock(ll_i2mdexp(dir), it, ll_inode2fid(dir), NULL); | |
668 | if (rc != 1) | |
669 | GOTO(out, rc = -EAGAIN); | |
670 | ||
671 | rc = ll_prep_inode(&child, req, dir->i_sb, it); | |
672 | if (rc) | |
673 | GOTO(out, rc); | |
674 | ||
675 | CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", | |
676 | child, child->i_ino, child->i_generation); | |
677 | ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL); | |
678 | ||
679 | entry->se_inode = child; | |
680 | ||
681 | if (agl_should_run(sai, child)) | |
682 | ll_agl_add(sai, child, entry->se_index); | |
683 | ||
d7e09d03 PT |
684 | out: |
685 | /* The "ll_sa_entry_to_stated()" will drop related ldlm ibits lock | |
686 | * reference count by calling "ll_intent_drop_lock()" in spite of the | |
687 | * above operations failed or not. Do not worry about calling | |
688 | * "ll_intent_drop_lock()" more than once. */ | |
689 | rc = ll_sa_entry_to_stated(sai, entry, | |
690 | rc < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC); | |
691 | if (rc == 0 && entry->se_index == sai->sai_index_wait) | |
692 | wake_up(&sai->sai_waitq); | |
693 | ll_sa_entry_put(sai, entry); | |
694 | } | |
695 | ||
/*
 * Callback for an async getattr request; sa_args_init() installs it as
 * minfo->mi_cb.
 *
 * Under lli_sa_lock it checks that the directory still has a statahead
 * info of the generation the request was sent for, that the statahead
 * thread is running and that the entry with index mi_cbdata still exists.
 * On success (rc == 0) @minfo, an extra reference on @req and the lock
 * handle are stored in the entry, which is queued on
 * sai_entries_received.  On failure the entry goes to the stated list as
 * SA_ENTRY_INVA.
 *
 * When a non-zero rc is returned, @minfo has been released here (intent,
 * directory inode reference and the structure itself).  Otherwise it is
 * owned by the entry and released by ll_sa_entry_cleanup().
 *
 * Returns 0 or a negative errno (-ENOENT, -ESTALE, -EBADFD, -EIDRM, or the
 * rc passed in).
 */
696 | static int ll_statahead_interpret(struct ptlrpc_request *req, | |
697 | struct md_enqueue_info *minfo, int rc) | |
698 | { | |
699 | struct lookup_intent *it = &minfo->mi_it; | |
700 | struct inode *dir = minfo->mi_dir; | |
701 | struct ll_inode_info *lli = ll_i2info(dir); | |
702 | struct ll_statahead_info *sai = NULL; | |
703 | struct ll_sa_entry *entry; | |
704 | int wakeup; | |
d7e09d03 PT |
705 | |
706 | if (it_disposition(it, DISP_LOOKUP_NEG)) | |
707 | rc = -ENOENT; | |
708 | ||
709 | spin_lock(&lli->lli_sa_lock); | |
710 | /* stale entry */ | |
711 | if (unlikely(lli->lli_sai == NULL || | |
712 | lli->lli_sai->sai_generation != minfo->mi_generation)) { | |
713 | spin_unlock(&lli->lli_sa_lock); | |
714 | GOTO(out, rc = -ESTALE); | |
715 | } else { | |
/* Reference on the sai is dropped again at the end of the function. */
716 | sai = ll_sai_get(lli->lli_sai); | |
717 | if (unlikely(!thread_is_running(&sai->sai_thread))) { | |
718 | sai->sai_replied++; | |
719 | spin_unlock(&lli->lli_sa_lock); | |
720 | GOTO(out, rc = -EBADFD); | |
721 | } | |
722 | ||
/* On success this takes a reference on the entry; dropped below by ll_sa_entry_put(). */
723 | entry = ll_sa_entry_get_byindex(sai, minfo->mi_cbdata); | |
724 | if (entry == NULL) { | |
725 | sai->sai_replied++; | |
726 | spin_unlock(&lli->lli_sa_lock); | |
727 | GOTO(out, rc = -EIDRM); | |
728 | } | |
729 | ||
730 | if (rc != 0) { | |
731 | do_sa_entry_to_stated(sai, entry, SA_ENTRY_INVA); | |
/* NOTE(review): this decides on sai_index_wait, yet the wake-up below targets the statahead thread's waitq, not sai_waitq -- confirm this is intended. */
732 | wakeup = (entry->se_index == sai->sai_index_wait); | |
733 | } else { | |
734 | entry->se_minfo = minfo; | |
735 | entry->se_req = ptlrpc_request_addref(req); | |
736 | /* Release the async ibits lock ASAP to avoid deadlock | |
737 | * when statahead thread tries to enqueue lock on parent | |
738 | * for readpage and other tries to enqueue lock on child | |
739 | * with parent's lock held, for example: unlink. */ | |
740 | entry->se_handle = it->d.lustre.it_lock_handle; | |
741 | ll_intent_drop_lock(it); | |
/* Wake the statahead thread only when the received list was empty before this entry. */
742 | wakeup = sa_received_empty(sai); | |
743 | list_add_tail(&entry->se_list, | |
744 | &sai->sai_entries_received); | |
745 | } | |
746 | sai->sai_replied++; | |
747 | spin_unlock(&lli->lli_sa_lock); | |
748 | ||
749 | ll_sa_entry_put(sai, entry); | |
750 | if (wakeup) | |
751 | wake_up(&sai->sai_thread.t_ctl_waitq); | |
752 | } | |
753 | ||
d7e09d03 PT |
754 | out: |
/* On any failure minfo was not handed over to an entry, so release it here. */
755 | if (rc != 0) { | |
756 | ll_intent_release(it); | |
757 | iput(dir); | |
758 | OBD_FREE_PTR(minfo); | |
759 | } | |
760 | if (sai != NULL) | |
761 | ll_sai_put(sai); | |
762 | return rc; | |
763 | } | |
764 | ||
765 | static void sa_args_fini(struct md_enqueue_info *minfo, | |
766 | struct ldlm_enqueue_info *einfo) | |
767 | { | |
768 | LASSERT(minfo && einfo); | |
769 | iput(minfo->mi_dir); | |
770 | capa_put(minfo->mi_data.op_capa1); | |
771 | capa_put(minfo->mi_data.op_capa2); | |
772 | OBD_FREE_PTR(minfo); | |
773 | OBD_FREE_PTR(einfo); | |
774 | } | |
775 | ||
776 | /** | |
777 | * There is race condition between "capa_put" and "ll_statahead_interpret" for | |
778 | * accessing "op_data.op_capa[1,2]" as following: | |
779 | * "capa_put" releases "op_data.op_capa[1,2]"'s reference count after calling | |
780 | * "md_intent_getattr_async". But "ll_statahead_interpret" maybe run first, and | |
781 | * fill "op_data.op_capa[1,2]" as POISON, then cause "capa_put" access invalid | |
782 | * "ocapa". So here reserve "op_data.op_capa[1,2]" in "pcapa" before calling | |
783 | * "md_intent_getattr_async". | |
784 | */ | |
785 | static int sa_args_init(struct inode *dir, struct inode *child, | |
786 | struct ll_sa_entry *entry, struct md_enqueue_info **pmi, | |
787 | struct ldlm_enqueue_info **pei, | |
788 | struct obd_capa **pcapa) | |
789 | { | |
790 | struct qstr *qstr = &entry->se_qstr; | |
791 | struct ll_inode_info *lli = ll_i2info(dir); | |
792 | struct md_enqueue_info *minfo; | |
793 | struct ldlm_enqueue_info *einfo; | |
794 | struct md_op_data *op_data; | |
795 | ||
796 | OBD_ALLOC_PTR(einfo); | |
797 | if (einfo == NULL) | |
798 | return -ENOMEM; | |
799 | ||
800 | OBD_ALLOC_PTR(minfo); | |
801 | if (minfo == NULL) { | |
802 | OBD_FREE_PTR(einfo); | |
803 | return -ENOMEM; | |
804 | } | |
805 | ||
806 | op_data = ll_prep_md_op_data(&minfo->mi_data, dir, child, qstr->name, | |
807 | qstr->len, 0, LUSTRE_OPC_ANY, NULL); | |
808 | if (IS_ERR(op_data)) { | |
809 | OBD_FREE_PTR(einfo); | |
810 | OBD_FREE_PTR(minfo); | |
811 | return PTR_ERR(op_data); | |
812 | } | |
813 | ||
814 | minfo->mi_it.it_op = IT_GETATTR; | |
815 | minfo->mi_dir = igrab(dir); | |
816 | minfo->mi_cb = ll_statahead_interpret; | |
817 | minfo->mi_generation = lli->lli_sai->sai_generation; | |
818 | minfo->mi_cbdata = entry->se_index; | |
819 | ||
820 | einfo->ei_type = LDLM_IBITS; | |
821 | einfo->ei_mode = it_to_lock_mode(&minfo->mi_it); | |
822 | einfo->ei_cb_bl = ll_md_blocking_ast; | |
823 | einfo->ei_cb_cp = ldlm_completion_ast; | |
824 | einfo->ei_cb_gl = NULL; | |
825 | einfo->ei_cbdata = NULL; | |
826 | ||
827 | *pmi = minfo; | |
828 | *pei = einfo; | |
829 | pcapa[0] = op_data->op_capa1; | |
830 | pcapa[1] = op_data->op_capa2; | |
831 | ||
832 | return 0; | |
833 | } | |
834 | ||
835 | static int do_sa_lookup(struct inode *dir, struct ll_sa_entry *entry) | |
836 | { | |
837 | struct md_enqueue_info *minfo; | |
838 | struct ldlm_enqueue_info *einfo; | |
839 | struct obd_capa *capas[2]; | |
840 | int rc; | |
d7e09d03 PT |
841 | |
842 | rc = sa_args_init(dir, NULL, entry, &minfo, &einfo, capas); | |
843 | if (rc) | |
0a3bdb00 | 844 | return rc; |
d7e09d03 PT |
845 | |
846 | rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo); | |
847 | if (!rc) { | |
848 | capa_put(capas[0]); | |
849 | capa_put(capas[1]); | |
850 | } else { | |
851 | sa_args_fini(minfo, einfo); | |
852 | } | |
853 | ||
0a3bdb00 | 854 | return rc; |
d7e09d03 PT |
855 | } |
856 | ||
857 | /** | |
858 | * similar to ll_revalidate_it(). | |
859 | * \retval 1 -- dentry valid | |
860 | * \retval 0 -- will send stat-ahead request | |
861 | * \retval others -- prepare stat-ahead request failed | |
862 | */ | |
863 | static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry, | |
864 | struct dentry *dentry) | |
865 | { | |
866 | struct inode *inode = dentry->d_inode; | |
867 | struct lookup_intent it = { .it_op = IT_GETATTR, | |
868 | .d.lustre.it_lock_handle = 0 }; | |
869 | struct md_enqueue_info *minfo; | |
870 | struct ldlm_enqueue_info *einfo; | |
871 | struct obd_capa *capas[2]; | |
872 | int rc; | |
d7e09d03 PT |
873 | |
874 | if (unlikely(inode == NULL)) | |
0a3bdb00 | 875 | return 1; |
d7e09d03 PT |
876 | |
877 | if (d_mountpoint(dentry)) | |
0a3bdb00 | 878 | return 1; |
d7e09d03 PT |
879 | |
880 | if (unlikely(dentry == dentry->d_sb->s_root)) | |
0a3bdb00 | 881 | return 1; |
d7e09d03 PT |
882 | |
883 | entry->se_inode = igrab(inode); | |
884 | rc = md_revalidate_lock(ll_i2mdexp(dir), &it, ll_inode2fid(inode),NULL); | |
885 | if (rc == 1) { | |
886 | entry->se_handle = it.d.lustre.it_lock_handle; | |
887 | ll_intent_release(&it); | |
0a3bdb00 | 888 | return 1; |
d7e09d03 PT |
889 | } |
890 | ||
891 | rc = sa_args_init(dir, inode, entry, &minfo, &einfo, capas); | |
892 | if (rc) { | |
893 | entry->se_inode = NULL; | |
894 | iput(inode); | |
0a3bdb00 | 895 | return rc; |
d7e09d03 PT |
896 | } |
897 | ||
898 | rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo); | |
899 | if (!rc) { | |
900 | capa_put(capas[0]); | |
901 | capa_put(capas[1]); | |
902 | } else { | |
903 | entry->se_inode = NULL; | |
904 | iput(inode); | |
905 | sa_args_fini(minfo, einfo); | |
906 | } | |
907 | ||
0a3bdb00 | 908 | return rc; |
d7e09d03 PT |
909 | } |
910 | ||
911 | static void ll_statahead_one(struct dentry *parent, const char* entry_name, | |
912 | int entry_name_len) | |
913 | { | |
914 | struct inode *dir = parent->d_inode; | |
915 | struct ll_inode_info *lli = ll_i2info(dir); | |
916 | struct ll_statahead_info *sai = lli->lli_sai; | |
917 | struct dentry *dentry = NULL; | |
918 | struct ll_sa_entry *entry; | |
919 | int rc; | |
920 | int rc1; | |
d7e09d03 PT |
921 | |
922 | entry = ll_sa_entry_alloc(sai, sai->sai_index, entry_name, | |
923 | entry_name_len); | |
924 | if (IS_ERR(entry)) | |
e05e02e4 | 925 | return; |
d7e09d03 PT |
926 | |
927 | dentry = d_lookup(parent, &entry->se_qstr); | |
928 | if (!dentry) { | |
929 | rc = do_sa_lookup(dir, entry); | |
930 | } else { | |
931 | rc = do_sa_revalidate(dir, entry, dentry); | |
932 | if (rc == 1 && agl_should_run(sai, dentry->d_inode)) | |
933 | ll_agl_add(sai, dentry->d_inode, entry->se_index); | |
934 | } | |
935 | ||
936 | if (dentry != NULL) | |
937 | dput(dentry); | |
938 | ||
939 | if (rc) { | |
940 | rc1 = ll_sa_entry_to_stated(sai, entry, | |
941 | rc < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC); | |
942 | if (rc1 == 0 && entry->se_index == sai->sai_index_wait) | |
943 | wake_up(&sai->sai_waitq); | |
944 | } else { | |
945 | sai->sai_sent++; | |
946 | } | |
947 | ||
948 | sai->sai_index++; | |
949 | /* drop one refcount on entry by ll_sa_entry_alloc */ | |
950 | ll_sa_entry_put(sai, entry); | |
d7e09d03 PT |
951 | } |
952 | ||
953 | static int ll_agl_thread(void *arg) | |
954 | { | |
955 | struct dentry *parent = (struct dentry *)arg; | |
956 | struct inode *dir = parent->d_inode; | |
957 | struct ll_inode_info *plli = ll_i2info(dir); | |
958 | struct ll_inode_info *clli; | |
959 | struct ll_sb_info *sbi = ll_i2sbi(dir); | |
960 | struct ll_statahead_info *sai = ll_sai_get(plli->lli_sai); | |
961 | struct ptlrpc_thread *thread = &sai->sai_agl_thread; | |
962 | struct l_wait_info lwi = { 0 }; | |
d7e09d03 PT |
963 | |
964 | CDEBUG(D_READA, "agl thread started: [pid %d] [parent %.*s]\n", | |
965 | current_pid(), parent->d_name.len, parent->d_name.name); | |
966 | ||
967 | atomic_inc(&sbi->ll_agl_total); | |
968 | spin_lock(&plli->lli_agl_lock); | |
969 | sai->sai_agl_valid = 1; | |
970 | thread_set_flags(thread, SVC_RUNNING); | |
971 | spin_unlock(&plli->lli_agl_lock); | |
972 | wake_up(&thread->t_ctl_waitq); | |
973 | ||
974 | while (1) { | |
975 | l_wait_event(thread->t_ctl_waitq, | |
976 | !agl_list_empty(sai) || | |
977 | !thread_is_running(thread), | |
978 | &lwi); | |
979 | ||
980 | if (!thread_is_running(thread)) | |
981 | break; | |
982 | ||
983 | spin_lock(&plli->lli_agl_lock); | |
984 | /* The statahead thread maybe help to process AGL entries, | |
985 | * so check whether list empty again. */ | |
986 | if (!agl_list_empty(sai)) { | |
987 | clli = agl_first_entry(sai); | |
988 | list_del_init(&clli->lli_agl_list); | |
989 | spin_unlock(&plli->lli_agl_lock); | |
990 | ll_agl_trigger(&clli->lli_vfs_inode, sai); | |
991 | } else { | |
992 | spin_unlock(&plli->lli_agl_lock); | |
993 | } | |
994 | } | |
995 | ||
996 | spin_lock(&plli->lli_agl_lock); | |
997 | sai->sai_agl_valid = 0; | |
998 | while (!agl_list_empty(sai)) { | |
999 | clli = agl_first_entry(sai); | |
1000 | list_del_init(&clli->lli_agl_list); | |
1001 | spin_unlock(&plli->lli_agl_lock); | |
1002 | clli->lli_agl_index = 0; | |
1003 | iput(&clli->lli_vfs_inode); | |
1004 | spin_lock(&plli->lli_agl_lock); | |
1005 | } | |
1006 | thread_set_flags(thread, SVC_STOPPED); | |
1007 | spin_unlock(&plli->lli_agl_lock); | |
1008 | wake_up(&thread->t_ctl_waitq); | |
1009 | ll_sai_put(sai); | |
1010 | CDEBUG(D_READA, "agl thread stopped: [pid %d] [parent %.*s]\n", | |
1011 | current_pid(), parent->d_name.len, parent->d_name.name); | |
0a3bdb00 | 1012 | return 0; |
d7e09d03 PT |
1013 | } |
1014 | ||
1015 | static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai) | |
1016 | { | |
1017 | struct ptlrpc_thread *thread = &sai->sai_agl_thread; | |
1018 | struct l_wait_info lwi = { 0 }; | |
1019 | struct ll_inode_info *plli; | |
68b636b6 | 1020 | struct task_struct *task; |
d7e09d03 PT |
1021 | |
1022 | CDEBUG(D_READA, "start agl thread: [pid %d] [parent %.*s]\n", | |
1023 | current_pid(), parent->d_name.len, parent->d_name.name); | |
1024 | ||
1025 | plli = ll_i2info(parent->d_inode); | |
1026 | task = kthread_run(ll_agl_thread, parent, | |
1027 | "ll_agl_%u", plli->lli_opendir_pid); | |
1028 | if (IS_ERR(task)) { | |
1029 | CERROR("can't start ll_agl thread, rc: %ld\n", PTR_ERR(task)); | |
1030 | thread_set_flags(thread, SVC_STOPPED); | |
e05e02e4 | 1031 | return; |
d7e09d03 PT |
1032 | } |
1033 | ||
1034 | l_wait_event(thread->t_ctl_waitq, | |
1035 | thread_is_running(thread) || thread_is_stopped(thread), | |
1036 | &lwi); | |
d7e09d03 PT |
1037 | } |
1038 | ||
1039 | static int ll_statahead_thread(void *arg) | |
1040 | { | |
1041 | struct dentry *parent = (struct dentry *)arg; | |
1042 | struct inode *dir = parent->d_inode; | |
1043 | struct ll_inode_info *plli = ll_i2info(dir); | |
1044 | struct ll_inode_info *clli; | |
1045 | struct ll_sb_info *sbi = ll_i2sbi(dir); | |
1046 | struct ll_statahead_info *sai = ll_sai_get(plli->lli_sai); | |
1047 | struct ptlrpc_thread *thread = &sai->sai_thread; | |
1048 | struct ptlrpc_thread *agl_thread = &sai->sai_agl_thread; | |
1049 | struct page *page; | |
1050 | __u64 pos = 0; | |
1051 | int first = 0; | |
1052 | int rc = 0; | |
1053 | struct ll_dir_chain chain; | |
1054 | struct l_wait_info lwi = { 0 }; | |
d7e09d03 PT |
1055 | |
1056 | CDEBUG(D_READA, "statahead thread started: [pid %d] [parent %.*s]\n", | |
1057 | current_pid(), parent->d_name.len, parent->d_name.name); | |
1058 | ||
1059 | if (sbi->ll_flags & LL_SBI_AGL_ENABLED) | |
1060 | ll_start_agl(parent, sai); | |
1061 | ||
1062 | atomic_inc(&sbi->ll_sa_total); | |
1063 | spin_lock(&plli->lli_sa_lock); | |
1064 | thread_set_flags(thread, SVC_RUNNING); | |
1065 | spin_unlock(&plli->lli_sa_lock); | |
1066 | wake_up(&thread->t_ctl_waitq); | |
1067 | ||
1068 | ll_dir_chain_init(&chain); | |
1069 | page = ll_get_dir_page(dir, pos, &chain); | |
1070 | ||
1071 | while (1) { | |
1072 | struct lu_dirpage *dp; | |
1073 | struct lu_dirent *ent; | |
1074 | ||
1075 | if (IS_ERR(page)) { | |
1076 | rc = PTR_ERR(page); | |
1077 | CDEBUG(D_READA, "error reading dir "DFID" at "LPU64 | |
1078 | "/"LPU64": [rc %d] [parent %u]\n", | |
1079 | PFID(ll_inode2fid(dir)), pos, sai->sai_index, | |
1080 | rc, plli->lli_opendir_pid); | |
1081 | GOTO(out, rc); | |
1082 | } | |
1083 | ||
1084 | dp = page_address(page); | |
1085 | for (ent = lu_dirent_start(dp); ent != NULL; | |
1086 | ent = lu_dirent_next(ent)) { | |
1087 | __u64 hash; | |
1088 | int namelen; | |
1089 | char *name; | |
1090 | ||
1091 | hash = le64_to_cpu(ent->lde_hash); | |
1092 | if (unlikely(hash < pos)) | |
1093 | /* | |
1094 | * Skip until we find target hash value. | |
1095 | */ | |
1096 | continue; | |
1097 | ||
1098 | namelen = le16_to_cpu(ent->lde_namelen); | |
1099 | if (unlikely(namelen == 0)) | |
1100 | /* | |
1101 | * Skip dummy record. | |
1102 | */ | |
1103 | continue; | |
1104 | ||
1105 | name = ent->lde_name; | |
1106 | if (name[0] == '.') { | |
1107 | if (namelen == 1) { | |
1108 | /* | |
1109 | * skip "." | |
1110 | */ | |
1111 | continue; | |
1112 | } else if (name[1] == '.' && namelen == 2) { | |
1113 | /* | |
1114 | * skip ".." | |
1115 | */ | |
1116 | continue; | |
1117 | } else if (!sai->sai_ls_all) { | |
1118 | /* | |
1119 | * skip hidden files. | |
1120 | */ | |
1121 | sai->sai_skip_hidden++; | |
1122 | continue; | |
1123 | } | |
1124 | } | |
1125 | ||
1126 | /* | |
1127 | * don't stat-ahead first entry. | |
1128 | */ | |
1129 | if (unlikely(++first == 1)) | |
1130 | continue; | |
1131 | ||
1132 | keep_it: | |
1133 | l_wait_event(thread->t_ctl_waitq, | |
1134 | !sa_sent_full(sai) || | |
1135 | !sa_received_empty(sai) || | |
1136 | !agl_list_empty(sai) || | |
1137 | !thread_is_running(thread), | |
1138 | &lwi); | |
1139 | ||
1140 | interpret_it: | |
1141 | while (!sa_received_empty(sai)) | |
1142 | ll_post_statahead(sai); | |
1143 | ||
1144 | if (unlikely(!thread_is_running(thread))) { | |
1145 | ll_release_page(page, 0); | |
1146 | GOTO(out, rc = 0); | |
1147 | } | |
1148 | ||
1149 | /* If no window for metadata statahead, but there are | |
1150 | * some AGL entries to be triggered, then try to help | |
1151 | * to process the AGL entries. */ | |
1152 | if (sa_sent_full(sai)) { | |
1153 | spin_lock(&plli->lli_agl_lock); | |
1154 | while (!agl_list_empty(sai)) { | |
1155 | clli = agl_first_entry(sai); | |
1156 | list_del_init(&clli->lli_agl_list); | |
1157 | spin_unlock(&plli->lli_agl_lock); | |
1158 | ll_agl_trigger(&clli->lli_vfs_inode, | |
1159 | sai); | |
1160 | ||
1161 | if (!sa_received_empty(sai)) | |
1162 | goto interpret_it; | |
1163 | ||
1164 | if (unlikely( | |
1165 | !thread_is_running(thread))) { | |
1166 | ll_release_page(page, 0); | |
1167 | GOTO(out, rc = 0); | |
1168 | } | |
1169 | ||
1170 | if (!sa_sent_full(sai)) | |
1171 | goto do_it; | |
1172 | ||
1173 | spin_lock(&plli->lli_agl_lock); | |
1174 | } | |
1175 | spin_unlock(&plli->lli_agl_lock); | |
1176 | ||
1177 | goto keep_it; | |
1178 | } | |
1179 | ||
1180 | do_it: | |
1181 | ll_statahead_one(parent, name, namelen); | |
1182 | } | |
1183 | pos = le64_to_cpu(dp->ldp_hash_end); | |
1184 | if (pos == MDS_DIR_END_OFF) { | |
1185 | /* | |
1186 | * End of directory reached. | |
1187 | */ | |
1188 | ll_release_page(page, 0); | |
1189 | while (1) { | |
1190 | l_wait_event(thread->t_ctl_waitq, | |
1191 | !sa_received_empty(sai) || | |
1192 | sai->sai_sent == sai->sai_replied|| | |
1193 | !thread_is_running(thread), | |
1194 | &lwi); | |
1195 | ||
1196 | while (!sa_received_empty(sai)) | |
1197 | ll_post_statahead(sai); | |
1198 | ||
1199 | if (unlikely(!thread_is_running(thread))) | |
1200 | GOTO(out, rc = 0); | |
1201 | ||
1202 | if (sai->sai_sent == sai->sai_replied && | |
1203 | sa_received_empty(sai)) | |
1204 | break; | |
1205 | } | |
1206 | ||
1207 | spin_lock(&plli->lli_agl_lock); | |
1208 | while (!agl_list_empty(sai) && | |
1209 | thread_is_running(thread)) { | |
1210 | clli = agl_first_entry(sai); | |
1211 | list_del_init(&clli->lli_agl_list); | |
1212 | spin_unlock(&plli->lli_agl_lock); | |
1213 | ll_agl_trigger(&clli->lli_vfs_inode, sai); | |
1214 | spin_lock(&plli->lli_agl_lock); | |
1215 | } | |
1216 | spin_unlock(&plli->lli_agl_lock); | |
1217 | ||
1218 | GOTO(out, rc = 0); | |
1219 | } else if (1) { | |
1220 | /* | |
1221 | * chain is exhausted. | |
1222 | * Normal case: continue to the next page. | |
1223 | */ | |
1224 | ll_release_page(page, le32_to_cpu(dp->ldp_flags) & | |
1225 | LDF_COLLIDE); | |
1226 | sai->sai_in_readpage = 1; | |
1227 | page = ll_get_dir_page(dir, pos, &chain); | |
1228 | sai->sai_in_readpage = 0; | |
1229 | } else { | |
1230 | LASSERT(le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE); | |
1231 | ll_release_page(page, 1); | |
1232 | /* | |
1233 | * go into overflow page. | |
1234 | */ | |
1235 | } | |
1236 | } | |
d7e09d03 PT |
1237 | |
1238 | out: | |
1239 | if (sai->sai_agl_valid) { | |
1240 | spin_lock(&plli->lli_agl_lock); | |
1241 | thread_set_flags(agl_thread, SVC_STOPPING); | |
1242 | spin_unlock(&plli->lli_agl_lock); | |
1243 | wake_up(&agl_thread->t_ctl_waitq); | |
1244 | ||
1245 | CDEBUG(D_READA, "stop agl thread: [pid %d]\n", | |
1246 | current_pid()); | |
1247 | l_wait_event(agl_thread->t_ctl_waitq, | |
1248 | thread_is_stopped(agl_thread), | |
1249 | &lwi); | |
1250 | } else { | |
1251 | /* Set agl_thread flags anyway. */ | |
1252 | thread_set_flags(&sai->sai_agl_thread, SVC_STOPPED); | |
1253 | } | |
1254 | ll_dir_chain_fini(&chain); | |
1255 | spin_lock(&plli->lli_sa_lock); | |
1256 | if (!sa_received_empty(sai)) { | |
1257 | thread_set_flags(thread, SVC_STOPPING); | |
1258 | spin_unlock(&plli->lli_sa_lock); | |
1259 | ||
1260 | /* To release the resources held by received entries. */ | |
1261 | while (!sa_received_empty(sai)) | |
1262 | ll_post_statahead(sai); | |
1263 | ||
1264 | spin_lock(&plli->lli_sa_lock); | |
1265 | } | |
1266 | thread_set_flags(thread, SVC_STOPPED); | |
1267 | spin_unlock(&plli->lli_sa_lock); | |
1268 | wake_up(&sai->sai_waitq); | |
1269 | wake_up(&thread->t_ctl_waitq); | |
1270 | ll_sai_put(sai); | |
1271 | dput(parent); | |
1272 | CDEBUG(D_READA, "statahead thread stopped: [pid %d] [parent %.*s]\n", | |
1273 | current_pid(), parent->d_name.len, parent->d_name.name); | |
1274 | return rc; | |
1275 | } | |
1276 | ||
1277 | /** | |
1278 | * called in ll_file_release(). | |
1279 | */ | |
1280 | void ll_stop_statahead(struct inode *dir, void *key) | |
1281 | { | |
1282 | struct ll_inode_info *lli = ll_i2info(dir); | |
1283 | ||
1284 | if (unlikely(key == NULL)) | |
1285 | return; | |
1286 | ||
1287 | spin_lock(&lli->lli_sa_lock); | |
1288 | if (lli->lli_opendir_key != key || lli->lli_opendir_pid == 0) { | |
1289 | spin_unlock(&lli->lli_sa_lock); | |
1290 | return; | |
1291 | } | |
1292 | ||
1293 | lli->lli_opendir_key = NULL; | |
1294 | ||
1295 | if (lli->lli_sai) { | |
1296 | struct l_wait_info lwi = { 0 }; | |
1297 | struct ptlrpc_thread *thread = &lli->lli_sai->sai_thread; | |
1298 | ||
1299 | if (!thread_is_stopped(thread)) { | |
1300 | thread_set_flags(thread, SVC_STOPPING); | |
1301 | spin_unlock(&lli->lli_sa_lock); | |
1302 | wake_up(&thread->t_ctl_waitq); | |
1303 | ||
1304 | CDEBUG(D_READA, "stop statahead thread: [pid %d]\n", | |
1305 | current_pid()); | |
1306 | l_wait_event(thread->t_ctl_waitq, | |
1307 | thread_is_stopped(thread), | |
1308 | &lwi); | |
1309 | } else { | |
1310 | spin_unlock(&lli->lli_sa_lock); | |
1311 | } | |
1312 | ||
1313 | /* | |
1314 | * Put the ref which was held when first statahead_enter. | |
1315 | * It maybe not the last ref for some statahead requests | |
1316 | * maybe inflight. | |
1317 | */ | |
1318 | ll_sai_put(lli->lli_sai); | |
1319 | } else { | |
1320 | lli->lli_opendir_pid = 0; | |
1321 | spin_unlock(&lli->lli_sa_lock); | |
1322 | } | |
1323 | } | |
1324 | ||
/* Results of is_first_dirent(). */
enum {
	/** the name is not the first eligible dirent of the directory */
	LS_NONE_FIRST_DE = 0,
	/** the name is the first dirent and does not start with '.' */
	LS_FIRST_DE      = 1,
	/**
	 * the name is the first dirent and is a hidden one, i.e. it starts
	 * with '.' (the "." and ".." entries themselves are never counted)
	 */
	LS_FIRST_DOT_DE  = 2
};
1339 | ||
1340 | static int is_first_dirent(struct inode *dir, struct dentry *dentry) | |
1341 | { | |
1342 | struct ll_dir_chain chain; | |
1343 | struct qstr *target = &dentry->d_name; | |
1344 | struct page *page; | |
1345 | __u64 pos = 0; | |
1346 | int dot_de; | |
1347 | int rc = LS_NONE_FIRST_DE; | |
d7e09d03 PT |
1348 | |
1349 | ll_dir_chain_init(&chain); | |
1350 | page = ll_get_dir_page(dir, pos, &chain); | |
1351 | ||
1352 | while (1) { | |
1353 | struct lu_dirpage *dp; | |
1354 | struct lu_dirent *ent; | |
1355 | ||
1356 | if (IS_ERR(page)) { | |
1357 | struct ll_inode_info *lli = ll_i2info(dir); | |
1358 | ||
1359 | rc = PTR_ERR(page); | |
1360 | CERROR("error reading dir "DFID" at "LPU64": " | |
1361 | "[rc %d] [parent %u]\n", | |
1362 | PFID(ll_inode2fid(dir)), pos, | |
1363 | rc, lli->lli_opendir_pid); | |
1364 | break; | |
1365 | } | |
1366 | ||
1367 | dp = page_address(page); | |
1368 | for (ent = lu_dirent_start(dp); ent != NULL; | |
1369 | ent = lu_dirent_next(ent)) { | |
1370 | __u64 hash; | |
1371 | int namelen; | |
1372 | char *name; | |
1373 | ||
1374 | hash = le64_to_cpu(ent->lde_hash); | |
1375 | /* The ll_get_dir_page() can return any page containing | |
1376 | * the given hash which may be not the start hash. */ | |
1377 | if (unlikely(hash < pos)) | |
1378 | continue; | |
1379 | ||
1380 | namelen = le16_to_cpu(ent->lde_namelen); | |
1381 | if (unlikely(namelen == 0)) | |
1382 | /* | |
1383 | * skip dummy record. | |
1384 | */ | |
1385 | continue; | |
1386 | ||
1387 | name = ent->lde_name; | |
1388 | if (name[0] == '.') { | |
1389 | if (namelen == 1) | |
1390 | /* | |
1391 | * skip "." | |
1392 | */ | |
1393 | continue; | |
1394 | else if (name[1] == '.' && namelen == 2) | |
1395 | /* | |
1396 | * skip ".." | |
1397 | */ | |
1398 | continue; | |
1399 | else | |
1400 | dot_de = 1; | |
1401 | } else { | |
1402 | dot_de = 0; | |
1403 | } | |
1404 | ||
1405 | if (dot_de && target->name[0] != '.') { | |
1406 | CDEBUG(D_READA, "%.*s skip hidden file %.*s\n", | |
1407 | target->len, target->name, | |
1408 | namelen, name); | |
1409 | continue; | |
1410 | } | |
1411 | ||
1412 | if (target->len != namelen || | |
1413 | memcmp(target->name, name, namelen) != 0) | |
1414 | rc = LS_NONE_FIRST_DE; | |
1415 | else if (!dot_de) | |
1416 | rc = LS_FIRST_DE; | |
1417 | else | |
1418 | rc = LS_FIRST_DOT_DE; | |
1419 | ||
1420 | ll_release_page(page, 0); | |
1421 | GOTO(out, rc); | |
1422 | } | |
1423 | pos = le64_to_cpu(dp->ldp_hash_end); | |
1424 | if (pos == MDS_DIR_END_OFF) { | |
1425 | /* | |
1426 | * End of directory reached. | |
1427 | */ | |
1428 | ll_release_page(page, 0); | |
1429 | break; | |
1430 | } else if (1) { | |
1431 | /* | |
1432 | * chain is exhausted | |
1433 | * Normal case: continue to the next page. | |
1434 | */ | |
1435 | ll_release_page(page, le32_to_cpu(dp->ldp_flags) & | |
1436 | LDF_COLLIDE); | |
1437 | page = ll_get_dir_page(dir, pos, &chain); | |
1438 | } else { | |
1439 | /* | |
1440 | * go into overflow page. | |
1441 | */ | |
1442 | LASSERT(le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE); | |
1443 | ll_release_page(page, 1); | |
1444 | } | |
1445 | } | |
d7e09d03 PT |
1446 | |
1447 | out: | |
1448 | ll_dir_chain_fini(&chain); | |
1449 | return rc; | |
1450 | } | |
1451 | ||
1452 | static void | |
1453 | ll_sai_unplug(struct ll_statahead_info *sai, struct ll_sa_entry *entry) | |
1454 | { | |
1455 | struct ptlrpc_thread *thread = &sai->sai_thread; | |
1456 | struct ll_sb_info *sbi = ll_i2sbi(sai->sai_inode); | |
1457 | int hit; | |
d7e09d03 PT |
1458 | |
1459 | if (entry != NULL && entry->se_stat == SA_ENTRY_SUCC) | |
1460 | hit = 1; | |
1461 | else | |
1462 | hit = 0; | |
1463 | ||
1464 | ll_sa_entry_fini(sai, entry); | |
1465 | if (hit) { | |
1466 | sai->sai_hit++; | |
1467 | sai->sai_consecutive_miss = 0; | |
1468 | sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max); | |
1469 | } else { | |
1470 | struct ll_inode_info *lli = ll_i2info(sai->sai_inode); | |
1471 | ||
1472 | sai->sai_miss++; | |
1473 | sai->sai_consecutive_miss++; | |
1474 | if (sa_low_hit(sai) && thread_is_running(thread)) { | |
1475 | atomic_inc(&sbi->ll_sa_wrong); | |
1476 | CDEBUG(D_READA, "Statahead for dir "DFID" hit " | |
1477 | "ratio too low: hit/miss "LPU64"/"LPU64 | |
1478 | ", sent/replied "LPU64"/"LPU64", stopping " | |
1479 | "statahead thread: pid %d\n", | |
1480 | PFID(&lli->lli_fid), sai->sai_hit, | |
1481 | sai->sai_miss, sai->sai_sent, | |
1482 | sai->sai_replied, current_pid()); | |
1483 | spin_lock(&lli->lli_sa_lock); | |
1484 | if (!thread_is_stopped(thread)) | |
1485 | thread_set_flags(thread, SVC_STOPPING); | |
1486 | spin_unlock(&lli->lli_sa_lock); | |
1487 | } | |
1488 | } | |
1489 | ||
1490 | if (!thread_is_stopped(thread)) | |
1491 | wake_up(&thread->t_ctl_waitq); | |
d7e09d03 PT |
1492 | } |
1493 | ||
1494 | /** | |
1495 | * Start statahead thread if this is the first dir entry. | |
1496 | * Otherwise if a thread is started already, wait it until it is ahead of me. | |
1497 | * \retval 1 -- find entry with lock in cache, the caller needs to do | |
1498 | * nothing. | |
1499 | * \retval 0 -- find entry in cache, but without lock, the caller needs | |
1500 | * refresh from MDS. | |
1501 | * \retval others -- the caller need to process as non-statahead. | |
1502 | */ | |
1503 | int do_statahead_enter(struct inode *dir, struct dentry **dentryp, | |
1504 | int only_unplug) | |
1505 | { | |
1506 | struct ll_inode_info *lli = ll_i2info(dir); | |
1507 | struct ll_statahead_info *sai = lli->lli_sai; | |
1508 | struct dentry *parent; | |
1509 | struct ll_sa_entry *entry; | |
1510 | struct ptlrpc_thread *thread; | |
1511 | struct l_wait_info lwi = { 0 }; | |
1512 | int rc = 0; | |
1513 | struct ll_inode_info *plli; | |
d7e09d03 PT |
1514 | |
1515 | LASSERT(lli->lli_opendir_pid == current_pid()); | |
1516 | ||
1517 | if (sai) { | |
1518 | thread = &sai->sai_thread; | |
1519 | if (unlikely(thread_is_stopped(thread) && | |
1520 | list_empty(&sai->sai_entries_stated))) { | |
1521 | /* to release resource */ | |
1522 | ll_stop_statahead(dir, lli->lli_opendir_key); | |
0a3bdb00 | 1523 | return -EAGAIN; |
d7e09d03 PT |
1524 | } |
1525 | ||
1526 | if ((*dentryp)->d_name.name[0] == '.') { | |
1527 | if (sai->sai_ls_all || | |
1528 | sai->sai_miss_hidden >= sai->sai_skip_hidden) { | |
1529 | /* | |
1530 | * Hidden dentry is the first one, or statahead | |
1531 | * thread does not skip so many hidden dentries | |
1532 | * before "sai_ls_all" enabled as below. | |
1533 | */ | |
1534 | } else { | |
1535 | if (!sai->sai_ls_all) | |
1536 | /* | |
1537 | * It maybe because hidden dentry is not | |
1538 | * the first one, "sai_ls_all" was not | |
1539 | * set, then "ls -al" missed. Enable | |
1540 | * "sai_ls_all" for such case. | |
1541 | */ | |
1542 | sai->sai_ls_all = 1; | |
1543 | ||
1544 | /* | |
1545 | * Such "getattr" has been skipped before | |
1546 | * "sai_ls_all" enabled as above. | |
1547 | */ | |
1548 | sai->sai_miss_hidden++; | |
0a3bdb00 | 1549 | return -EAGAIN; |
d7e09d03 PT |
1550 | } |
1551 | } | |
1552 | ||
1553 | entry = ll_sa_entry_get_byname(sai, &(*dentryp)->d_name); | |
1554 | if (entry == NULL || only_unplug) { | |
1555 | ll_sai_unplug(sai, entry); | |
0a3bdb00 | 1556 | return entry ? 1 : -EAGAIN; |
d7e09d03 PT |
1557 | } |
1558 | ||
1559 | /* if statahead is busy in readdir, help it do post-work */ | |
1560 | while (!ll_sa_entry_stated(entry) && | |
1561 | sai->sai_in_readpage && | |
1562 | !sa_received_empty(sai)) | |
1563 | ll_post_statahead(sai); | |
1564 | ||
1565 | if (!ll_sa_entry_stated(entry)) { | |
1566 | sai->sai_index_wait = entry->se_index; | |
1567 | lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(30), NULL, | |
1568 | LWI_ON_SIGNAL_NOOP, NULL); | |
1569 | rc = l_wait_event(sai->sai_waitq, | |
1570 | ll_sa_entry_stated(entry) || | |
1571 | thread_is_stopped(thread), | |
1572 | &lwi); | |
1573 | if (rc < 0) { | |
1574 | ll_sai_unplug(sai, entry); | |
0a3bdb00 | 1575 | return -EAGAIN; |
d7e09d03 PT |
1576 | } |
1577 | } | |
1578 | ||
1579 | if (entry->se_stat == SA_ENTRY_SUCC && | |
1580 | entry->se_inode != NULL) { | |
1581 | struct inode *inode = entry->se_inode; | |
1582 | struct lookup_intent it = { .it_op = IT_GETATTR, | |
1583 | .d.lustre.it_lock_handle = | |
1584 | entry->se_handle }; | |
1585 | __u64 bits; | |
1586 | ||
1587 | rc = md_revalidate_lock(ll_i2mdexp(dir), &it, | |
1588 | ll_inode2fid(inode), &bits); | |
1589 | if (rc == 1) { | |
1590 | if ((*dentryp)->d_inode == NULL) { | |
1591 | *dentryp = ll_splice_alias(inode, | |
1592 | *dentryp); | |
1593 | } else if ((*dentryp)->d_inode != inode) { | |
1594 | /* revalidate, but inode is recreated */ | |
1595 | CDEBUG(D_READA, | |
1596 | "stale dentry %.*s inode %lu/%u, " | |
1597 | "statahead inode %lu/%u\n", | |
1598 | (*dentryp)->d_name.len, | |
1599 | (*dentryp)->d_name.name, | |
1600 | (*dentryp)->d_inode->i_ino, | |
1601 | (*dentryp)->d_inode->i_generation, | |
1602 | inode->i_ino, | |
1603 | inode->i_generation); | |
1604 | ll_sai_unplug(sai, entry); | |
0a3bdb00 | 1605 | return -ESTALE; |
d7e09d03 PT |
1606 | } else { |
1607 | iput(inode); | |
1608 | } | |
1609 | entry->se_inode = NULL; | |
1610 | ||
1611 | if ((bits & MDS_INODELOCK_LOOKUP) && | |
1612 | d_lustre_invalid(*dentryp)) | |
1613 | d_lustre_revalidate(*dentryp); | |
1614 | ll_intent_release(&it); | |
1615 | } | |
1616 | } | |
1617 | ||
1618 | ll_sai_unplug(sai, entry); | |
0a3bdb00 | 1619 | return rc; |
d7e09d03 PT |
1620 | } |
1621 | ||
1622 | /* I am the "lli_opendir_pid" owner, only me can set "lli_sai". */ | |
1623 | rc = is_first_dirent(dir, *dentryp); | |
1624 | if (rc == LS_NONE_FIRST_DE) | |
1625 | /* It is not "ls -{a}l" operation, no need statahead for it. */ | |
1626 | GOTO(out, rc = -EAGAIN); | |
1627 | ||
1628 | sai = ll_sai_alloc(); | |
1629 | if (sai == NULL) | |
1630 | GOTO(out, rc = -ENOMEM); | |
1631 | ||
1632 | sai->sai_ls_all = (rc == LS_FIRST_DOT_DE); | |
1633 | sai->sai_inode = igrab(dir); | |
1634 | if (unlikely(sai->sai_inode == NULL)) { | |
1635 | CWARN("Do not start stat ahead on dying inode "DFID"\n", | |
1636 | PFID(&lli->lli_fid)); | |
1637 | GOTO(out, rc = -ESTALE); | |
1638 | } | |
1639 | ||
1640 | /* get parent reference count here, and put it in ll_statahead_thread */ | |
1641 | parent = dget((*dentryp)->d_parent); | |
1642 | if (unlikely(sai->sai_inode != parent->d_inode)) { | |
1643 | struct ll_inode_info *nlli = ll_i2info(parent->d_inode); | |
1644 | ||
1645 | CWARN("Race condition, someone changed %.*s just now: " | |
1646 | "old parent "DFID", new parent "DFID"\n", | |
1647 | (*dentryp)->d_name.len, (*dentryp)->d_name.name, | |
1648 | PFID(&lli->lli_fid), PFID(&nlli->lli_fid)); | |
1649 | dput(parent); | |
1650 | iput(sai->sai_inode); | |
1651 | GOTO(out, rc = -EAGAIN); | |
1652 | } | |
1653 | ||
1654 | CDEBUG(D_READA, "start statahead thread: [pid %d] [parent %.*s]\n", | |
1655 | current_pid(), parent->d_name.len, parent->d_name.name); | |
1656 | ||
1657 | lli->lli_sai = sai; | |
1658 | ||
1659 | plli = ll_i2info(parent->d_inode); | |
1660 | rc = PTR_ERR(kthread_run(ll_statahead_thread, parent, | |
1661 | "ll_sa_%u", plli->lli_opendir_pid)); | |
1662 | thread = &sai->sai_thread; | |
1663 | if (IS_ERR_VALUE(rc)) { | |
1664 | CERROR("can't start ll_sa thread, rc: %d\n", rc); | |
1665 | dput(parent); | |
1666 | lli->lli_opendir_key = NULL; | |
1667 | thread_set_flags(thread, SVC_STOPPED); | |
1668 | thread_set_flags(&sai->sai_agl_thread, SVC_STOPPED); | |
1669 | ll_sai_put(sai); | |
1670 | LASSERT(lli->lli_sai == NULL); | |
0a3bdb00 | 1671 | return -EAGAIN; |
d7e09d03 PT |
1672 | } |
1673 | ||
1674 | l_wait_event(thread->t_ctl_waitq, | |
1675 | thread_is_running(thread) || thread_is_stopped(thread), | |
1676 | &lwi); | |
1677 | ||
1678 | /* | |
1679 | * We don't stat-ahead for the first dirent since we are already in | |
1680 | * lookup. | |
1681 | */ | |
0a3bdb00 | 1682 | return -EAGAIN; |
d7e09d03 PT |
1683 | |
1684 | out: | |
1685 | if (sai != NULL) | |
1686 | OBD_FREE_PTR(sai); | |
1687 | spin_lock(&lli->lli_sa_lock); | |
1688 | lli->lli_opendir_key = NULL; | |
1689 | lli->lli_opendir_pid = 0; | |
1690 | spin_unlock(&lli->lli_sa_lock); | |
1691 | return rc; | |
1692 | } |