4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/llite/llite_capa.c
38 * Author: Lai Siyao <lsy@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <linux/uaccess.h>
45 #include <linux/file.h>
46 #include <linux/kmod.h>
48 #include "../include/lustre_lite.h"
49 #include "llite_internal.h"
/*
 * For obd_capa.c_list, a client capa might stay in three places:
 * 1. ll_capa_list: capas waiting for renewal.
 * 2. ll_idle_capas: see below.
 * 3. stand alone: just allocated.
 */

/* capas for oss writeback and those failed to renew */
static LIST_HEAD(ll_idle_capas);

/* renewal thread; its waitqueue is kicked by the timer callback */
static struct ptlrpc_thread ll_capa_thread;

/* client-site slot of the global capa_list (declared elsewhere) */
static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];

/* llite capa renewal timer */
struct timer_list ll_capa_timer;

/* for debug: indicate whether capa on llite is enabled or not */
static atomic_t ll_capa_debug = ATOMIC_INIT(0);

/* statistics reported by ll_print_capa_stat() */
static unsigned long long ll_capa_renewed;
static unsigned long long ll_capa_renewal_noent;
static unsigned long long ll_capa_renewal_failed;
static unsigned long long ll_capa_renewal_retries;

/* renewal completion callback, passed to md_renew_capa() */
static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa);
73 static inline void update_capa_timer(struct obd_capa
*ocapa
, unsigned long expiry
)
75 if (time_before(expiry
, ll_capa_timer
.expires
) ||
76 !timer_pending(&ll_capa_timer
)) {
77 mod_timer(&ll_capa_timer
, expiry
);
78 DEBUG_CAPA(D_SEC
, &ocapa
->c_capa
,
79 "ll_capa_timer update: %lu/%lu by", expiry
, jiffies
);
83 static inline unsigned long capa_renewal_time(struct obd_capa
*ocapa
)
85 return cfs_time_sub(ocapa
->c_expiry
,
86 cfs_time_seconds(ocapa
->c_capa
.lc_timeout
) / 2);
/* Return non-zero when @ocapa has reached its renewal deadline. */
static inline int capa_is_to_expire(struct obd_capa *ocapa)
{
	return time_before_eq(capa_renewal_time(ocapa), cfs_time_current());
}
94 static inline int have_expired_capa(void)
96 struct obd_capa
*ocapa
= NULL
;
99 /* if ll_capa_list has client capa to expire or ll_idle_capas has
100 * expired capa, return 1.
102 spin_lock(&capa_lock
);
103 if (!list_empty(ll_capa_list
)) {
104 ocapa
= list_entry(ll_capa_list
->next
, struct obd_capa
,
106 expired
= capa_is_to_expire(ocapa
);
108 update_capa_timer(ocapa
, capa_renewal_time(ocapa
));
109 } else if (!list_empty(&ll_idle_capas
)) {
110 ocapa
= list_entry(ll_idle_capas
.next
, struct obd_capa
,
112 expired
= capa_is_expired(ocapa
);
114 update_capa_timer(ocapa
, ocapa
->c_expiry
);
116 spin_unlock(&capa_lock
);
119 DEBUG_CAPA(D_SEC
, &ocapa
->c_capa
, "expired");
123 static void sort_add_capa(struct obd_capa
*ocapa
, struct list_head
*head
)
125 struct obd_capa
*tmp
;
126 struct list_head
*before
= NULL
;
128 /* TODO: client capa is sorted by expiry, this could be optimized */
129 list_for_each_entry_reverse(tmp
, head
, c_list
) {
130 if (cfs_time_aftereq(ocapa
->c_expiry
, tmp
->c_expiry
)) {
131 before
= &tmp
->c_list
;
136 LASSERT(&ocapa
->c_list
!= before
);
137 list_add(&ocapa
->c_list
, before
?: head
);
140 static inline int obd_capa_open_count(struct obd_capa
*oc
)
142 struct ll_inode_info
*lli
= ll_i2info(oc
->u
.cli
.inode
);
143 return atomic_read(&lli
->lli_open_count
);
146 static void ll_delete_capa(struct obd_capa
*ocapa
)
148 struct ll_inode_info
*lli
= ll_i2info(ocapa
->u
.cli
.inode
);
150 if (capa_for_mds(&ocapa
->c_capa
)) {
151 LASSERT(lli
->lli_mds_capa
== ocapa
);
152 lli
->lli_mds_capa
= NULL
;
153 } else if (capa_for_oss(&ocapa
->c_capa
)) {
154 list_del_init(&ocapa
->u
.cli
.lli_list
);
157 DEBUG_CAPA(D_SEC
, &ocapa
->c_capa
, "free client");
158 list_del_init(&ocapa
->c_list
);
159 capa_count
[CAPA_SITE_CLIENT
]--;
160 /* release the ref when alloc */
164 /* three places where client capa is deleted:
165 * 1. capa_thread_main(), main place to delete expired capa.
166 * 2. ll_clear_inode_capas() in ll_clear_inode().
167 * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_setattr_ost().
169 static int capa_thread_main(void *unused
)
171 struct obd_capa
*ocapa
, *tmp
, *next
;
172 struct inode
*inode
= NULL
;
173 struct l_wait_info lwi
= { 0 };
176 thread_set_flags(&ll_capa_thread
, SVC_RUNNING
);
177 wake_up(&ll_capa_thread
.t_ctl_waitq
);
180 l_wait_event(ll_capa_thread
.t_ctl_waitq
,
181 !thread_is_running(&ll_capa_thread
) ||
185 if (!thread_is_running(&ll_capa_thread
))
190 spin_lock(&capa_lock
);
191 list_for_each_entry_safe(ocapa
, tmp
, ll_capa_list
, c_list
) {
194 LASSERT(ocapa
->c_capa
.lc_opc
!= CAPA_OPC_OSS_TRUNC
);
196 if (!capa_is_to_expire(ocapa
)) {
201 list_del_init(&ocapa
->c_list
);
203 /* for MDS capability, only renew those which belong to
204 * dir, or its inode is opened, or client holds LOOKUP
207 /* ibits may be changed by ll_have_md_lock() so we have
208 * to set it each time */
209 ibits
= MDS_INODELOCK_LOOKUP
;
210 if (capa_for_mds(&ocapa
->c_capa
) &&
211 !S_ISDIR(ocapa
->u
.cli
.inode
->i_mode
) &&
212 obd_capa_open_count(ocapa
) == 0 &&
213 !ll_have_md_lock(ocapa
->u
.cli
.inode
,
214 &ibits
, LCK_MINMODE
)) {
215 DEBUG_CAPA(D_SEC
, &ocapa
->c_capa
,
217 sort_add_capa(ocapa
, &ll_idle_capas
);
221 /* for OSS capability, only renew those whose inode is
224 if (capa_for_oss(&ocapa
->c_capa
) &&
225 obd_capa_open_count(ocapa
) == 0) {
226 /* oss capa with open count == 0 won't renew,
227 * move to idle list */
228 sort_add_capa(ocapa
, &ll_idle_capas
);
232 /* NB iput() is in ll_update_capa() */
233 inode
= igrab(ocapa
->u
.cli
.inode
);
235 DEBUG_CAPA(D_ERROR
, &ocapa
->c_capa
,
242 spin_unlock(&capa_lock
);
243 rc
= md_renew_capa(ll_i2mdexp(inode
), ocapa
,
245 spin_lock(&capa_lock
);
247 DEBUG_CAPA(D_ERROR
, &ocapa
->c_capa
,
248 "renew failed: %d", rc
);
249 ll_capa_renewal_failed
++;
254 update_capa_timer(next
, capa_renewal_time(next
));
256 list_for_each_entry_safe(ocapa
, tmp
, &ll_idle_capas
,
258 if (!capa_is_expired(ocapa
)) {
260 update_capa_timer(ocapa
,
265 if (atomic_read(&ocapa
->c_refc
) > 1) {
266 DEBUG_CAPA(D_SEC
, &ocapa
->c_capa
,
267 "expired(c_refc %d), don't release",
268 atomic_read(&ocapa
->c_refc
));
269 /* don't try to renew any more */
270 list_del_init(&ocapa
->c_list
);
274 /* expired capa is released. */
275 DEBUG_CAPA(D_SEC
, &ocapa
->c_capa
, "release expired");
276 ll_delete_capa(ocapa
);
279 spin_unlock(&capa_lock
);
282 thread_set_flags(&ll_capa_thread
, SVC_STOPPED
);
283 wake_up(&ll_capa_thread
.t_ctl_waitq
);
287 void ll_capa_timer_callback(unsigned long unused
)
289 wake_up(&ll_capa_thread
.t_ctl_waitq
);
292 int ll_capa_thread_start(void)
294 struct task_struct
*task
;
296 init_waitqueue_head(&ll_capa_thread
.t_ctl_waitq
);
298 task
= kthread_run(capa_thread_main
, NULL
, "ll_capa");
300 CERROR("cannot start expired capa thread: rc %ld\n",
302 return PTR_ERR(task
);
304 wait_event(ll_capa_thread
.t_ctl_waitq
,
305 thread_is_running(&ll_capa_thread
));
310 void ll_capa_thread_stop(void)
312 thread_set_flags(&ll_capa_thread
, SVC_STOPPING
);
313 wake_up(&ll_capa_thread
.t_ctl_waitq
);
314 wait_event(ll_capa_thread
.t_ctl_waitq
,
315 thread_is_stopped(&ll_capa_thread
));
318 struct obd_capa
*ll_osscapa_get(struct inode
*inode
, __u64 opc
)
320 struct ll_inode_info
*lli
= ll_i2info(inode
);
321 struct obd_capa
*ocapa
;
324 if ((ll_i2sbi(inode
)->ll_flags
& LL_SBI_OSS_CAPA
) == 0)
327 LASSERT(opc
== CAPA_OPC_OSS_WRITE
|| opc
== CAPA_OPC_OSS_RW
||
328 opc
== CAPA_OPC_OSS_TRUNC
);
330 spin_lock(&capa_lock
);
331 list_for_each_entry(ocapa
, &lli
->lli_oss_capas
, u
.cli
.lli_list
) {
332 if (capa_is_expired(ocapa
))
334 if ((opc
& CAPA_OPC_OSS_WRITE
) &&
335 capa_opc_supported(&ocapa
->c_capa
, CAPA_OPC_OSS_WRITE
)) {
338 } else if ((opc
& CAPA_OPC_OSS_READ
) &&
339 capa_opc_supported(&ocapa
->c_capa
,
340 CAPA_OPC_OSS_READ
)) {
343 } else if ((opc
& CAPA_OPC_OSS_TRUNC
) &&
344 capa_opc_supported(&ocapa
->c_capa
, opc
)) {
351 LASSERT(lu_fid_eq(capa_fid(&ocapa
->c_capa
),
352 ll_inode2fid(inode
)));
353 LASSERT(ocapa
->c_site
== CAPA_SITE_CLIENT
);
357 DEBUG_CAPA(D_SEC
, &ocapa
->c_capa
, "found client");
361 if (atomic_read(&ll_capa_debug
)) {
362 CERROR("no capability for "DFID
" opc %#llx\n",
363 PFID(&lli
->lli_fid
), opc
);
364 atomic_set(&ll_capa_debug
, 0);
367 spin_unlock(&capa_lock
);
371 EXPORT_SYMBOL(ll_osscapa_get
);
373 struct obd_capa
*ll_mdscapa_get(struct inode
*inode
)
375 struct ll_inode_info
*lli
= ll_i2info(inode
);
376 struct obd_capa
*ocapa
;
378 LASSERT(inode
!= NULL
);
380 if ((ll_i2sbi(inode
)->ll_flags
& LL_SBI_MDS_CAPA
) == 0)
383 spin_lock(&capa_lock
);
384 ocapa
= capa_get(lli
->lli_mds_capa
);
385 spin_unlock(&capa_lock
);
386 if (!ocapa
&& atomic_read(&ll_capa_debug
)) {
387 CERROR("no mds capability for "DFID
"\n", PFID(&lli
->lli_fid
));
388 atomic_set(&ll_capa_debug
, 0);
394 static struct obd_capa
*do_add_mds_capa(struct inode
*inode
,
395 struct obd_capa
*ocapa
)
397 struct ll_inode_info
*lli
= ll_i2info(inode
);
398 struct obd_capa
*old
= lli
->lli_mds_capa
;
399 struct lustre_capa
*capa
= &ocapa
->c_capa
;
402 ocapa
->u
.cli
.inode
= inode
;
403 lli
->lli_mds_capa
= ocapa
;
404 capa_count
[CAPA_SITE_CLIENT
]++;
406 DEBUG_CAPA(D_SEC
, capa
, "add MDS");
408 spin_lock(&old
->c_lock
);
410 spin_unlock(&old
->c_lock
);
412 DEBUG_CAPA(D_SEC
, capa
, "update MDS");
420 static struct obd_capa
*do_lookup_oss_capa(struct inode
*inode
, int opc
)
422 struct ll_inode_info
*lli
= ll_i2info(inode
);
423 struct obd_capa
*ocapa
;
425 /* inside capa_lock */
426 list_for_each_entry(ocapa
, &lli
->lli_oss_capas
, u
.cli
.lli_list
) {
427 if ((capa_opc(&ocapa
->c_capa
) & opc
) != opc
)
430 LASSERT(lu_fid_eq(capa_fid(&ocapa
->c_capa
),
431 ll_inode2fid(inode
)));
432 LASSERT(ocapa
->c_site
== CAPA_SITE_CLIENT
);
434 DEBUG_CAPA(D_SEC
, &ocapa
->c_capa
, "found client");
441 static inline void inode_add_oss_capa(struct inode
*inode
,
442 struct obd_capa
*ocapa
)
444 struct ll_inode_info
*lli
= ll_i2info(inode
);
445 struct obd_capa
*tmp
;
446 struct list_head
*next
= NULL
;
448 /* capa is sorted in lli_oss_capas so lookup can always find the
450 list_for_each_entry(tmp
, &lli
->lli_oss_capas
, u
.cli
.lli_list
) {
451 if (cfs_time_after(ocapa
->c_expiry
, tmp
->c_expiry
)) {
452 next
= &tmp
->u
.cli
.lli_list
;
456 LASSERT(&ocapa
->u
.cli
.lli_list
!= next
);
457 list_move_tail(&ocapa
->u
.cli
.lli_list
, next
?: &lli
->lli_oss_capas
);
460 static struct obd_capa
*do_add_oss_capa(struct inode
*inode
,
461 struct obd_capa
*ocapa
)
463 struct obd_capa
*old
;
464 struct lustre_capa
*capa
= &ocapa
->c_capa
;
466 LASSERTF(S_ISREG(inode
->i_mode
),
467 "inode has oss capa, but not regular file, mode: %d\n",
470 /* FIXME: can't replace it so easily with fine-grained opc */
471 old
= do_lookup_oss_capa(inode
, capa_opc(capa
) & CAPA_OPC_OSS_ONLY
);
473 ocapa
->u
.cli
.inode
= inode
;
474 INIT_LIST_HEAD(&ocapa
->u
.cli
.lli_list
);
475 capa_count
[CAPA_SITE_CLIENT
]++;
477 DEBUG_CAPA(D_SEC
, capa
, "add OSS");
479 spin_lock(&old
->c_lock
);
481 spin_unlock(&old
->c_lock
);
483 DEBUG_CAPA(D_SEC
, capa
, "update OSS");
489 inode_add_oss_capa(inode
, ocapa
);
493 struct obd_capa
*ll_add_capa(struct inode
*inode
, struct obd_capa
*ocapa
)
495 spin_lock(&capa_lock
);
496 ocapa
= capa_for_mds(&ocapa
->c_capa
) ? do_add_mds_capa(inode
, ocapa
) :
497 do_add_oss_capa(inode
, ocapa
);
499 /* truncate capa won't renew */
500 if (ocapa
->c_capa
.lc_opc
!= CAPA_OPC_OSS_TRUNC
) {
501 set_capa_expiry(ocapa
);
502 list_del_init(&ocapa
->c_list
);
503 sort_add_capa(ocapa
, ll_capa_list
);
505 update_capa_timer(ocapa
, capa_renewal_time(ocapa
));
508 spin_unlock(&capa_lock
);
510 atomic_set(&ll_capa_debug
, 1);
514 static inline void delay_capa_renew(struct obd_capa
*oc
, unsigned long delay
)
516 /* NB: set a fake expiry for this capa to prevent it renew too soon */
517 oc
->c_expiry
= cfs_time_add(oc
->c_expiry
, cfs_time_seconds(delay
));
520 static int ll_update_capa(struct obd_capa
*ocapa
, struct lustre_capa
*capa
)
522 struct inode
*inode
= ocapa
->u
.cli
.inode
;
530 spin_lock(&capa_lock
);
532 DEBUG_CAPA(D_SEC
, &ocapa
->c_capa
,
533 "renewal canceled because object removed");
534 ll_capa_renewal_noent
++;
536 ll_capa_renewal_failed
++;
538 /* failed capa won't be renewed any longer, but if -EIO,
539 * client might be doing recovery, retry in 2 min. */
540 if (rc
== -EIO
&& !capa_is_expired(ocapa
)) {
541 delay_capa_renew(ocapa
, 120);
542 DEBUG_CAPA(D_ERROR
, &ocapa
->c_capa
,
543 "renewal failed: -EIO, "
545 ll_capa_renewal_retries
++;
548 DEBUG_CAPA(D_ERROR
, &ocapa
->c_capa
,
549 "renewal failed(rc: %d) for", rc
);
553 list_del_init(&ocapa
->c_list
);
554 sort_add_capa(ocapa
, &ll_idle_capas
);
555 spin_unlock(&capa_lock
);
562 spin_lock(&ocapa
->c_lock
);
563 LASSERT(!memcmp(&ocapa
->c_capa
, capa
,
564 offsetof(struct lustre_capa
, lc_opc
)));
565 ocapa
->c_capa
= *capa
;
566 set_capa_expiry(ocapa
);
567 spin_unlock(&ocapa
->c_lock
);
569 spin_lock(&capa_lock
);
570 if (capa_for_oss(capa
))
571 inode_add_oss_capa(inode
, ocapa
);
572 DEBUG_CAPA(D_SEC
, capa
, "renew");
574 list_del_init(&ocapa
->c_list
);
575 sort_add_capa(ocapa
, ll_capa_list
);
576 update_capa_timer(ocapa
, capa_renewal_time(ocapa
));
577 spin_unlock(&capa_lock
);
584 void ll_capa_open(struct inode
*inode
)
586 struct ll_inode_info
*lli
= ll_i2info(inode
);
588 if ((ll_i2sbi(inode
)->ll_flags
& (LL_SBI_MDS_CAPA
| LL_SBI_OSS_CAPA
))
592 if (!S_ISREG(inode
->i_mode
))
595 atomic_inc(&lli
->lli_open_count
);
598 void ll_capa_close(struct inode
*inode
)
600 struct ll_inode_info
*lli
= ll_i2info(inode
);
602 if ((ll_i2sbi(inode
)->ll_flags
& (LL_SBI_MDS_CAPA
| LL_SBI_OSS_CAPA
))
606 if (!S_ISREG(inode
->i_mode
))
609 atomic_dec(&lli
->lli_open_count
);
612 /* delete CAPA_OPC_OSS_TRUNC only */
613 void ll_truncate_free_capa(struct obd_capa
*ocapa
)
618 LASSERT(ocapa
->c_capa
.lc_opc
& CAPA_OPC_OSS_TRUNC
);
619 DEBUG_CAPA(D_SEC
, &ocapa
->c_capa
, "free truncate");
621 /* release ref when find */
623 if (likely(ocapa
->c_capa
.lc_opc
== CAPA_OPC_OSS_TRUNC
)) {
624 spin_lock(&capa_lock
);
625 ll_delete_capa(ocapa
);
626 spin_unlock(&capa_lock
);
630 void ll_clear_inode_capas(struct inode
*inode
)
632 struct ll_inode_info
*lli
= ll_i2info(inode
);
633 struct obd_capa
*ocapa
, *tmp
;
635 spin_lock(&capa_lock
);
636 ocapa
= lli
->lli_mds_capa
;
638 ll_delete_capa(ocapa
);
640 list_for_each_entry_safe(ocapa
, tmp
, &lli
->lli_oss_capas
,
642 ll_delete_capa(ocapa
);
643 spin_unlock(&capa_lock
);
646 void ll_print_capa_stat(struct ll_sb_info
*sbi
)
648 if (sbi
->ll_flags
& (LL_SBI_MDS_CAPA
| LL_SBI_OSS_CAPA
))
649 LCONSOLE_INFO("Fid capabilities renewed: %llu\n"
650 "Fid capabilities renewal ENOENT: %llu\n"
651 "Fid capabilities failed to renew: %llu\n"
652 "Fid capabilities renewal retries: %llu\n",
653 ll_capa_renewed
, ll_capa_renewal_noent
,
654 ll_capa_renewal_failed
, ll_capa_renewal_retries
);