/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/llite/llite_capa.c
 *
 * Author: Lai Siyao <lsy@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_LLITE

#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/file.h>
#include <linux/kmod.h>

#include "../include/lustre_lite.h"
#include "llite_internal.h"

/* Via obd_capa.c_list, a client capa may live in one of three places:
 * 1. ll_capa_list.
 * 2. ll_idle_capas.
 * 3. standalone: just allocated, not yet on any list.
 */

/* capas used for OSS writeback and those that failed to renew */
static LIST_HEAD(ll_idle_capas);
static struct ptlrpc_thread ll_capa_thread;
static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];

/* llite capa renewal timer */
struct timer_list ll_capa_timer;
/* for debugging: indicates whether capa support is enabled on llite */
static atomic_t ll_capa_debug = ATOMIC_INIT(0);
static unsigned long long ll_capa_renewed;
static unsigned long long ll_capa_renewal_noent;
static unsigned long long ll_capa_renewal_failed;
static unsigned long long ll_capa_renewal_retries;

static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa);

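/* (Re)arm the global ll_capa_timer so it fires no later than @expiry, unless
 * it is already pending with an earlier expiry.
 */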
static inline void update_capa_timer(struct obd_capa *ocapa,
                                     unsigned long expiry)
{
        if (time_before(expiry, ll_capa_timer.expires) ||
            !timer_pending(&ll_capa_timer)) {
                mod_timer(&ll_capa_timer, expiry);
                DEBUG_CAPA(D_SEC, &ocapa->c_capa,
                           "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
        }
}

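/* renewal of a capa starts half a timeout interval before it expires */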
static inline unsigned long capa_renewal_time(struct obd_capa *ocapa)
{
        return cfs_time_sub(ocapa->c_expiry,
                            cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2);
}

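/* true if @ocapa has reached its renewal time */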
static inline int capa_is_to_expire(struct obd_capa *ocapa)
{
        return time_before_eq(capa_renewal_time(ocapa), cfs_time_current());
}

static inline int have_expired_capa(void)
{
        struct obd_capa *ocapa = NULL;
        int expired = 0;

        /* return 1 if ll_capa_list has a client capa that is due for
         * renewal, or ll_idle_capas has an expired capa.
         */
        spin_lock(&capa_lock);
        if (!list_empty(ll_capa_list)) {
                ocapa = list_entry(ll_capa_list->next, struct obd_capa,
                                   c_list);
                expired = capa_is_to_expire(ocapa);
                if (!expired)
                        update_capa_timer(ocapa, capa_renewal_time(ocapa));
        } else if (!list_empty(&ll_idle_capas)) {
                ocapa = list_entry(ll_idle_capas.next, struct obd_capa,
                                   c_list);
                expired = capa_is_expired(ocapa);
                if (!expired)
                        update_capa_timer(ocapa, ocapa->c_expiry);
        }
        spin_unlock(&capa_lock);

        if (expired)
                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
        return expired;
}

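/* insert @ocapa into @head, keeping the list sorted by increasing expiry */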
static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
{
        struct obd_capa *tmp;
        struct list_head *before = NULL;

        /* TODO: client capa is sorted by expiry, this could be optimized */
        list_for_each_entry_reverse(tmp, head, c_list) {
                if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) {
                        before = &tmp->c_list;
                        break;
                }
        }

        LASSERT(&ocapa->c_list != before);
        list_add(&ocapa->c_list, before ?: head);
}

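/* number of opens currently held on the inode this capa belongs to */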
static inline int obd_capa_open_count(struct obd_capa *oc)
{
        struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);

        return atomic_read(&lli->lli_open_count);
}

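/* Unlink @ocapa from its inode and from the renewal lists and drop the
 * reference taken at allocation time. Called with capa_lock held.
 */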
static void ll_delete_capa(struct obd_capa *ocapa)
{
        struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);

        if (capa_for_mds(&ocapa->c_capa)) {
                LASSERT(lli->lli_mds_capa == ocapa);
                lli->lli_mds_capa = NULL;
        } else if (capa_for_oss(&ocapa->c_capa)) {
                list_del_init(&ocapa->u.cli.lli_list);
        }

        DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
        list_del_init(&ocapa->c_list);
        capa_count[CAPA_SITE_CLIENT]--;
        /* release the ref taken when the capa was allocated */
        capa_put(ocapa);
}

/* A client capa is deleted in three places:
 * 1. capa_thread_main(), the main place where expired capas are deleted.
 * 2. ll_clear_inode_capas() in ll_clear_inode().
 * 3. ll_truncate_free_capa(), which deletes the truncate capa explicitly in
 *    ll_setattr_ost().
 */
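/* Renewal thread: woken by ll_capa_timer_callback(), it renews client capas
 * that are about to expire and releases expired capas from the idle list.
 */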
static int capa_thread_main(void *unused)
{
        struct obd_capa *ocapa, *tmp, *next;
        struct inode *inode = NULL;
        struct l_wait_info lwi = { 0 };
        int rc;

        thread_set_flags(&ll_capa_thread, SVC_RUNNING);
        wake_up(&ll_capa_thread.t_ctl_waitq);

        while (1) {
                l_wait_event(ll_capa_thread.t_ctl_waitq,
                             !thread_is_running(&ll_capa_thread) ||
                             have_expired_capa(),
                             &lwi);

                if (!thread_is_running(&ll_capa_thread))
                        break;

                next = NULL;

                spin_lock(&capa_lock);
                list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
                        __u64 ibits;

                        LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);

                        if (!capa_is_to_expire(ocapa)) {
                                next = ocapa;
                                break;
                        }

                        list_del_init(&ocapa->c_list);

                        /* For an MDS capability, only renew it if it belongs
                         * to a directory, its inode is open, or the client
                         * holds a LOOKUP lock on it.
                         */
                        /* ibits may be changed by ll_have_md_lock() so we
                         * have to set it each time.
                         */
                        ibits = MDS_INODELOCK_LOOKUP;
                        if (capa_for_mds(&ocapa->c_capa) &&
                            !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
                            obd_capa_open_count(ocapa) == 0 &&
                            !ll_have_md_lock(ocapa->u.cli.inode,
                                             &ibits, LCK_MINMODE)) {
                                DEBUG_CAPA(D_SEC, &ocapa->c_capa,
                                           "skip renewal for");
                                sort_add_capa(ocapa, &ll_idle_capas);
                                continue;
                        }

                        /* For an OSS capability, only renew it if its inode
                         * is open.
                         */
                        if (capa_for_oss(&ocapa->c_capa) &&
                            obd_capa_open_count(ocapa) == 0) {
                                /* an OSS capa with open count == 0 is not
                                 * renewed; move it to the idle list.
                                 */
                                sort_add_capa(ocapa, &ll_idle_capas);
                                continue;
                        }

                        /* NB iput() is in ll_update_capa() */
                        inode = igrab(ocapa->u.cli.inode);
                        if (!inode) {
                                DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
                                           "igrab failed for");
                                continue;
                        }

                        capa_get(ocapa);
                        ll_capa_renewed++;
                        spin_unlock(&capa_lock);
                        rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
                                           ll_update_capa);
                        spin_lock(&capa_lock);
                        if (rc) {
                                DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
                                           "renew failed: %d", rc);
                                ll_capa_renewal_failed++;
                        }
                }

                if (next)
                        update_capa_timer(next, capa_renewal_time(next));

                list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas,
                                         c_list) {
                        if (!capa_is_expired(ocapa)) {
                                if (!next)
                                        update_capa_timer(ocapa,
                                                          ocapa->c_expiry);
                                break;
                        }

                        if (atomic_read(&ocapa->c_refc) > 1) {
                                DEBUG_CAPA(D_SEC, &ocapa->c_capa,
                                           "expired(c_refc %d), don't release",
                                           atomic_read(&ocapa->c_refc));
                                /* don't try to renew it any more */
                                list_del_init(&ocapa->c_list);
                                continue;
                        }

                        /* expired capa is released */
                        DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
                        ll_delete_capa(ocapa);
                }

                spin_unlock(&capa_lock);
        }

        thread_set_flags(&ll_capa_thread, SVC_STOPPED);
        wake_up(&ll_capa_thread.t_ctl_waitq);
        return 0;
}

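/* timer callback: wake the capa thread so it can process due capas */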
void ll_capa_timer_callback(unsigned long unused)
{
        wake_up(&ll_capa_thread.t_ctl_waitq);
}

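/* start the capa renewal thread and wait until it is running */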
int ll_capa_thread_start(void)
{
        struct task_struct *task;

        init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);

        task = kthread_run(capa_thread_main, NULL, "ll_capa");
        if (IS_ERR(task)) {
                CERROR("cannot start expired capa thread: rc %ld\n",
                       PTR_ERR(task));
                return PTR_ERR(task);
        }
        wait_event(ll_capa_thread.t_ctl_waitq,
                   thread_is_running(&ll_capa_thread));

        return 0;
}

void ll_capa_thread_stop(void)
{
        thread_set_flags(&ll_capa_thread, SVC_STOPPING);
        wake_up(&ll_capa_thread.t_ctl_waitq);
        wait_event(ll_capa_thread.t_ctl_waitq,
                   thread_is_stopped(&ll_capa_thread));
}

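/* Find a valid (unexpired) OSS capability of @inode that supports the
 * requested operation and take a reference on it. Returns NULL if OSS capas
 * are disabled or no matching capa is cached.
 */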
struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct obd_capa *ocapa;
        int found = 0;

        if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
                return NULL;

        LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW ||
                opc == CAPA_OPC_OSS_TRUNC);

        spin_lock(&capa_lock);
        list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
                if (capa_is_expired(ocapa))
                        continue;
                if ((opc & CAPA_OPC_OSS_WRITE) &&
                    capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
                        found = 1;
                        break;
                } else if ((opc & CAPA_OPC_OSS_READ) &&
                           capa_opc_supported(&ocapa->c_capa,
                                              CAPA_OPC_OSS_READ)) {
                        found = 1;
                        break;
                } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
                           capa_opc_supported(&ocapa->c_capa, opc)) {
                        found = 1;
                        break;
                }
        }

        if (found) {
                LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
                                  ll_inode2fid(inode)));
                LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);

                capa_get(ocapa);

                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
        } else {
                ocapa = NULL;

                if (atomic_read(&ll_capa_debug)) {
                        CERROR("no capability for "DFID" opc %#llx\n",
                               PFID(&lli->lli_fid), opc);
                        atomic_set(&ll_capa_debug, 0);
                }
        }
        spin_unlock(&capa_lock);

        return ocapa;
}
EXPORT_SYMBOL(ll_osscapa_get);

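/* Take a reference on the MDS capability of @inode, or return NULL if MDS
 * capas are disabled or none is cached.
 */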
struct obd_capa *ll_mdscapa_get(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct obd_capa *ocapa;

        LASSERT(inode != NULL);

        if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
                return NULL;

        spin_lock(&capa_lock);
        ocapa = capa_get(lli->lli_mds_capa);
        spin_unlock(&capa_lock);
        if (!ocapa && atomic_read(&ll_capa_debug)) {
                CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid));
                atomic_set(&ll_capa_debug, 0);
        }

        return ocapa;
}

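/* Install @ocapa as the inode's MDS capability, or refresh the cached one in
 * place and drop @ocapa. Returns the capa that is now cached. Called with
 * capa_lock held.
 */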
static struct obd_capa *do_add_mds_capa(struct inode *inode,
                                        struct obd_capa *ocapa)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct obd_capa *old = lli->lli_mds_capa;
        struct lustre_capa *capa = &ocapa->c_capa;

        if (!old) {
                ocapa->u.cli.inode = inode;
                lli->lli_mds_capa = ocapa;
                capa_count[CAPA_SITE_CLIENT]++;

                DEBUG_CAPA(D_SEC, capa, "add MDS");
        } else {
                spin_lock(&old->c_lock);
                old->c_capa = *capa;
                spin_unlock(&old->c_lock);

                DEBUG_CAPA(D_SEC, capa, "update MDS");

                capa_put(ocapa);
                ocapa = old;
        }
        return ocapa;
}

static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct obd_capa *ocapa;

        /* called with capa_lock held */
        list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
                if ((capa_opc(&ocapa->c_capa) & opc) != opc)
                        continue;

                LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
                                  ll_inode2fid(inode)));
                LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);

                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
                return ocapa;
        }

        return NULL;
}

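/* Move @ocapa into lli_oss_capas, keeping that list sorted by decreasing
 * expiry so lookups see the freshest capa first.
 */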
static inline void inode_add_oss_capa(struct inode *inode,
                                      struct obd_capa *ocapa)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct obd_capa *tmp;
        struct list_head *next = NULL;

        /* capas are sorted by expiry in lli_oss_capas so that a lookup
         * always finds the latest one first.
         */
        list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
                if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
                        next = &tmp->u.cli.lli_list;
                        break;
                }
        }
        LASSERT(&ocapa->u.cli.lli_list != next);
        list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
}

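/* Add @ocapa as an OSS capability of the inode, or refresh a matching cached
 * capa and drop @ocapa. Returns the capa now cached in lli_oss_capas. Called
 * with capa_lock held.
 */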
static struct obd_capa *do_add_oss_capa(struct inode *inode,
                                        struct obd_capa *ocapa)
{
        struct obd_capa *old;
        struct lustre_capa *capa = &ocapa->c_capa;

        LASSERTF(S_ISREG(inode->i_mode),
                 "inode has oss capa, but not regular file, mode: %d\n",
                 inode->i_mode);

        /* FIXME: can't replace it so easily with fine-grained opc */
        old = do_lookup_oss_capa(inode, capa_opc(capa) & CAPA_OPC_OSS_ONLY);
        if (!old) {
                ocapa->u.cli.inode = inode;
                INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
                capa_count[CAPA_SITE_CLIENT]++;

                DEBUG_CAPA(D_SEC, capa, "add OSS");
        } else {
                spin_lock(&old->c_lock);
                old->c_capa = *capa;
                spin_unlock(&old->c_lock);

                DEBUG_CAPA(D_SEC, capa, "update OSS");

                capa_put(ocapa);
                ocapa = old;
        }

        inode_add_oss_capa(inode, ocapa);
        return ocapa;
}

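/* Cache a capability returned by the MDS on its inode and, unless it is a
 * truncate capa, queue it for periodic renewal.
 */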
struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
{
        spin_lock(&capa_lock);
        ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) :
                do_add_oss_capa(inode, ocapa);

        /* a truncate capa is never renewed */
        if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
                set_capa_expiry(ocapa);
                list_del_init(&ocapa->c_list);
                sort_add_capa(ocapa, ll_capa_list);

                update_capa_timer(ocapa, capa_renewal_time(ocapa));
        }

        spin_unlock(&capa_lock);

        atomic_set(&ll_capa_debug, 1);
        return ocapa;
}

static inline void delay_capa_renew(struct obd_capa *oc, unsigned long delay)
{
        /* NB: set a fake expiry for this capa to prevent it from being
         * renewed too soon.
         */
        oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay));
}

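/* Renewal callback passed to md_renew_capa(): on success store the refreshed
 * capa and re-queue it for renewal; on failure move it to the idle list,
 * retrying later only for -EIO. Drops the capa and inode references taken in
 * capa_thread_main().
 */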
static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
{
        struct inode *inode = ocapa->u.cli.inode;
        int rc = 0;

        LASSERT(ocapa);

        if (IS_ERR(capa)) {
                /* set error code */
                rc = PTR_ERR(capa);
                spin_lock(&capa_lock);
                if (rc == -ENOENT) {
                        DEBUG_CAPA(D_SEC, &ocapa->c_capa,
                                   "renewal canceled because object removed");
                        ll_capa_renewal_noent++;
                } else {
                        ll_capa_renewal_failed++;

                        /* a failed capa is not renewed any more, but on -EIO
                         * the client might be doing recovery, so retry in
                         * 2 min.
                         */
                        if (rc == -EIO && !capa_is_expired(ocapa)) {
                                delay_capa_renew(ocapa, 120);
                                DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
                                           "renewal failed: -EIO, retry in 2 mins");
                                ll_capa_renewal_retries++;
                                goto retry;
                        } else {
                                DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
                                           "renewal failed(rc: %d) for", rc);
                        }
                }

                list_del_init(&ocapa->c_list);
                sort_add_capa(ocapa, &ll_idle_capas);
                spin_unlock(&capa_lock);

                capa_put(ocapa);
                iput(inode);
                return rc;
        }

        spin_lock(&ocapa->c_lock);
        LASSERT(!memcmp(&ocapa->c_capa, capa,
                        offsetof(struct lustre_capa, lc_opc)));
        ocapa->c_capa = *capa;
        set_capa_expiry(ocapa);
        spin_unlock(&ocapa->c_lock);

        spin_lock(&capa_lock);
        if (capa_for_oss(capa))
                inode_add_oss_capa(inode, ocapa);
        DEBUG_CAPA(D_SEC, capa, "renew");
retry:
        list_del_init(&ocapa->c_list);
        sort_add_capa(ocapa, ll_capa_list);
        update_capa_timer(ocapa, capa_renewal_time(ocapa));
        spin_unlock(&capa_lock);

        capa_put(ocapa);
        iput(inode);
        return rc;
}

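/* Count an open of a regular file so its capabilities keep being renewed;
 * paired with ll_capa_close().
 */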
void ll_capa_open(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);

        if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
            == 0)
                return;

        if (!S_ISREG(inode->i_mode))
                return;

        atomic_inc(&lli->lli_open_count);
}

void ll_capa_close(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);

        if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
            == 0)
                return;

        if (!S_ISREG(inode->i_mode))
                return;

        atomic_dec(&lli->lli_open_count);
}

/* delete a CAPA_OPC_OSS_TRUNC capa only */
void ll_truncate_free_capa(struct obd_capa *ocapa)
{
        if (!ocapa)
                return;

        LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
        DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate");

        /* release the ref taken when the capa was looked up */
        capa_put(ocapa);
        if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) {
                spin_lock(&capa_lock);
                ll_delete_capa(ocapa);
                spin_unlock(&capa_lock);
        }
}

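/* release every capability cached on @inode; called from ll_clear_inode() */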
void ll_clear_inode_capas(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct obd_capa *ocapa, *tmp;

        spin_lock(&capa_lock);
        ocapa = lli->lli_mds_capa;
        if (ocapa)
                ll_delete_capa(ocapa);

        list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
                                 u.cli.lli_list)
                ll_delete_capa(ocapa);
        spin_unlock(&capa_lock);
}

void ll_print_capa_stat(struct ll_sb_info *sbi)
{
        if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
                LCONSOLE_INFO("Fid capabilities renewed: %llu\n"
                              "Fid capabilities renewal ENOENT: %llu\n"
                              "Fid capabilities failed to renew: %llu\n"
                              "Fid capabilities renewal retries: %llu\n",
                              ll_capa_renewed, ll_capa_renewal_noent,
                              ll_capa_renewal_failed, ll_capa_renewal_retries);
}