staging: lustre: remove RETURN macro
drivers/staging/lustre/lustre/llite/llite_capa.c
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/llite/llite_capa.c
 *
 * Author: Lai Siyao <lsy@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_LLITE

#include <linux/fs.h>
#include <asm/uaccess.h>
#include <linux/file.h>
#include <linux/kmod.h>

#include <lustre_lite.h>
#include "llite_internal.h"

/* for obd_capa.c_list, a client capa may be in one of three places:
 * 1. ll_capa_list.
 * 2. ll_idle_capas.
 * 3. standalone: just allocated.
 */

/* capas used for OSS writeback and those that failed to renew */
static LIST_HEAD(ll_idle_capas);
static struct ptlrpc_thread ll_capa_thread;
static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];

/* llite capa renewal timer */
struct timer_list ll_capa_timer;
/* for debug: indicate whether capa on llite is enabled or not */
static atomic_t ll_capa_debug = ATOMIC_INIT(0);
static unsigned long long ll_capa_renewed;
static unsigned long long ll_capa_renewal_noent;
static unsigned long long ll_capa_renewal_failed;
static unsigned long long ll_capa_renewal_retries;

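/*
 * Re-arm the global renewal timer if @expiry is earlier than the timer's
 * current expiry, or if the timer is not pending at all.
 */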
static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
{
	if (cfs_time_before(expiry, ll_capa_timer.expires) ||
	    !timer_pending(&ll_capa_timer)) {
		mod_timer(&ll_capa_timer, expiry);
		DEBUG_CAPA(D_SEC, &ocapa->c_capa,
			   "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
	}
}

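/*
 * A capa is renewed when half of its timeout remains, i.e. at
 * c_expiry - lc_timeout / 2.
 */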
static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa)
{
	return cfs_time_sub(ocapa->c_expiry,
			    cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2);
}

static inline int capa_is_to_expire(struct obd_capa *ocapa)
{
	return cfs_time_beforeq(capa_renewal_time(ocapa), cfs_time_current());
}

static inline int have_expired_capa(void)
{
	struct obd_capa *ocapa = NULL;
	int expired = 0;

	/* if ll_capa_list has client capa to expire or ll_idle_capas has
	 * expired capa, return 1.
	 */
	spin_lock(&capa_lock);
	if (!list_empty(ll_capa_list)) {
		ocapa = list_entry(ll_capa_list->next, struct obd_capa,
				   c_list);
		expired = capa_is_to_expire(ocapa);
		if (!expired)
			update_capa_timer(ocapa, capa_renewal_time(ocapa));
	} else if (!list_empty(&ll_idle_capas)) {
		ocapa = list_entry(ll_idle_capas.next, struct obd_capa,
				   c_list);
		expired = capa_is_expired(ocapa);
		if (!expired)
			update_capa_timer(ocapa, ocapa->c_expiry);
	}
	spin_unlock(&capa_lock);

	if (expired)
		DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
	return expired;
}

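/*
 * Insert @ocapa into @head, keeping the list sorted by increasing c_expiry.
 * The list is walked from the tail so the common case (newest expiry) is
 * handled on the first comparison.
 */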
static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
{
	struct obd_capa *tmp;
	struct list_head *before = NULL;

	/* TODO: client capa is sorted by expiry, this could be optimized */
	list_for_each_entry_reverse(tmp, head, c_list) {
		if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) {
			before = &tmp->c_list;
			break;
		}
	}

	LASSERT(&ocapa->c_list != before);
	list_add(&ocapa->c_list, before ?: head);
}

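/* number of opens currently held on the inode this capa belongs to */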
static inline int obd_capa_open_count(struct obd_capa *oc)
{
	struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);

	return atomic_read(&lli->lli_open_count);
}

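/*
 * Unlink a client capa from its inode and from the renewal lists, and drop
 * the reference taken when it was allocated. Caller must hold capa_lock.
 */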
static void ll_delete_capa(struct obd_capa *ocapa)
{
	struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);

	if (capa_for_mds(&ocapa->c_capa)) {
		LASSERT(lli->lli_mds_capa == ocapa);
		lli->lli_mds_capa = NULL;
	} else if (capa_for_oss(&ocapa->c_capa)) {
		list_del_init(&ocapa->u.cli.lli_list);
	}

	DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
	list_del_init(&ocapa->c_list);
	capa_count[CAPA_SITE_CLIENT]--;
	/* drop the reference taken at allocation */
	capa_put(ocapa);
}

/* a client capa is deleted in three places:
 * 1. capa_thread_main(), the main place where expired capas are deleted.
 * 2. ll_clear_inode_capas() in ll_clear_inode().
 * 3. ll_truncate_free_capa(), which deletes the truncate capa explicitly
 *    in ll_setattr_ost().
 */
static int capa_thread_main(void *unused)
{
	struct obd_capa *ocapa, *tmp, *next;
	struct inode *inode = NULL;
	struct l_wait_info lwi = { 0 };
	int rc;

	thread_set_flags(&ll_capa_thread, SVC_RUNNING);
	wake_up(&ll_capa_thread.t_ctl_waitq);

	while (1) {
		l_wait_event(ll_capa_thread.t_ctl_waitq,
			     !thread_is_running(&ll_capa_thread) ||
			     have_expired_capa(),
			     &lwi);

		if (!thread_is_running(&ll_capa_thread))
			break;

		next = NULL;

		spin_lock(&capa_lock);
		list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
			__u64 ibits;

			LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);

			if (!capa_is_to_expire(ocapa)) {
				next = ocapa;
				break;
			}

			list_del_init(&ocapa->c_list);

			/* for an MDS capability, only renew those that belong
			 * to a directory, whose inode is still open, or for
			 * which the client holds a LOOKUP lock.
			 */
			/* ibits may be changed by ll_have_md_lock() so we have
			 * to set it each time
			 */
			ibits = MDS_INODELOCK_LOOKUP;
			if (capa_for_mds(&ocapa->c_capa) &&
			    !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
			    obd_capa_open_count(ocapa) == 0 &&
			    !ll_have_md_lock(ocapa->u.cli.inode,
					     &ibits, LCK_MINMODE)) {
				DEBUG_CAPA(D_SEC, &ocapa->c_capa,
					   "skip renewal for");
				sort_add_capa(ocapa, &ll_idle_capas);
				continue;
			}

			/* for an OSS capability, only renew those whose inode
			 * is still open.
			 */
			if (capa_for_oss(&ocapa->c_capa) &&
			    obd_capa_open_count(ocapa) == 0) {
				/* an OSS capa with open count == 0 is not
				 * renewed; move it to the idle list
				 */
				sort_add_capa(ocapa, &ll_idle_capas);
				continue;
			}

			/* NB iput() is in ll_update_capa() */
			inode = igrab(ocapa->u.cli.inode);
			if (inode == NULL) {
				DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
					   "igrab failed for");
				continue;
			}

			capa_get(ocapa);
			ll_capa_renewed++;
			spin_unlock(&capa_lock);
			rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
					   ll_update_capa);
			spin_lock(&capa_lock);
			if (rc) {
				DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
					   "renew failed: %d", rc);
				ll_capa_renewal_failed++;
			}
		}

		if (next)
			update_capa_timer(next, capa_renewal_time(next));

		list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas,
					 c_list) {
			if (!capa_is_expired(ocapa)) {
				if (!next)
					update_capa_timer(ocapa,
							  ocapa->c_expiry);
				break;
			}

			if (atomic_read(&ocapa->c_refc) > 1) {
				DEBUG_CAPA(D_SEC, &ocapa->c_capa,
					   "expired(c_refc %d), don't release",
					   atomic_read(&ocapa->c_refc));
				/* don't try to renew any more */
				list_del_init(&ocapa->c_list);
				continue;
			}

			/* expired capa is released. */
			DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
			ll_delete_capa(ocapa);
		}

		spin_unlock(&capa_lock);
	}

	thread_set_flags(&ll_capa_thread, SVC_STOPPED);
	wake_up(&ll_capa_thread.t_ctl_waitq);
	return 0;
}

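/* timer callback: just wake up the capa thread, all real work is done there */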
void ll_capa_timer_callback(unsigned long unused)
{
	wake_up(&ll_capa_thread.t_ctl_waitq);
}

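/* start the capa renewal thread and wait until it is running */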
int ll_capa_thread_start(void)
{
	struct task_struct *task;

	init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);

	task = kthread_run(capa_thread_main, NULL, "ll_capa");
	if (IS_ERR(task)) {
		CERROR("cannot start expired capa thread: rc %ld\n",
		       PTR_ERR(task));
		return PTR_ERR(task);
	}
	wait_event(ll_capa_thread.t_ctl_waitq,
		   thread_is_running(&ll_capa_thread));

	return 0;
}

void ll_capa_thread_stop(void)
{
	thread_set_flags(&ll_capa_thread, SVC_STOPPING);
	wake_up(&ll_capa_thread.t_ctl_waitq);
	wait_event(ll_capa_thread.t_ctl_waitq,
		   thread_is_stopped(&ll_capa_thread));
}

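/*
 * Look up an unexpired OSS capa on @inode that covers @opc and return it
 * with an extra reference, or NULL if none is found (or OSS capas are
 * disabled for this mount).
 */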
struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_capa *ocapa;
	int found = 0;

	if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
		return NULL;

	LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW ||
		opc == CAPA_OPC_OSS_TRUNC);

	spin_lock(&capa_lock);
	list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
		if (capa_is_expired(ocapa))
			continue;
		if ((opc & CAPA_OPC_OSS_WRITE) &&
		    capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
			found = 1;
			break;
		} else if ((opc & CAPA_OPC_OSS_READ) &&
			   capa_opc_supported(&ocapa->c_capa,
					      CAPA_OPC_OSS_READ)) {
			found = 1;
			break;
		} else if ((opc & CAPA_OPC_OSS_TRUNC) &&
			   capa_opc_supported(&ocapa->c_capa, opc)) {
			found = 1;
			break;
		}
	}

	if (found) {
		LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
				  ll_inode2fid(inode)));
		LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);

		capa_get(ocapa);

		DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
	} else {
		ocapa = NULL;

		if (atomic_read(&ll_capa_debug)) {
			CERROR("no capability for "DFID" opc "LPX64"\n",
			       PFID(&lli->lli_fid), opc);
			atomic_set(&ll_capa_debug, 0);
		}
	}
	spin_unlock(&capa_lock);

	return ocapa;
}
EXPORT_SYMBOL(ll_osscapa_get);

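/* return the inode's MDS capa with an extra reference, or NULL */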
struct obd_capa *ll_mdscapa_get(struct inode *inode)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_capa *ocapa;

	LASSERT(inode != NULL);

	if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
		return NULL;

	spin_lock(&capa_lock);
	ocapa = capa_get(lli->lli_mds_capa);
	spin_unlock(&capa_lock);
	if (!ocapa && atomic_read(&ll_capa_debug)) {
		CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid));
		atomic_set(&ll_capa_debug, 0);
	}

	return ocapa;
}

static struct obd_capa *do_add_mds_capa(struct inode *inode,
					struct obd_capa *ocapa)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_capa *old = lli->lli_mds_capa;
	struct lustre_capa *capa = &ocapa->c_capa;

	if (!old) {
		ocapa->u.cli.inode = inode;
		lli->lli_mds_capa = ocapa;
		capa_count[CAPA_SITE_CLIENT]++;

		DEBUG_CAPA(D_SEC, capa, "add MDS");
	} else {
		spin_lock(&old->c_lock);
		old->c_capa = *capa;
		spin_unlock(&old->c_lock);

		DEBUG_CAPA(D_SEC, capa, "update MDS");

		capa_put(ocapa);
		ocapa = old;
	}
	return ocapa;
}

static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_capa *ocapa;

	/* inside capa_lock */
	list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
		if ((capa_opc(&ocapa->c_capa) & opc) != opc)
			continue;

		LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
				  ll_inode2fid(inode)));
		LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);

		DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
		return ocapa;
	}

	return NULL;
}

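/*
 * Link @ocapa into the inode's lli_oss_capas list, keeping the list sorted
 * by decreasing c_expiry so a lookup always finds the freshest capa first.
 */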
static inline void inode_add_oss_capa(struct inode *inode,
				      struct obd_capa *ocapa)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_capa *tmp;
	struct list_head *next = NULL;

	/* capa is sorted in lli_oss_capas so lookup can always find the
	 * latest one
	 */
	list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
		if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
			next = &tmp->u.cli.lli_list;
			break;
		}
	}
	LASSERT(&ocapa->u.cli.lli_list != next);
	list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
}

static struct obd_capa *do_add_oss_capa(struct inode *inode,
					struct obd_capa *ocapa)
{
	struct obd_capa *old;
	struct lustre_capa *capa = &ocapa->c_capa;

	LASSERTF(S_ISREG(inode->i_mode),
		 "inode has oss capa, but not regular file, mode: %d\n",
		 inode->i_mode);

	/* FIXME: can't replace it so easily with fine-grained opc */
	old = do_lookup_oss_capa(inode, capa_opc(capa) & CAPA_OPC_OSS_ONLY);
	if (!old) {
		ocapa->u.cli.inode = inode;
		INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
		capa_count[CAPA_SITE_CLIENT]++;

		DEBUG_CAPA(D_SEC, capa, "add OSS");
	} else {
		spin_lock(&old->c_lock);
		old->c_capa = *capa;
		spin_unlock(&old->c_lock);

		DEBUG_CAPA(D_SEC, capa, "update OSS");

		capa_put(ocapa);
		ocapa = old;
	}

	inode_add_oss_capa(inode, ocapa);
	return ocapa;
}

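/*
 * Add a freshly received capa to the inode and, unless it is a truncate
 * capa, queue it on ll_capa_list for periodic renewal.
 */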
struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
{
	spin_lock(&capa_lock);
	ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) :
					       do_add_oss_capa(inode, ocapa);

	/* a truncate capa is never renewed */
	if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
		set_capa_expiry(ocapa);
		list_del_init(&ocapa->c_list);
		sort_add_capa(ocapa, ll_capa_list);

		update_capa_timer(ocapa, capa_renewal_time(ocapa));
	}

	spin_unlock(&capa_lock);

	atomic_set(&ll_capa_debug, 1);
	return ocapa;
}

static inline void delay_capa_renew(struct obd_capa *oc, cfs_time_t delay)
{
	/* NB: set a fake expiry for this capa to prevent it from being
	 * renewed too soon
	 */
	oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay));
}

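/*
 * Renewal callback passed to md_renew_capa(): update the capa with the
 * reply from the MDS, or park it on the idle list if renewal failed.
 * Drops the references taken in capa_thread_main() (capa ref and igrab).
 */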
int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
{
	struct inode *inode = ocapa->u.cli.inode;
	int rc = 0;

	LASSERT(ocapa);

	if (IS_ERR(capa)) {
		/* set error code */
		rc = PTR_ERR(capa);
		spin_lock(&capa_lock);
		if (rc == -ENOENT) {
			DEBUG_CAPA(D_SEC, &ocapa->c_capa,
				   "renewal canceled because object removed");
			ll_capa_renewal_noent++;
		} else {
			ll_capa_renewal_failed++;

			/* a failed capa is not renewed any longer, but if the
			 * error is -EIO the client might be in recovery, so
			 * retry in 2 minutes.
			 */
			if (rc == -EIO && !capa_is_expired(ocapa)) {
				delay_capa_renew(ocapa, 120);
				DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
					   "renewal failed: -EIO, retry in 2 mins");
				ll_capa_renewal_retries++;
				GOTO(retry, rc);
			} else {
				DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
					   "renewal failed(rc: %d) for", rc);
			}
		}

		list_del_init(&ocapa->c_list);
		sort_add_capa(ocapa, &ll_idle_capas);
		spin_unlock(&capa_lock);

		capa_put(ocapa);
		iput(inode);
		return rc;
	}

	spin_lock(&ocapa->c_lock);
	LASSERT(!memcmp(&ocapa->c_capa, capa,
			offsetof(struct lustre_capa, lc_opc)));
	ocapa->c_capa = *capa;
	set_capa_expiry(ocapa);
	spin_unlock(&ocapa->c_lock);

	spin_lock(&capa_lock);
	if (capa_for_oss(capa))
		inode_add_oss_capa(inode, ocapa);
	DEBUG_CAPA(D_SEC, capa, "renew");
retry:
	list_del_init(&ocapa->c_list);
	sort_add_capa(ocapa, ll_capa_list);
	update_capa_timer(ocapa, capa_renewal_time(ocapa));
	spin_unlock(&capa_lock);

	capa_put(ocapa);
	iput(inode);
	return rc;
}

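/* track opens of regular files so the capa thread knows which capas to renew */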
void ll_capa_open(struct inode *inode)
{
	struct ll_inode_info *lli = ll_i2info(inode);

	if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
	    == 0)
		return;

	if (!S_ISREG(inode->i_mode))
		return;

	atomic_inc(&lli->lli_open_count);
}

void ll_capa_close(struct inode *inode)
{
	struct ll_inode_info *lli = ll_i2info(inode);

	if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
	    == 0)
		return;

	if (!S_ISREG(inode->i_mode))
		return;

	atomic_dec(&lli->lli_open_count);
}

/* delete CAPA_OPC_OSS_TRUNC only */
void ll_truncate_free_capa(struct obd_capa *ocapa)
{
	if (!ocapa)
		return;

	LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
	DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate");

	/* release the reference taken when the capa was looked up */
	capa_put(ocapa);
	if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) {
		spin_lock(&capa_lock);
		ll_delete_capa(ocapa);
		spin_unlock(&capa_lock);
	}
}

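/* drop all capas attached to the inode; called when the inode is cleared */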
void ll_clear_inode_capas(struct inode *inode)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_capa *ocapa, *tmp;

	spin_lock(&capa_lock);
	ocapa = lli->lli_mds_capa;
	if (ocapa)
		ll_delete_capa(ocapa);

	list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
				 u.cli.lli_list)
		ll_delete_capa(ocapa);
	spin_unlock(&capa_lock);
}

void ll_print_capa_stat(struct ll_sb_info *sbi)
{
	if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
		LCONSOLE_INFO("Fid capabilities renewed: %llu\n"
			      "Fid capabilities renewal ENOENT: %llu\n"
			      "Fid capabilities failed to renew: %llu\n"
			      "Fid capabilities renewal retries: %llu\n",
			      ll_capa_renewed, ll_capa_renewal_noent,
			      ll_capa_renewal_failed, ll_capa_renewal_retries);
}