4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * libcfs/libcfs/tracefile.c
38 * Author: Zach Brown <zab@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
42 #define DEBUG_SUBSYSTEM S_LNET
43 #define LUSTRE_TRACEFILE_PRIVATE
44 #include "tracefile.h"
46 #include "../../include/linux/libcfs/libcfs.h"
48 /* XXX move things up to the top, comment */
49 union cfs_trace_data_union (*cfs_trace_data
[TCD_MAX_TYPES
])[NR_CPUS
] __cacheline_aligned
;
51 char cfs_tracefile
[TRACEFILE_NAME_SIZE
];
52 long long cfs_tracefile_size
= CFS_TRACEFILE_SIZE
;
53 static struct tracefiled_ctl trace_tctl
;
54 static DEFINE_MUTEX(cfs_trace_thread_mutex
);
55 static int thread_running
;
57 static atomic_t cfs_tage_allocated
= ATOMIC_INIT(0);
59 static void put_pages_on_tcd_daemon_list(struct page_collection
*pc
,
60 struct cfs_trace_cpu_data
*tcd
);
62 static inline struct cfs_trace_page
*
63 cfs_tage_from_list(struct list_head
*list
)
65 return list_entry(list
, struct cfs_trace_page
, linkage
);
68 static struct cfs_trace_page
*cfs_tage_alloc(gfp_t gfp
)
71 struct cfs_trace_page
*tage
;
73 /* My caller is trying to free memory */
74 if (!in_interrupt() && memory_pressure_get())
78 * Don't spam console with allocation failures: they will be reported
79 * by upper layer anyway.
82 page
= alloc_page(gfp
);
86 tage
= kmalloc(sizeof(*tage
), gfp
);
93 atomic_inc(&cfs_tage_allocated
);
97 static void cfs_tage_free(struct cfs_trace_page
*tage
)
99 __LASSERT(tage
!= NULL
);
100 __LASSERT(tage
->page
!= NULL
);
102 __free_page(tage
->page
);
104 atomic_dec(&cfs_tage_allocated
);
107 static void cfs_tage_to_tail(struct cfs_trace_page
*tage
,
108 struct list_head
*queue
)
110 __LASSERT(tage
!= NULL
);
111 __LASSERT(queue
!= NULL
);
113 list_move_tail(&tage
->linkage
, queue
);
116 int cfs_trace_refill_stock(struct cfs_trace_cpu_data
*tcd
, gfp_t gfp
,
117 struct list_head
*stock
)
122 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
123 * from here: this will lead to infinite recursion.
126 for (i
= 0; i
+ tcd
->tcd_cur_stock_pages
< TCD_STOCK_PAGES
; ++i
) {
127 struct cfs_trace_page
*tage
;
129 tage
= cfs_tage_alloc(gfp
);
132 list_add_tail(&tage
->linkage
, stock
);
137 /* return a page that has 'len' bytes left at the end */
138 static struct cfs_trace_page
*
139 cfs_trace_get_tage_try(struct cfs_trace_cpu_data
*tcd
, unsigned long len
)
141 struct cfs_trace_page
*tage
;
143 if (tcd
->tcd_cur_pages
> 0) {
144 __LASSERT(!list_empty(&tcd
->tcd_pages
));
145 tage
= cfs_tage_from_list(tcd
->tcd_pages
.prev
);
146 if (tage
->used
+ len
<= PAGE_CACHE_SIZE
)
150 if (tcd
->tcd_cur_pages
< tcd
->tcd_max_pages
) {
151 if (tcd
->tcd_cur_stock_pages
> 0) {
152 tage
= cfs_tage_from_list(tcd
->tcd_stock_pages
.prev
);
153 --tcd
->tcd_cur_stock_pages
;
154 list_del_init(&tage
->linkage
);
156 tage
= cfs_tage_alloc(GFP_ATOMIC
);
157 if (unlikely(tage
== NULL
)) {
158 if ((!memory_pressure_get() ||
159 in_interrupt()) && printk_ratelimit())
161 "cannot allocate a tage (%ld)\n",
168 tage
->cpu
= smp_processor_id();
169 tage
->type
= tcd
->tcd_type
;
170 list_add_tail(&tage
->linkage
, &tcd
->tcd_pages
);
171 tcd
->tcd_cur_pages
++;
173 if (tcd
->tcd_cur_pages
> 8 && thread_running
) {
174 struct tracefiled_ctl
*tctl
= &trace_tctl
;
176 * wake up tracefiled to process some pages.
178 wake_up(&tctl
->tctl_waitq
);
185 static void cfs_tcd_shrink(struct cfs_trace_cpu_data
*tcd
)
187 int pgcount
= tcd
->tcd_cur_pages
/ 10;
188 struct page_collection pc
;
189 struct cfs_trace_page
*tage
;
190 struct cfs_trace_page
*tmp
;
193 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
194 * from here: this will lead to infinite recursion.
197 if (printk_ratelimit())
198 printk(KERN_WARNING
"debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
199 pgcount
+ 1, tcd
->tcd_cur_pages
);
201 INIT_LIST_HEAD(&pc
.pc_pages
);
203 list_for_each_entry_safe(tage
, tmp
, &tcd
->tcd_pages
, linkage
) {
207 list_move_tail(&tage
->linkage
, &pc
.pc_pages
);
208 tcd
->tcd_cur_pages
--;
210 put_pages_on_tcd_daemon_list(&pc
, tcd
);
213 /* return a page that has 'len' bytes left at the end */
214 static struct cfs_trace_page
*cfs_trace_get_tage(struct cfs_trace_cpu_data
*tcd
,
217 struct cfs_trace_page
*tage
;
220 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
221 * from here: this will lead to infinite recursion.
224 if (len
> PAGE_CACHE_SIZE
) {
225 pr_err("cowardly refusing to write %lu bytes in a page\n", len
);
229 tage
= cfs_trace_get_tage_try(tcd
, len
);
234 if (tcd
->tcd_cur_pages
> 0) {
235 tage
= cfs_tage_from_list(tcd
->tcd_pages
.next
);
237 cfs_tage_to_tail(tage
, &tcd
->tcd_pages
);
242 int libcfs_debug_msg(struct libcfs_debug_msg_data
*msgdata
,
243 const char *format
, ...)
248 va_start(args
, format
);
249 rc
= libcfs_debug_vmsg2(msgdata
, format
, args
, NULL
);
254 EXPORT_SYMBOL(libcfs_debug_msg
);
256 int libcfs_debug_vmsg2(struct libcfs_debug_msg_data
*msgdata
,
257 const char *format1
, va_list args
,
258 const char *format2
, ...)
260 struct cfs_trace_cpu_data
*tcd
= NULL
;
261 struct ptldebug_header header
= {0};
262 struct cfs_trace_page
*tage
;
263 /* string_buf is used only if tcd != NULL, and is always set then */
264 char *string_buf
= NULL
;
267 int needed
= 85; /* average message length */
273 int mask
= msgdata
->msg_mask
;
274 const char *file
= kbasename(msgdata
->msg_file
);
275 struct cfs_debug_limit_state
*cdls
= msgdata
->msg_cdls
;
277 tcd
= cfs_trace_get_tcd();
279 /* cfs_trace_get_tcd() grabs a lock, which disables preemption and
280 * pins us to a particular CPU. This avoids an smp_processor_id()
281 * warning on Linux when debugging is enabled. */
282 cfs_set_ptldebug_header(&header
, msgdata
, CDEBUG_STACK());
284 if (tcd
== NULL
) /* arch may not log in IRQ context */
287 if (tcd
->tcd_cur_pages
== 0)
288 header
.ph_flags
|= PH_FLAG_FIRST_RECORD
;
290 if (tcd
->tcd_shutting_down
) {
291 cfs_trace_put_tcd(tcd
);
296 depth
= __current_nesting_level();
297 known_size
= strlen(file
) + 1 + depth
;
299 known_size
+= strlen(msgdata
->msg_fn
) + 1;
301 if (libcfs_debug_binary
)
302 known_size
+= sizeof(header
);
305 * '2' used because vsnprintf return real size required for output
306 * _without_ terminating NULL.
307 * if needed is to small for this format.
309 for (i
= 0; i
< 2; i
++) {
310 tage
= cfs_trace_get_tage(tcd
, needed
+ known_size
+ 1);
312 if (needed
+ known_size
> PAGE_CACHE_SIZE
)
315 cfs_trace_put_tcd(tcd
);
320 string_buf
= (char *)page_address(tage
->page
) +
321 tage
->used
+ known_size
;
323 max_nob
= PAGE_CACHE_SIZE
- tage
->used
- known_size
;
325 printk(KERN_EMERG
"negative max_nob: %d\n",
328 cfs_trace_put_tcd(tcd
);
336 needed
= vsnprintf(string_buf
, max_nob
, format1
, ap
);
341 remain
= max_nob
- needed
;
345 va_start(ap
, format2
);
346 needed
+= vsnprintf(string_buf
+ needed
, remain
,
351 if (needed
< max_nob
) /* well. printing ok.. */
355 if (*(string_buf
+needed
-1) != '\n')
356 printk(KERN_INFO
"format at %s:%d:%s doesn't end in newline\n",
357 file
, msgdata
->msg_line
, msgdata
->msg_fn
);
359 header
.ph_len
= known_size
+ needed
;
360 debug_buf
= (char *)page_address(tage
->page
) + tage
->used
;
362 if (libcfs_debug_binary
) {
363 memcpy(debug_buf
, &header
, sizeof(header
));
364 tage
->used
+= sizeof(header
);
365 debug_buf
+= sizeof(header
);
368 /* indent message according to the nesting level */
369 while (depth
-- > 0) {
370 *(debug_buf
++) = '.';
374 strcpy(debug_buf
, file
);
375 tage
->used
+= strlen(file
) + 1;
376 debug_buf
+= strlen(file
) + 1;
378 if (msgdata
->msg_fn
) {
379 strcpy(debug_buf
, msgdata
->msg_fn
);
380 tage
->used
+= strlen(msgdata
->msg_fn
) + 1;
381 debug_buf
+= strlen(msgdata
->msg_fn
) + 1;
384 __LASSERT(debug_buf
== string_buf
);
386 tage
->used
+= needed
;
387 __LASSERT (tage
->used
<= PAGE_CACHE_SIZE
);
390 if ((mask
& libcfs_printk
) == 0) {
391 /* no console output requested */
393 cfs_trace_put_tcd(tcd
);
398 if (libcfs_console_ratelimit
&&
399 cdls
->cdls_next
!= 0 && /* not first time ever */
400 !cfs_time_after(cfs_time_current(), cdls
->cdls_next
)) {
401 /* skipping a console message */
404 cfs_trace_put_tcd(tcd
);
408 if (cfs_time_after(cfs_time_current(), cdls
->cdls_next
+
409 libcfs_console_max_delay
410 + cfs_time_seconds(10))) {
411 /* last timeout was a long time ago */
412 cdls
->cdls_delay
/= libcfs_console_backoff
* 4;
414 cdls
->cdls_delay
*= libcfs_console_backoff
;
417 if (cdls
->cdls_delay
< libcfs_console_min_delay
)
418 cdls
->cdls_delay
= libcfs_console_min_delay
;
419 else if (cdls
->cdls_delay
> libcfs_console_max_delay
)
420 cdls
->cdls_delay
= libcfs_console_max_delay
;
422 /* ensure cdls_next is never zero after it's been seen */
423 cdls
->cdls_next
= (cfs_time_current() + cdls
->cdls_delay
) | 1;
427 cfs_print_to_console(&header
, mask
, string_buf
, needed
, file
,
429 cfs_trace_put_tcd(tcd
);
431 string_buf
= cfs_trace_get_console_buffer();
434 if (format1
!= NULL
) {
436 needed
= vsnprintf(string_buf
,
437 CFS_TRACE_CONSOLE_BUFFER_SIZE
,
441 if (format2
!= NULL
) {
442 remain
= CFS_TRACE_CONSOLE_BUFFER_SIZE
- needed
;
444 va_start(ap
, format2
);
445 needed
+= vsnprintf(string_buf
+needed
, remain
,
450 cfs_print_to_console(&header
, mask
,
451 string_buf
, needed
, file
, msgdata
->msg_fn
);
456 if (cdls
!= NULL
&& cdls
->cdls_count
!= 0) {
457 string_buf
= cfs_trace_get_console_buffer();
459 needed
= snprintf(string_buf
, CFS_TRACE_CONSOLE_BUFFER_SIZE
,
460 "Skipped %d previous similar message%s\n",
462 (cdls
->cdls_count
> 1) ? "s" : "");
464 cfs_print_to_console(&header
, mask
,
465 string_buf
, needed
, file
, msgdata
->msg_fn
);
468 cdls
->cdls_count
= 0;
473 EXPORT_SYMBOL(libcfs_debug_vmsg2
);
476 cfs_trace_assertion_failed(const char *str
,
477 struct libcfs_debug_msg_data
*msgdata
)
479 struct ptldebug_header hdr
;
481 libcfs_panic_in_progress
= 1;
482 libcfs_catastrophe
= 1;
485 cfs_set_ptldebug_header(&hdr
, msgdata
, CDEBUG_STACK());
487 cfs_print_to_console(&hdr
, D_EMERG
, str
, strlen(str
),
488 msgdata
->msg_file
, msgdata
->msg_fn
);
490 panic("Lustre debug assertion failure\n");
496 panic_collect_pages(struct page_collection
*pc
)
498 /* Do the collect_pages job on a single CPU: assumes that all other
499 * CPUs have been stopped during a panic. If this isn't true for some
500 * arch, this will have to be implemented separately in each arch. */
503 struct cfs_trace_cpu_data
*tcd
;
505 INIT_LIST_HEAD(&pc
->pc_pages
);
507 cfs_tcd_for_each(tcd
, i
, j
) {
508 list_splice_init(&tcd
->tcd_pages
, &pc
->pc_pages
);
509 tcd
->tcd_cur_pages
= 0;
511 if (pc
->pc_want_daemon_pages
) {
512 list_splice_init(&tcd
->tcd_daemon_pages
,
514 tcd
->tcd_cur_daemon_pages
= 0;
519 static void collect_pages_on_all_cpus(struct page_collection
*pc
)
521 struct cfs_trace_cpu_data
*tcd
;
524 for_each_possible_cpu(cpu
) {
525 cfs_tcd_for_each_type_lock(tcd
, i
, cpu
) {
526 list_splice_init(&tcd
->tcd_pages
, &pc
->pc_pages
);
527 tcd
->tcd_cur_pages
= 0;
528 if (pc
->pc_want_daemon_pages
) {
529 list_splice_init(&tcd
->tcd_daemon_pages
,
531 tcd
->tcd_cur_daemon_pages
= 0;
537 static void collect_pages(struct page_collection
*pc
)
539 INIT_LIST_HEAD(&pc
->pc_pages
);
541 if (libcfs_panic_in_progress
)
542 panic_collect_pages(pc
);
544 collect_pages_on_all_cpus(pc
);
547 static void put_pages_back_on_all_cpus(struct page_collection
*pc
)
549 struct cfs_trace_cpu_data
*tcd
;
550 struct list_head
*cur_head
;
551 struct cfs_trace_page
*tage
;
552 struct cfs_trace_page
*tmp
;
555 for_each_possible_cpu(cpu
) {
556 cfs_tcd_for_each_type_lock(tcd
, i
, cpu
) {
557 cur_head
= tcd
->tcd_pages
.next
;
559 list_for_each_entry_safe(tage
, tmp
, &pc
->pc_pages
,
562 __LASSERT_TAGE_INVARIANT(tage
);
564 if (tage
->cpu
!= cpu
|| tage
->type
!= i
)
567 cfs_tage_to_tail(tage
, cur_head
);
568 tcd
->tcd_cur_pages
++;
574 static void put_pages_back(struct page_collection
*pc
)
576 if (!libcfs_panic_in_progress
)
577 put_pages_back_on_all_cpus(pc
);
580 /* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
581 * we have a good amount of data at all times for dumping during an LBUG, even
582 * if we have been steadily writing (and otherwise discarding) pages via the
584 static void put_pages_on_tcd_daemon_list(struct page_collection
*pc
,
585 struct cfs_trace_cpu_data
*tcd
)
587 struct cfs_trace_page
*tage
;
588 struct cfs_trace_page
*tmp
;
590 list_for_each_entry_safe(tage
, tmp
, &pc
->pc_pages
, linkage
) {
592 __LASSERT_TAGE_INVARIANT(tage
);
594 if (tage
->cpu
!= tcd
->tcd_cpu
|| tage
->type
!= tcd
->tcd_type
)
597 cfs_tage_to_tail(tage
, &tcd
->tcd_daemon_pages
);
598 tcd
->tcd_cur_daemon_pages
++;
600 if (tcd
->tcd_cur_daemon_pages
> tcd
->tcd_max_pages
) {
601 struct cfs_trace_page
*victim
;
603 __LASSERT(!list_empty(&tcd
->tcd_daemon_pages
));
604 victim
= cfs_tage_from_list(tcd
->tcd_daemon_pages
.next
);
606 __LASSERT_TAGE_INVARIANT(victim
);
608 list_del(&victim
->linkage
);
609 cfs_tage_free(victim
);
610 tcd
->tcd_cur_daemon_pages
--;
615 static void put_pages_on_daemon_list(struct page_collection
*pc
)
617 struct cfs_trace_cpu_data
*tcd
;
620 for_each_possible_cpu(cpu
) {
621 cfs_tcd_for_each_type_lock(tcd
, i
, cpu
)
622 put_pages_on_tcd_daemon_list(pc
, tcd
);
626 void cfs_trace_debug_print(void)
628 struct page_collection pc
;
629 struct cfs_trace_page
*tage
;
630 struct cfs_trace_page
*tmp
;
632 pc
.pc_want_daemon_pages
= 1;
634 list_for_each_entry_safe(tage
, tmp
, &pc
.pc_pages
, linkage
) {
638 __LASSERT_TAGE_INVARIANT(tage
);
641 p
= page_address(page
);
642 while (p
< ((char *)page_address(page
) + tage
->used
)) {
643 struct ptldebug_header
*hdr
;
649 p
+= strlen(file
) + 1;
652 len
= hdr
->ph_len
- (int)(p
- (char *)hdr
);
654 cfs_print_to_console(hdr
, D_EMERG
, p
, len
, file
, fn
);
659 list_del(&tage
->linkage
);
664 int cfs_tracefile_dump_all_pages(char *filename
)
666 struct page_collection pc
;
668 struct cfs_trace_page
*tage
;
669 struct cfs_trace_page
*tmp
;
675 cfs_tracefile_write_lock();
677 filp
= filp_open(filename
, O_CREAT
|O_EXCL
|O_WRONLY
|O_LARGEFILE
, 0600);
681 pr_err("LustreError: can't open %s for dump: rc %d\n",
686 pc
.pc_want_daemon_pages
= 1;
688 if (list_empty(&pc
.pc_pages
)) {
693 /* ok, for now, just write the pages. in the future we'll be building
694 * iobufs with the pages and calling generic_direct_IO */
696 list_for_each_entry_safe(tage
, tmp
, &pc
.pc_pages
, linkage
) {
698 __LASSERT_TAGE_INVARIANT(tage
);
700 buf
= kmap(tage
->page
);
701 rc
= vfs_write(filp
, (__force
const char __user
*)buf
,
702 tage
->used
, &filp
->f_pos
);
705 if (rc
!= (int)tage
->used
) {
706 printk(KERN_WARNING
"wanted to write %u but wrote %d\n",
709 __LASSERT(list_empty(&pc
.pc_pages
));
712 list_del(&tage
->linkage
);
716 rc
= vfs_fsync(filp
, 1);
718 pr_err("sync returns %d\n", rc
);
720 filp_close(filp
, NULL
);
722 cfs_tracefile_write_unlock();
726 void cfs_trace_flush_pages(void)
728 struct page_collection pc
;
729 struct cfs_trace_page
*tage
;
730 struct cfs_trace_page
*tmp
;
732 pc
.pc_want_daemon_pages
= 1;
734 list_for_each_entry_safe(tage
, tmp
, &pc
.pc_pages
, linkage
) {
736 __LASSERT_TAGE_INVARIANT(tage
);
738 list_del(&tage
->linkage
);
743 int cfs_trace_copyin_string(char *knl_buffer
, int knl_buffer_nob
,
744 const char __user
*usr_buffer
, int usr_buffer_nob
)
748 if (usr_buffer_nob
> knl_buffer_nob
)
751 if (copy_from_user((void *)knl_buffer
,
752 usr_buffer
, usr_buffer_nob
))
755 nob
= strnlen(knl_buffer
, usr_buffer_nob
);
756 while (nob
-- >= 0) /* strip trailing whitespace */
757 if (!isspace(knl_buffer
[nob
]))
760 if (nob
< 0) /* empty string */
763 if (nob
== knl_buffer_nob
) /* no space to terminate */
766 knl_buffer
[nob
+ 1] = 0; /* terminate */
769 EXPORT_SYMBOL(cfs_trace_copyin_string
);
771 int cfs_trace_copyout_string(char __user
*usr_buffer
, int usr_buffer_nob
,
772 const char *knl_buffer
, char *append
)
774 /* NB if 'append' != NULL, it's a single character to append to the
775 * copied out string - usually "\n", for /proc entries and "" (i.e. a
776 * terminating zero byte) for sysctl entries */
777 int nob
= strlen(knl_buffer
);
779 if (nob
> usr_buffer_nob
)
780 nob
= usr_buffer_nob
;
782 if (copy_to_user(usr_buffer
, knl_buffer
, nob
))
785 if (append
!= NULL
&& nob
< usr_buffer_nob
) {
786 if (copy_to_user(usr_buffer
+ nob
, append
, 1))
794 EXPORT_SYMBOL(cfs_trace_copyout_string
);
796 int cfs_trace_allocate_string_buffer(char **str
, int nob
)
798 if (nob
> 2 * PAGE_CACHE_SIZE
) /* string must be "sensible" */
801 *str
= kmalloc(nob
, GFP_KERNEL
| __GFP_ZERO
);
808 int cfs_trace_dump_debug_buffer_usrstr(void __user
*usr_str
, int usr_str_nob
)
813 rc
= cfs_trace_allocate_string_buffer(&str
, usr_str_nob
+ 1);
817 rc
= cfs_trace_copyin_string(str
, usr_str_nob
+ 1,
818 usr_str
, usr_str_nob
);
826 rc
= cfs_tracefile_dump_all_pages(str
);
832 int cfs_trace_daemon_command(char *str
)
836 cfs_tracefile_write_lock();
838 if (strcmp(str
, "stop") == 0) {
839 cfs_tracefile_write_unlock();
840 cfs_trace_stop_thread();
841 cfs_tracefile_write_lock();
842 memset(cfs_tracefile
, 0, sizeof(cfs_tracefile
));
844 } else if (strncmp(str
, "size=", 5) == 0) {
845 cfs_tracefile_size
= simple_strtoul(str
+ 5, NULL
, 0);
846 if (cfs_tracefile_size
< 10 || cfs_tracefile_size
> 20480)
847 cfs_tracefile_size
= CFS_TRACEFILE_SIZE
;
849 cfs_tracefile_size
<<= 20;
851 } else if (strlen(str
) >= sizeof(cfs_tracefile
)) {
853 } else if (str
[0] != '/') {
856 strcpy(cfs_tracefile
, str
);
859 "Lustre: debug daemon will attempt to start writing to %s (%lukB max)\n",
861 (long)(cfs_tracefile_size
>> 10));
863 cfs_trace_start_thread();
866 cfs_tracefile_write_unlock();
870 int cfs_trace_daemon_command_usrstr(void __user
*usr_str
, int usr_str_nob
)
875 rc
= cfs_trace_allocate_string_buffer(&str
, usr_str_nob
+ 1);
879 rc
= cfs_trace_copyin_string(str
, usr_str_nob
+ 1,
880 usr_str
, usr_str_nob
);
882 rc
= cfs_trace_daemon_command(str
);
888 int cfs_trace_set_debug_mb(int mb
)
893 int limit
= cfs_trace_max_debug_mb();
894 struct cfs_trace_cpu_data
*tcd
;
896 if (mb
< num_possible_cpus()) {
898 "Lustre: %d MB is too small for debug buffer size, setting it to %d MB.\n",
899 mb
, num_possible_cpus());
900 mb
= num_possible_cpus();
905 "Lustre: %d MB is too large for debug buffer size, setting it to %d MB.\n",
910 mb
/= num_possible_cpus();
911 pages
= mb
<< (20 - PAGE_CACHE_SHIFT
);
913 cfs_tracefile_write_lock();
915 cfs_tcd_for_each(tcd
, i
, j
)
916 tcd
->tcd_max_pages
= (pages
* tcd
->tcd_pages_factor
) / 100;
918 cfs_tracefile_write_unlock();
923 int cfs_trace_get_debug_mb(void)
927 struct cfs_trace_cpu_data
*tcd
;
930 cfs_tracefile_read_lock();
932 cfs_tcd_for_each(tcd
, i
, j
)
933 total_pages
+= tcd
->tcd_max_pages
;
935 cfs_tracefile_read_unlock();
937 return (total_pages
>> (20 - PAGE_CACHE_SHIFT
)) + 1;
940 static int tracefiled(void *arg
)
942 struct page_collection pc
;
943 struct tracefiled_ctl
*tctl
= arg
;
944 struct cfs_trace_page
*tage
;
945 struct cfs_trace_page
*tmp
;
953 /* we're started late enough that we pick up init's fs context */
954 /* this is so broken in uml? what on earth is going on? */
956 complete(&tctl
->tctl_start
);
961 pc
.pc_want_daemon_pages
= 0;
963 if (list_empty(&pc
.pc_pages
))
967 cfs_tracefile_read_lock();
968 if (cfs_tracefile
[0] != 0) {
969 filp
= filp_open(cfs_tracefile
,
970 O_CREAT
| O_RDWR
| O_LARGEFILE
,
975 printk(KERN_WARNING
"couldn't open %s: %d\n",
979 cfs_tracefile_read_unlock();
981 put_pages_on_daemon_list(&pc
);
982 __LASSERT(list_empty(&pc
.pc_pages
));
988 list_for_each_entry_safe(tage
, tmp
, &pc
.pc_pages
,
992 __LASSERT_TAGE_INVARIANT(tage
);
994 if (f_pos
>= (off_t
)cfs_tracefile_size
)
996 else if (f_pos
> i_size_read(file_inode(filp
)))
997 f_pos
= i_size_read(file_inode(filp
));
999 buf
= kmap(tage
->page
);
1000 rc
= vfs_write(filp
, (__force
const char __user
*)buf
,
1001 tage
->used
, &f_pos
);
1004 if (rc
!= (int)tage
->used
) {
1005 printk(KERN_WARNING
"wanted to write %u but wrote %d\n",
1007 put_pages_back(&pc
);
1008 __LASSERT(list_empty(&pc
.pc_pages
));
1014 filp_close(filp
, NULL
);
1015 put_pages_on_daemon_list(&pc
);
1016 if (!list_empty(&pc
.pc_pages
)) {
1019 printk(KERN_ALERT
"Lustre: trace pages aren't empty\n");
1020 pr_err("total cpus(%d): ",
1021 num_possible_cpus());
1022 for (i
= 0; i
< num_possible_cpus(); i
++)
1024 pr_cont("%d(on) ", i
);
1026 pr_cont("%d(off) ", i
);
1030 list_for_each_entry_safe(tage
, tmp
, &pc
.pc_pages
,
1032 pr_err("page %d belongs to cpu %d\n",
1034 pr_err("There are %d pages unwritten\n", i
);
1036 __LASSERT(list_empty(&pc
.pc_pages
));
1038 if (atomic_read(&tctl
->tctl_shutdown
)) {
1039 if (last_loop
== 0) {
1046 init_waitqueue_entry(&__wait
, current
);
1047 add_wait_queue(&tctl
->tctl_waitq
, &__wait
);
1048 set_current_state(TASK_INTERRUPTIBLE
);
1049 schedule_timeout(cfs_time_seconds(1));
1050 remove_wait_queue(&tctl
->tctl_waitq
, &__wait
);
1052 complete(&tctl
->tctl_stop
);
1056 int cfs_trace_start_thread(void)
1058 struct tracefiled_ctl
*tctl
= &trace_tctl
;
1061 mutex_lock(&cfs_trace_thread_mutex
);
1065 init_completion(&tctl
->tctl_start
);
1066 init_completion(&tctl
->tctl_stop
);
1067 init_waitqueue_head(&tctl
->tctl_waitq
);
1068 atomic_set(&tctl
->tctl_shutdown
, 0);
1070 if (IS_ERR(kthread_run(tracefiled
, tctl
, "ktracefiled"))) {
1075 wait_for_completion(&tctl
->tctl_start
);
1078 mutex_unlock(&cfs_trace_thread_mutex
);
1082 void cfs_trace_stop_thread(void)
1084 struct tracefiled_ctl
*tctl
= &trace_tctl
;
1086 mutex_lock(&cfs_trace_thread_mutex
);
1087 if (thread_running
) {
1089 "Lustre: shutting down debug daemon thread...\n");
1090 atomic_set(&tctl
->tctl_shutdown
, 1);
1091 wait_for_completion(&tctl
->tctl_stop
);
1094 mutex_unlock(&cfs_trace_thread_mutex
);
1097 int cfs_tracefile_init(int max_pages
)
1099 struct cfs_trace_cpu_data
*tcd
;
1105 rc
= cfs_tracefile_init_arch();
1109 cfs_tcd_for_each(tcd
, i
, j
) {
1110 /* tcd_pages_factor is initialized int tracefile_init_arch. */
1111 factor
= tcd
->tcd_pages_factor
;
1112 INIT_LIST_HEAD(&tcd
->tcd_pages
);
1113 INIT_LIST_HEAD(&tcd
->tcd_stock_pages
);
1114 INIT_LIST_HEAD(&tcd
->tcd_daemon_pages
);
1115 tcd
->tcd_cur_pages
= 0;
1116 tcd
->tcd_cur_stock_pages
= 0;
1117 tcd
->tcd_cur_daemon_pages
= 0;
1118 tcd
->tcd_max_pages
= (max_pages
* factor
) / 100;
1119 LASSERT(tcd
->tcd_max_pages
> 0);
1120 tcd
->tcd_shutting_down
= 0;
1126 static void trace_cleanup_on_all_cpus(void)
1128 struct cfs_trace_cpu_data
*tcd
;
1129 struct cfs_trace_page
*tage
;
1130 struct cfs_trace_page
*tmp
;
1133 for_each_possible_cpu(cpu
) {
1134 cfs_tcd_for_each_type_lock(tcd
, i
, cpu
) {
1135 tcd
->tcd_shutting_down
= 1;
1137 list_for_each_entry_safe(tage
, tmp
, &tcd
->tcd_pages
,
1139 __LASSERT_TAGE_INVARIANT(tage
);
1141 list_del(&tage
->linkage
);
1142 cfs_tage_free(tage
);
1145 tcd
->tcd_cur_pages
= 0;
1150 static void cfs_trace_cleanup(void)
1152 struct page_collection pc
;
1154 INIT_LIST_HEAD(&pc
.pc_pages
);
1156 trace_cleanup_on_all_cpus();
1158 cfs_tracefile_fini_arch();
/* Module teardown: stop the writer thread, then release all trace state. */
void cfs_tracefile_exit(void)
{
	cfs_trace_stop_thread();
	cfs_trace_cleanup();
}