4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 #define DEBUG_SUBSYSTEM S_CLASS
38 #include "../include/obd_support.h"
39 #include "../include/obd.h"
40 #include "../include/lprocfs_status.h"
41 #include "../include/lustre/lustre_idl.h"
42 #include "../include/lustre_net.h"
43 #include "../include/obd_class.h"
44 #include "ptlrpc_internal.h"
46 static struct ll_rpc_opcode
{
49 } ll_rpc_opcode_table
[LUSTRE_MAX_OPCODES
] = {
50 { OST_REPLY
, "ost_reply" },
51 { OST_GETATTR
, "ost_getattr" },
52 { OST_SETATTR
, "ost_setattr" },
53 { OST_READ
, "ost_read" },
54 { OST_WRITE
, "ost_write" },
55 { OST_CREATE
, "ost_create" },
56 { OST_DESTROY
, "ost_destroy" },
57 { OST_GET_INFO
, "ost_get_info" },
58 { OST_CONNECT
, "ost_connect" },
59 { OST_DISCONNECT
, "ost_disconnect" },
60 { OST_PUNCH
, "ost_punch" },
61 { OST_OPEN
, "ost_open" },
62 { OST_CLOSE
, "ost_close" },
63 { OST_STATFS
, "ost_statfs" },
64 { 14, NULL
}, /* formerly OST_SAN_READ */
65 { 15, NULL
}, /* formerly OST_SAN_WRITE */
66 { OST_SYNC
, "ost_sync" },
67 { OST_SET_INFO
, "ost_set_info" },
68 { OST_QUOTACHECK
, "ost_quotacheck" },
69 { OST_QUOTACTL
, "ost_quotactl" },
70 { OST_QUOTA_ADJUST_QUNIT
, "ost_quota_adjust_qunit" },
71 { MDS_GETATTR
, "mds_getattr" },
72 { MDS_GETATTR_NAME
, "mds_getattr_lock" },
73 { MDS_CLOSE
, "mds_close" },
74 { MDS_REINT
, "mds_reint" },
75 { MDS_READPAGE
, "mds_readpage" },
76 { MDS_CONNECT
, "mds_connect" },
77 { MDS_DISCONNECT
, "mds_disconnect" },
78 { MDS_GETSTATUS
, "mds_getstatus" },
79 { MDS_STATFS
, "mds_statfs" },
80 { MDS_PIN
, "mds_pin" },
81 { MDS_UNPIN
, "mds_unpin" },
82 { MDS_SYNC
, "mds_sync" },
83 { MDS_DONE_WRITING
, "mds_done_writing" },
84 { MDS_SET_INFO
, "mds_set_info" },
85 { MDS_QUOTACHECK
, "mds_quotacheck" },
86 { MDS_QUOTACTL
, "mds_quotactl" },
87 { MDS_GETXATTR
, "mds_getxattr" },
88 { MDS_SETXATTR
, "mds_setxattr" },
89 { MDS_WRITEPAGE
, "mds_writepage" },
90 { MDS_IS_SUBDIR
, "mds_is_subdir" },
91 { MDS_GET_INFO
, "mds_get_info" },
92 { MDS_HSM_STATE_GET
, "mds_hsm_state_get" },
93 { MDS_HSM_STATE_SET
, "mds_hsm_state_set" },
94 { MDS_HSM_ACTION
, "mds_hsm_action" },
95 { MDS_HSM_PROGRESS
, "mds_hsm_progress" },
96 { MDS_HSM_REQUEST
, "mds_hsm_request" },
97 { MDS_HSM_CT_REGISTER
, "mds_hsm_ct_register" },
98 { MDS_HSM_CT_UNREGISTER
, "mds_hsm_ct_unregister" },
99 { MDS_SWAP_LAYOUTS
, "mds_swap_layouts" },
100 { LDLM_ENQUEUE
, "ldlm_enqueue" },
101 { LDLM_CONVERT
, "ldlm_convert" },
102 { LDLM_CANCEL
, "ldlm_cancel" },
103 { LDLM_BL_CALLBACK
, "ldlm_bl_callback" },
104 { LDLM_CP_CALLBACK
, "ldlm_cp_callback" },
105 { LDLM_GL_CALLBACK
, "ldlm_gl_callback" },
106 { LDLM_SET_INFO
, "ldlm_set_info" },
107 { MGS_CONNECT
, "mgs_connect" },
108 { MGS_DISCONNECT
, "mgs_disconnect" },
109 { MGS_EXCEPTION
, "mgs_exception" },
110 { MGS_TARGET_REG
, "mgs_target_reg" },
111 { MGS_TARGET_DEL
, "mgs_target_del" },
112 { MGS_SET_INFO
, "mgs_set_info" },
113 { MGS_CONFIG_READ
, "mgs_config_read" },
114 { OBD_PING
, "obd_ping" },
115 { OBD_LOG_CANCEL
, "llog_cancel" },
116 { OBD_QC_CALLBACK
, "obd_quota_callback" },
117 { OBD_IDX_READ
, "dt_index_read" },
118 { LLOG_ORIGIN_HANDLE_CREATE
, "llog_origin_handle_open" },
119 { LLOG_ORIGIN_HANDLE_NEXT_BLOCK
, "llog_origin_handle_next_block" },
120 { LLOG_ORIGIN_HANDLE_READ_HEADER
, "llog_origin_handle_read_header" },
121 { LLOG_ORIGIN_HANDLE_WRITE_REC
, "llog_origin_handle_write_rec" },
122 { LLOG_ORIGIN_HANDLE_CLOSE
, "llog_origin_handle_close" },
123 { LLOG_ORIGIN_CONNECT
, "llog_origin_connect" },
124 { LLOG_CATINFO
, "llog_catinfo" },
125 { LLOG_ORIGIN_HANDLE_PREV_BLOCK
, "llog_origin_handle_prev_block" },
126 { LLOG_ORIGIN_HANDLE_DESTROY
, "llog_origin_handle_destroy" },
127 { QUOTA_DQACQ
, "quota_acquire" },
128 { QUOTA_DQREL
, "quota_release" },
129 { SEQ_QUERY
, "seq_query" },
130 { SEC_CTX_INIT
, "sec_ctx_init" },
131 { SEC_CTX_INIT_CONT
, "sec_ctx_init_cont" },
132 { SEC_CTX_FINI
, "sec_ctx_fini" },
133 { FLD_QUERY
, "fld_query" },
134 { FLD_READ
, "fld_read" },
137 static struct ll_eopcode
{
140 } ll_eopcode_table
[EXTRA_LAST_OPC
] = {
141 { LDLM_GLIMPSE_ENQUEUE
, "ldlm_glimpse_enqueue" },
142 { LDLM_PLAIN_ENQUEUE
, "ldlm_plain_enqueue" },
143 { LDLM_EXTENT_ENQUEUE
, "ldlm_extent_enqueue" },
144 { LDLM_FLOCK_ENQUEUE
, "ldlm_flock_enqueue" },
145 { LDLM_IBITS_ENQUEUE
, "ldlm_ibits_enqueue" },
146 { MDS_REINT_SETATTR
, "mds_reint_setattr" },
147 { MDS_REINT_CREATE
, "mds_reint_create" },
148 { MDS_REINT_LINK
, "mds_reint_link" },
149 { MDS_REINT_UNLINK
, "mds_reint_unlink" },
150 { MDS_REINT_RENAME
, "mds_reint_rename" },
151 { MDS_REINT_OPEN
, "mds_reint_open" },
152 { MDS_REINT_SETXATTR
, "mds_reint_setxattr" },
153 { BRW_READ_BYTES
, "read_bytes" },
154 { BRW_WRITE_BYTES
, "write_bytes" },
157 const char *ll_opcode2str(__u32 opcode
)
159 /* When one of the assertions below fail, chances are that:
160 * 1) A new opcode was added in include/lustre/lustre_idl.h,
161 * but is missing from the table above.
162 * or 2) The opcode space was renumbered or rearranged,
163 * and the opcode_offset() function in
164 * ptlrpc_internal.h needs to be modified.
166 __u32 offset
= opcode_offset(opcode
);
168 LASSERTF(offset
< LUSTRE_MAX_OPCODES
,
169 "offset %u >= LUSTRE_MAX_OPCODES %u\n",
170 offset
, LUSTRE_MAX_OPCODES
);
171 LASSERTF(ll_rpc_opcode_table
[offset
].opcode
== opcode
,
172 "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n",
173 offset
, ll_rpc_opcode_table
[offset
].opcode
, opcode
);
174 return ll_rpc_opcode_table
[offset
].opname
;
177 static const char *ll_eopcode2str(__u32 opcode
)
179 LASSERT(ll_eopcode_table
[opcode
].opcode
== opcode
);
180 return ll_eopcode_table
[opcode
].opname
;
184 ptlrpc_ldebugfs_register(struct dentry
*root
, char *dir
,
186 struct dentry
**debugfs_root_ret
,
187 struct lprocfs_stats
**stats_ret
)
189 struct dentry
*svc_debugfs_entry
;
190 struct lprocfs_stats
*svc_stats
;
192 unsigned int svc_counter_config
= LPROCFS_CNTR_AVGMINMAX
|
195 LASSERT(!*debugfs_root_ret
);
196 LASSERT(!*stats_ret
);
198 svc_stats
= lprocfs_alloc_stats(EXTRA_MAX_OPCODES
+LUSTRE_MAX_OPCODES
,
204 svc_debugfs_entry
= ldebugfs_register(dir
, root
, NULL
, NULL
);
205 if (IS_ERR(svc_debugfs_entry
)) {
206 lprocfs_free_stats(&svc_stats
);
210 svc_debugfs_entry
= root
;
213 lprocfs_counter_init(svc_stats
, PTLRPC_REQWAIT_CNTR
,
214 svc_counter_config
, "req_waittime", "usec");
215 lprocfs_counter_init(svc_stats
, PTLRPC_REQQDEPTH_CNTR
,
216 svc_counter_config
, "req_qdepth", "reqs");
217 lprocfs_counter_init(svc_stats
, PTLRPC_REQACTIVE_CNTR
,
218 svc_counter_config
, "req_active", "reqs");
219 lprocfs_counter_init(svc_stats
, PTLRPC_TIMEOUT
,
220 svc_counter_config
, "req_timeout", "sec");
221 lprocfs_counter_init(svc_stats
, PTLRPC_REQBUF_AVAIL_CNTR
,
222 svc_counter_config
, "reqbuf_avail", "bufs");
223 for (i
= 0; i
< EXTRA_LAST_OPC
; i
++) {
227 case BRW_WRITE_BYTES
:
235 lprocfs_counter_init(svc_stats
, PTLRPC_LAST_CNTR
+ i
,
237 ll_eopcode2str(i
), units
);
239 for (i
= 0; i
< LUSTRE_MAX_OPCODES
; i
++) {
240 __u32 opcode
= ll_rpc_opcode_table
[i
].opcode
;
242 lprocfs_counter_init(svc_stats
,
243 EXTRA_MAX_OPCODES
+ i
, svc_counter_config
,
244 ll_opcode2str(opcode
), "usec");
247 rc
= ldebugfs_register_stats(svc_debugfs_entry
, name
, svc_stats
);
250 ldebugfs_remove(&svc_debugfs_entry
);
251 lprocfs_free_stats(&svc_stats
);
254 *debugfs_root_ret
= svc_debugfs_entry
;
255 *stats_ret
= svc_stats
;
260 ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file
*m
, void *v
)
262 struct ptlrpc_service
*svc
= m
->private;
263 struct ptlrpc_service_part
*svcpt
;
267 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
268 total
+= svcpt
->scp_hist_nrqbds
;
270 seq_printf(m
, "%d\n", total
);
274 LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len
);
277 ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file
*m
, void *n
)
279 struct ptlrpc_service
*svc
= m
->private;
280 struct ptlrpc_service_part
*svcpt
;
284 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
285 total
+= svc
->srv_hist_nrqbds_cpt_max
;
287 seq_printf(m
, "%d\n", total
);
292 ptlrpc_lprocfs_req_history_max_seq_write(struct file
*file
,
293 const char __user
*buffer
,
294 size_t count
, loff_t
*off
)
296 struct ptlrpc_service
*svc
= ((struct seq_file
*)file
->private_data
)->private;
301 rc
= lprocfs_write_helper(buffer
, count
, &val
);
308 /* This sanity check is more of an insanity check; we can still
309 * hose a kernel by allowing the request history to grow too
312 bufpages
= (svc
->srv_buf_size
+ PAGE_SIZE
- 1) >> PAGE_SHIFT
;
313 if (val
> totalram_pages
/ (2 * bufpages
))
316 spin_lock(&svc
->srv_lock
);
319 svc
->srv_hist_nrqbds_cpt_max
= 0;
321 svc
->srv_hist_nrqbds_cpt_max
= max(1, (val
/ svc
->srv_ncpts
));
323 spin_unlock(&svc
->srv_lock
);
328 LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max
);
330 static ssize_t
threads_min_show(struct kobject
*kobj
, struct attribute
*attr
,
333 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
336 return sprintf(buf
, "%d\n", svc
->srv_nthrs_cpt_init
* svc
->srv_ncpts
);
339 static ssize_t
threads_min_store(struct kobject
*kobj
, struct attribute
*attr
,
340 const char *buffer
, size_t count
)
342 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
345 int rc
= kstrtoul(buffer
, 10, &val
);
350 if (val
/ svc
->srv_ncpts
< PTLRPC_NTHRS_INIT
)
353 spin_lock(&svc
->srv_lock
);
354 if (val
> svc
->srv_nthrs_cpt_limit
* svc
->srv_ncpts
) {
355 spin_unlock(&svc
->srv_lock
);
359 svc
->srv_nthrs_cpt_init
= val
/ svc
->srv_ncpts
;
361 spin_unlock(&svc
->srv_lock
);
365 LUSTRE_RW_ATTR(threads_min
);
367 static ssize_t
threads_started_show(struct kobject
*kobj
,
368 struct attribute
*attr
,
371 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
373 struct ptlrpc_service_part
*svcpt
;
377 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
378 total
+= svcpt
->scp_nthrs_running
;
380 return sprintf(buf
, "%d\n", total
);
382 LUSTRE_RO_ATTR(threads_started
);
384 static ssize_t
threads_max_show(struct kobject
*kobj
, struct attribute
*attr
,
387 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
390 return sprintf(buf
, "%d\n", svc
->srv_nthrs_cpt_limit
* svc
->srv_ncpts
);
393 static ssize_t
threads_max_store(struct kobject
*kobj
, struct attribute
*attr
,
394 const char *buffer
, size_t count
)
396 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
399 int rc
= kstrtoul(buffer
, 10, &val
);
404 if (val
/ svc
->srv_ncpts
< PTLRPC_NTHRS_INIT
)
407 spin_lock(&svc
->srv_lock
);
408 if (val
< svc
->srv_nthrs_cpt_init
* svc
->srv_ncpts
) {
409 spin_unlock(&svc
->srv_lock
);
413 svc
->srv_nthrs_cpt_limit
= val
/ svc
->srv_ncpts
;
415 spin_unlock(&svc
->srv_lock
);
419 LUSTRE_RW_ATTR(threads_max
);
427 * Translates \e ptlrpc_nrs_pol_state values to human-readable strings.
429 * \param[in] state The policy state
431 static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state
)
436 case NRS_POL_STATE_INVALID
:
438 case NRS_POL_STATE_STOPPED
:
440 case NRS_POL_STATE_STOPPING
:
442 case NRS_POL_STATE_STARTING
:
444 case NRS_POL_STATE_STARTED
:
450 * Obtains status information for \a policy.
452 * Information is copied in \a info.
454 * \param[in] policy The policy
455 * \param[out] info Holds returned status information
457 static void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy
*policy
,
458 struct ptlrpc_nrs_pol_info
*info
)
460 assert_spin_locked(&policy
->pol_nrs
->nrs_lock
);
462 memcpy(info
->pi_name
, policy
->pol_desc
->pd_name
, NRS_POL_NAME_MAX
);
464 info
->pi_fallback
= !!(policy
->pol_flags
& PTLRPC_NRS_FL_FALLBACK
);
465 info
->pi_state
= policy
->pol_state
;
467 * XXX: These are accessed without holding
468 * ptlrpc_service_part::scp_req_lock.
470 info
->pi_req_queued
= policy
->pol_req_queued
;
471 info
->pi_req_started
= policy
->pol_req_started
;
475 * Reads and prints policy status information for all policies of a PTLRPC
478 static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file
*m
, void *n
)
480 struct ptlrpc_service
*svc
= m
->private;
481 struct ptlrpc_service_part
*svcpt
;
482 struct ptlrpc_nrs
*nrs
;
483 struct ptlrpc_nrs_policy
*policy
;
484 struct ptlrpc_nrs_pol_info
*infos
;
485 struct ptlrpc_nrs_pol_info tmp
;
487 unsigned pol_idx
= 0;
493 * Serialize NRS core lprocfs operations with policy registration/
496 mutex_lock(&nrs_core
.nrs_mutex
);
499 * Use the first service partition's regular NRS head in order to obtain
500 * the number of policies registered with NRS heads of this service. All
501 * service partitions will have the same number of policies.
503 nrs
= nrs_svcpt2nrs(svc
->srv_parts
[0], false);
505 spin_lock(&nrs
->nrs_lock
);
506 num_pols
= svc
->srv_parts
[0]->scp_nrs_reg
.nrs_num_pols
;
507 spin_unlock(&nrs
->nrs_lock
);
509 infos
= kcalloc(num_pols
, sizeof(*infos
), GFP_NOFS
);
516 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
517 nrs
= nrs_svcpt2nrs(svcpt
, hp
);
518 spin_lock(&nrs
->nrs_lock
);
522 list_for_each_entry(policy
, &nrs
->nrs_policy_list
, pol_list
) {
523 LASSERT(pol_idx
< num_pols
);
525 nrs_policy_get_info_locked(policy
, &tmp
);
527 * Copy values when handling the first service
531 memcpy(infos
[pol_idx
].pi_name
, tmp
.pi_name
,
533 memcpy(&infos
[pol_idx
].pi_state
, &tmp
.pi_state
,
534 sizeof(tmp
.pi_state
));
535 infos
[pol_idx
].pi_fallback
= tmp
.pi_fallback
;
537 * For the rest of the service partitions
538 * sanity-check the values we get.
541 LASSERT(strncmp(infos
[pol_idx
].pi_name
,
543 NRS_POL_NAME_MAX
) == 0);
545 * Not asserting ptlrpc_nrs_pol_info::pi_state,
546 * because it may be different between
547 * instances of the same policy in different
548 * service partitions.
550 LASSERT(infos
[pol_idx
].pi_fallback
==
554 infos
[pol_idx
].pi_req_queued
+= tmp
.pi_req_queued
;
555 infos
[pol_idx
].pi_req_started
+= tmp
.pi_req_started
;
559 spin_unlock(&nrs
->nrs_lock
);
563 * Policy status information output is in YAML format.
579 * high_priority_requests:
592 seq_printf(m
, "%s\n",
593 !hp
? "\nregular_requests:" : "high_priority_requests:");
595 for (pol_idx
= 0; pol_idx
< num_pols
; pol_idx
++) {
596 seq_printf(m
, " - name: %s\n"
600 " active: %-20d\n\n",
601 infos
[pol_idx
].pi_name
,
602 nrs_state2str(infos
[pol_idx
].pi_state
),
603 infos
[pol_idx
].pi_fallback
? "yes" : "no",
604 (int)infos
[pol_idx
].pi_req_queued
,
605 (int)infos
[pol_idx
].pi_req_started
);
608 if (!hp
&& nrs_svc_has_hp(svc
)) {
609 memset(infos
, 0, num_pols
* sizeof(*infos
));
612 * Redo the processing for the service's HP NRS heads' policies.
620 mutex_unlock(&nrs_core
.nrs_mutex
);
626 * The longest valid command string is the maximum policy name size, plus the
627 * length of the " reg" substring
629 #define LPROCFS_NRS_WR_MAX_CMD (NRS_POL_NAME_MAX + sizeof(" reg") - 1)
632 * Starts and stops a given policy on a PTLRPC service.
634 * Commands consist of the policy name, followed by an optional [reg|hp] token;
635 * if the optional token is omitted, the operation is performed on both the
636 * regular and high-priority (if the service has one) NRS head.
638 static ssize_t
ptlrpc_lprocfs_nrs_seq_write(struct file
*file
,
639 const char __user
*buffer
,
640 size_t count
, loff_t
*off
)
642 struct ptlrpc_service
*svc
= ((struct seq_file
*)file
->private_data
)->private;
643 enum ptlrpc_nrs_queue_type queue
= PTLRPC_NRS_QUEUE_BOTH
;
645 char *cmd_copy
= NULL
;
649 if (count
>= LPROCFS_NRS_WR_MAX_CMD
)
652 cmd
= kzalloc(LPROCFS_NRS_WR_MAX_CMD
, GFP_NOFS
);
656 * strsep() modifies its argument, so keep a copy
660 if (copy_from_user(cmd
, buffer
, count
)) {
667 token
= strsep(&cmd
, " ");
669 if (strlen(token
) > NRS_POL_NAME_MAX
- 1) {
675 * No [reg|hp] token has been specified
681 * The second token is either NULL, or an optional [reg|hp] string
683 if (strcmp(cmd
, "reg") == 0) {
684 queue
= PTLRPC_NRS_QUEUE_REG
;
685 } else if (strcmp(cmd
, "hp") == 0) {
686 queue
= PTLRPC_NRS_QUEUE_HP
;
694 if (queue
== PTLRPC_NRS_QUEUE_HP
&& !nrs_svc_has_hp(svc
)) {
697 } else if (queue
== PTLRPC_NRS_QUEUE_BOTH
&& !nrs_svc_has_hp(svc
)) {
698 queue
= PTLRPC_NRS_QUEUE_REG
;
702 * Serialize NRS core lprocfs operations with policy registration/
705 mutex_lock(&nrs_core
.nrs_mutex
);
707 rc
= ptlrpc_nrs_policy_control(svc
, queue
, token
, PTLRPC_NRS_CTL_START
,
710 mutex_unlock(&nrs_core
.nrs_mutex
);
714 return rc
< 0 ? rc
: count
;
717 LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs
);
721 struct ptlrpc_srh_iterator
{
724 struct ptlrpc_request
*srhi_req
;
728 ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part
*svcpt
,
729 struct ptlrpc_srh_iterator
*srhi
,
733 struct ptlrpc_request
*req
;
735 if (srhi
->srhi_req
&& srhi
->srhi_seq
> svcpt
->scp_hist_seq_culled
&&
736 srhi
->srhi_seq
<= seq
) {
737 /* If srhi_req was set previously, hasn't been culled and
738 * we're searching for a seq on or after it (i.e. more
739 * recent), search from it onwards.
740 * Since the service history is LRU (i.e. culled reqs will
741 * be near the head), we shouldn't have to do long
744 LASSERTF(srhi
->srhi_seq
== srhi
->srhi_req
->rq_history_seq
,
745 "%s:%d: seek seq %llu, request seq %llu\n",
746 svcpt
->scp_service
->srv_name
, svcpt
->scp_cpt
,
747 srhi
->srhi_seq
, srhi
->srhi_req
->rq_history_seq
);
748 LASSERTF(!list_empty(&svcpt
->scp_hist_reqs
),
749 "%s:%d: seek offset %llu, request seq %llu, last culled %llu\n",
750 svcpt
->scp_service
->srv_name
, svcpt
->scp_cpt
,
751 seq
, srhi
->srhi_seq
, svcpt
->scp_hist_seq_culled
);
752 e
= &srhi
->srhi_req
->rq_history_list
;
754 /* search from start */
755 e
= svcpt
->scp_hist_reqs
.next
;
758 while (e
!= &svcpt
->scp_hist_reqs
) {
759 req
= list_entry(e
, struct ptlrpc_request
, rq_history_list
);
761 if (req
->rq_history_seq
>= seq
) {
762 srhi
->srhi_seq
= req
->rq_history_seq
;
763 srhi
->srhi_req
= req
;
773 * ptlrpc history sequence is used as "position" of seq_file, in some case,
774 * seq_read() will increase "position" to indicate reading the next
775 * element, however, low bits of history sequence are reserved for CPT id
776 * (check the details from comments before ptlrpc_req_add_history), which
777 * means seq_read() might change CPT id of history sequence and never
778 * finish reading of requests on a CPT. To make it work, we have to shift
779 * CPT id to high bits and timestamp to low bits, so seq_read() will only
780 * increase timestamp which can correctly indicate the next position.
783 /* convert seq_file pos to cpt */
784 #define PTLRPC_REQ_POS2CPT(svc, pos) \
785 ((svc)->srv_cpt_bits == 0 ? 0 : \
786 (__u64)(pos) >> (64 - (svc)->srv_cpt_bits))
788 /* make up seq_file pos from cpt */
789 #define PTLRPC_REQ_CPT2POS(svc, cpt) \
790 ((svc)->srv_cpt_bits == 0 ? 0 : \
791 (cpt) << (64 - (svc)->srv_cpt_bits))
793 /* convert sequence to position */
794 #define PTLRPC_REQ_SEQ2POS(svc, seq) \
795 ((svc)->srv_cpt_bits == 0 ? (seq) : \
796 ((seq) >> (svc)->srv_cpt_bits) | \
797 ((seq) << (64 - (svc)->srv_cpt_bits)))
799 /* convert position to sequence */
800 #define PTLRPC_REQ_POS2SEQ(svc, pos) \
801 ((svc)->srv_cpt_bits == 0 ? (pos) : \
802 ((__u64)(pos) << (svc)->srv_cpt_bits) | \
803 ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits)))
806 ptlrpc_lprocfs_svc_req_history_start(struct seq_file
*s
, loff_t
*pos
)
808 struct ptlrpc_service
*svc
= s
->private;
809 struct ptlrpc_service_part
*svcpt
;
810 struct ptlrpc_srh_iterator
*srhi
;
815 if (sizeof(loff_t
) != sizeof(__u64
)) { /* can't support */
816 CWARN("Failed to read request history because size of loff_t %d can't match size of u64\n",
817 (int)sizeof(loff_t
));
821 srhi
= kzalloc(sizeof(*srhi
), GFP_NOFS
);
826 srhi
->srhi_req
= NULL
;
828 cpt
= PTLRPC_REQ_POS2CPT(svc
, *pos
);
830 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
831 if (i
< cpt
) /* skip */
833 if (i
> cpt
) /* make up the lowest position for this CPT */
834 *pos
= PTLRPC_REQ_CPT2POS(svc
, i
);
836 spin_lock(&svcpt
->scp_lock
);
837 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
,
838 PTLRPC_REQ_POS2SEQ(svc
, *pos
));
839 spin_unlock(&svcpt
->scp_lock
);
841 *pos
= PTLRPC_REQ_SEQ2POS(svc
, srhi
->srhi_seq
);
852 ptlrpc_lprocfs_svc_req_history_stop(struct seq_file
*s
, void *iter
)
854 struct ptlrpc_srh_iterator
*srhi
= iter
;
860 ptlrpc_lprocfs_svc_req_history_next(struct seq_file
*s
,
861 void *iter
, loff_t
*pos
)
863 struct ptlrpc_service
*svc
= s
->private;
864 struct ptlrpc_srh_iterator
*srhi
= iter
;
865 struct ptlrpc_service_part
*svcpt
;
870 for (i
= srhi
->srhi_idx
; i
< svc
->srv_ncpts
; i
++) {
871 svcpt
= svc
->srv_parts
[i
];
873 if (i
> srhi
->srhi_idx
) { /* reset iterator for a new CPT */
874 srhi
->srhi_req
= NULL
;
877 } else { /* the next sequence */
878 seq
= srhi
->srhi_seq
+ (1 << svc
->srv_cpt_bits
);
881 spin_lock(&svcpt
->scp_lock
);
882 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
, seq
);
883 spin_unlock(&svcpt
->scp_lock
);
885 *pos
= PTLRPC_REQ_SEQ2POS(svc
, srhi
->srhi_seq
);
895 static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file
*s
, void *iter
)
897 struct ptlrpc_service
*svc
= s
->private;
898 struct ptlrpc_srh_iterator
*srhi
= iter
;
899 struct ptlrpc_service_part
*svcpt
;
900 struct ptlrpc_request
*req
;
903 LASSERT(srhi
->srhi_idx
< svc
->srv_ncpts
);
905 svcpt
= svc
->srv_parts
[srhi
->srhi_idx
];
907 spin_lock(&svcpt
->scp_lock
);
909 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
, srhi
->srhi_seq
);
912 char nidstr
[LNET_NIDSTR_SIZE
];
914 req
= srhi
->srhi_req
;
916 libcfs_nid2str_r(req
->rq_self
, nidstr
, sizeof(nidstr
));
917 /* Print common req fields.
918 * CAVEAT EMPTOR: we're racing with the service handler
919 * here. The request could contain any old crap, so you
920 * must be just as careful as the service's request
921 * parser. Currently I only print stuff here I know is OK
922 * to look at coz it was set up in request_in_callback()!!!
924 seq_printf(s
, "%lld:%s:%s:x%llu:%d:%s:%lld:%lds(%+lds) ",
925 req
->rq_history_seq
, nidstr
,
926 libcfs_id2str(req
->rq_peer
), req
->rq_xid
,
927 req
->rq_reqlen
, ptlrpc_rqphase2str(req
),
928 (s64
)req
->rq_arrival_time
.tv_sec
,
929 (long)(req
->rq_sent
- req
->rq_arrival_time
.tv_sec
),
930 (long)(req
->rq_sent
- req
->rq_deadline
));
931 if (!svc
->srv_ops
.so_req_printer
)
934 svc
->srv_ops
.so_req_printer(s
, srhi
->srhi_req
);
937 spin_unlock(&svcpt
->scp_lock
);
942 ptlrpc_lprocfs_svc_req_history_open(struct inode
*inode
, struct file
*file
)
944 static struct seq_operations sops
= {
945 .start
= ptlrpc_lprocfs_svc_req_history_start
,
946 .stop
= ptlrpc_lprocfs_svc_req_history_stop
,
947 .next
= ptlrpc_lprocfs_svc_req_history_next
,
948 .show
= ptlrpc_lprocfs_svc_req_history_show
,
950 struct seq_file
*seqf
;
953 rc
= seq_open(file
, &sops
);
957 seqf
= file
->private_data
;
958 seqf
->private = inode
->i_private
;
962 /* See also lprocfs_rd_timeouts */
963 static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file
*m
, void *n
)
965 struct ptlrpc_service
*svc
= m
->private;
966 struct ptlrpc_service_part
*svcpt
;
974 seq_printf(m
, "adaptive timeouts off, using obd_timeout %u\n",
979 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
980 cur
= at_get(&svcpt
->scp_at_estimate
);
981 worst
= svcpt
->scp_at_estimate
.at_worst_ever
;
982 worstt
= svcpt
->scp_at_estimate
.at_worst_time
;
983 s2dhms(&ts
, ktime_get_real_seconds() - worstt
);
985 seq_printf(m
, "%10s : cur %3u worst %3u (at %lld, "
986 DHMS_FMT
" ago) ", "service",
987 cur
, worst
, (s64
)worstt
, DHMS_VARS(&ts
));
989 lprocfs_at_hist_helper(m
, &svcpt
->scp_at_estimate
);
995 LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts
);
997 static ssize_t
high_priority_ratio_show(struct kobject
*kobj
,
998 struct attribute
*attr
,
1001 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1003 return sprintf(buf
, "%d\n", svc
->srv_hpreq_ratio
);
1006 static ssize_t
high_priority_ratio_store(struct kobject
*kobj
,
1007 struct attribute
*attr
,
1011 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1016 rc
= kstrtoint(buffer
, 10, &val
);
1023 spin_lock(&svc
->srv_lock
);
1024 svc
->srv_hpreq_ratio
= val
;
1025 spin_unlock(&svc
->srv_lock
);
1029 LUSTRE_RW_ATTR(high_priority_ratio
);
1031 static struct attribute
*ptlrpc_svc_attrs
[] = {
1032 &lustre_attr_threads_min
.attr
,
1033 &lustre_attr_threads_started
.attr
,
1034 &lustre_attr_threads_max
.attr
,
1035 &lustre_attr_high_priority_ratio
.attr
,
1039 static void ptlrpc_sysfs_svc_release(struct kobject
*kobj
)
1041 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1044 complete(&svc
->srv_kobj_unregister
);
1047 static struct kobj_type ptlrpc_svc_ktype
= {
1048 .default_attrs
= ptlrpc_svc_attrs
,
1049 .sysfs_ops
= &lustre_sysfs_ops
,
1050 .release
= ptlrpc_sysfs_svc_release
,
1053 void ptlrpc_sysfs_unregister_service(struct ptlrpc_service
*svc
)
1055 /* Let's see if we had a chance at initialization first */
1056 if (svc
->srv_kobj
.kset
) {
1057 kobject_put(&svc
->srv_kobj
);
1058 wait_for_completion(&svc
->srv_kobj_unregister
);
1062 int ptlrpc_sysfs_register_service(struct kset
*parent
,
1063 struct ptlrpc_service
*svc
)
1067 svc
->srv_kobj
.kset
= parent
;
1068 init_completion(&svc
->srv_kobj_unregister
);
1069 rc
= kobject_init_and_add(&svc
->srv_kobj
, &ptlrpc_svc_ktype
, NULL
,
1070 "%s", svc
->srv_name
);
1075 void ptlrpc_ldebugfs_register_service(struct dentry
*entry
,
1076 struct ptlrpc_service
*svc
)
1078 struct lprocfs_vars lproc_vars
[] = {
1079 {.name
= "req_buffer_history_len",
1080 .fops
= &ptlrpc_lprocfs_req_history_len_fops
,
1082 {.name
= "req_buffer_history_max",
1083 .fops
= &ptlrpc_lprocfs_req_history_max_fops
,
1085 {.name
= "timeouts",
1086 .fops
= &ptlrpc_lprocfs_timeouts_fops
,
1088 {.name
= "nrs_policies",
1089 .fops
= &ptlrpc_lprocfs_nrs_fops
,
1093 static const struct file_operations req_history_fops
= {
1094 .owner
= THIS_MODULE
,
1095 .open
= ptlrpc_lprocfs_svc_req_history_open
,
1097 .llseek
= seq_lseek
,
1098 .release
= lprocfs_seq_release
,
1103 ptlrpc_ldebugfs_register(entry
, svc
->srv_name
,
1104 "stats", &svc
->srv_debugfs_entry
,
1107 if (IS_ERR_OR_NULL(svc
->srv_debugfs_entry
))
1110 ldebugfs_add_vars(svc
->srv_debugfs_entry
, lproc_vars
, NULL
);
1112 rc
= ldebugfs_seq_create(svc
->srv_debugfs_entry
, "req_history",
1113 0400, &req_history_fops
, svc
);
1115 CWARN("Error adding the req_history file\n");
1118 void ptlrpc_lprocfs_register_obd(struct obd_device
*obddev
)
1120 ptlrpc_ldebugfs_register(obddev
->obd_debugfs_entry
, NULL
, "stats",
1121 &obddev
->obd_svc_debugfs_entry
,
1122 &obddev
->obd_svc_stats
);
1124 EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd
);
1126 void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request
*req
, long amount
)
1128 struct lprocfs_stats
*svc_stats
;
1129 __u32 op
= lustre_msg_get_opc(req
->rq_reqmsg
);
1130 int opc
= opcode_offset(op
);
1132 svc_stats
= req
->rq_import
->imp_obd
->obd_svc_stats
;
1133 if (!svc_stats
|| opc
<= 0)
1135 LASSERT(opc
< LUSTRE_MAX_OPCODES
);
1136 if (!(op
== LDLM_ENQUEUE
|| op
== MDS_REINT
))
1137 lprocfs_counter_add(svc_stats
, opc
+ EXTRA_MAX_OPCODES
, amount
);
1140 void ptlrpc_lprocfs_brw(struct ptlrpc_request
*req
, int bytes
)
1142 struct lprocfs_stats
*svc_stats
;
1145 if (!req
->rq_import
)
1147 svc_stats
= req
->rq_import
->imp_obd
->obd_svc_stats
;
1150 idx
= lustre_msg_get_opc(req
->rq_reqmsg
);
1153 idx
= BRW_READ_BYTES
+ PTLRPC_LAST_CNTR
;
1156 idx
= BRW_WRITE_BYTES
+ PTLRPC_LAST_CNTR
;
1159 LASSERTF(0, "unsupported opcode %u\n", idx
);
1163 lprocfs_counter_add(svc_stats
, idx
, bytes
);
1166 EXPORT_SYMBOL(ptlrpc_lprocfs_brw
);
1168 void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service
*svc
)
1170 if (!IS_ERR_OR_NULL(svc
->srv_debugfs_entry
))
1171 ldebugfs_remove(&svc
->srv_debugfs_entry
);
1174 lprocfs_free_stats(&svc
->srv_stats
);
1177 void ptlrpc_lprocfs_unregister_obd(struct obd_device
*obd
)
1179 if (!IS_ERR_OR_NULL(obd
->obd_svc_debugfs_entry
))
1180 ldebugfs_remove(&obd
->obd_svc_debugfs_entry
);
1182 if (obd
->obd_svc_stats
)
1183 lprocfs_free_stats(&obd
->obd_svc_stats
);
1185 EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd
);
1189 int lprocfs_wr_ping(struct file
*file
, const char __user
*buffer
,
1190 size_t count
, loff_t
*off
)
1192 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1193 struct ptlrpc_request
*req
;
1196 rc
= lprocfs_climp_check(obd
);
1200 req
= ptlrpc_prep_ping(obd
->u
.cli
.cl_import
);
1201 up_read(&obd
->u
.cli
.cl_sem
);
1205 req
->rq_send_state
= LUSTRE_IMP_FULL
;
1207 rc
= ptlrpc_queue_wait(req
);
1209 ptlrpc_req_finished(req
);
1214 EXPORT_SYMBOL(lprocfs_wr_ping
);
1216 /* Write the connection UUID to this file to attempt to connect to that node.
1217 * The connection UUID is a node's primary NID. For example,
1218 * "echo connection=192.168.0.1@tcp0::instance > .../import".
1220 int lprocfs_wr_import(struct file
*file
, const char __user
*buffer
,
1221 size_t count
, loff_t
*off
)
1223 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1224 struct obd_import
*imp
= obd
->u
.cli
.cl_import
;
1229 const char prefix
[] = "connection=";
1230 const int prefix_len
= sizeof(prefix
) - 1;
1232 if (count
> PAGE_SIZE
- 1 || count
<= prefix_len
)
1235 kbuf
= kzalloc(count
+ 1, GFP_NOFS
);
1239 if (copy_from_user(kbuf
, buffer
, count
)) {
1246 /* only support connection=uuid::instance now */
1247 if (strncmp(prefix
, kbuf
, prefix_len
) != 0) {
1252 uuid
= kbuf
+ prefix_len
;
1253 ptr
= strstr(uuid
, "::");
1260 ptr
+= strlen("::");
1261 inst
= simple_strtoul(ptr
, &endptr
, 10);
1263 CERROR("config: wrong instance # %s\n", ptr
);
1264 } else if (inst
!= imp
->imp_connect_data
.ocd_instance
) {
1265 CDEBUG(D_INFO
, "IR: %s is connecting to an obsoleted target(%u/%u), reconnecting...\n",
1266 imp
->imp_obd
->obd_name
,
1267 imp
->imp_connect_data
.ocd_instance
, inst
);
1270 CDEBUG(D_INFO
, "IR: %s has already been connecting to new target(%u)\n",
1271 imp
->imp_obd
->obd_name
, inst
);
1276 ptlrpc_recover_import(imp
, uuid
, 1);
1282 EXPORT_SYMBOL(lprocfs_wr_import
);
1284 int lprocfs_rd_pinger_recov(struct seq_file
*m
, void *n
)
1286 struct obd_device
*obd
= m
->private;
1287 struct obd_import
*imp
= obd
->u
.cli
.cl_import
;
1290 rc
= lprocfs_climp_check(obd
);
1294 seq_printf(m
, "%d\n", !imp
->imp_no_pinger_recover
);
1295 up_read(&obd
->u
.cli
.cl_sem
);
1299 EXPORT_SYMBOL(lprocfs_rd_pinger_recov
);
1301 int lprocfs_wr_pinger_recov(struct file
*file
, const char __user
*buffer
,
1302 size_t count
, loff_t
*off
)
1304 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1305 struct client_obd
*cli
= &obd
->u
.cli
;
1306 struct obd_import
*imp
= cli
->cl_import
;
1309 rc
= lprocfs_write_helper(buffer
, count
, &val
);
1313 if (val
!= 0 && val
!= 1)
1316 rc
= lprocfs_climp_check(obd
);
1320 spin_lock(&imp
->imp_lock
);
1321 imp
->imp_no_pinger_recover
= !val
;
1322 spin_unlock(&imp
->imp_lock
);
1323 up_read(&obd
->u
.cli
.cl_sem
);
1327 EXPORT_SYMBOL(lprocfs_wr_pinger_recov
);