/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ptlrpc/pinger.c
 *
 * Portal-RPC reconnection and replay operations, for use in recovery.
 */

41 #define DEBUG_SUBSYSTEM S_RPC
43 #include "../include/obd_support.h"
44 #include "../include/obd_class.h"
45 #include "ptlrpc_internal.h"
47 struct mutex pinger_mutex
;
48 static LIST_HEAD(pinger_imports
);
49 static struct list_head timeout_list
= LIST_HEAD_INIT(timeout_list
);
51 struct ptlrpc_request
*
52 ptlrpc_prep_ping(struct obd_import
*imp
)
54 struct ptlrpc_request
*req
;
56 req
= ptlrpc_request_alloc_pack(imp
, &RQF_OBD_PING
,
57 LUSTRE_OBD_VERSION
, OBD_PING
);
59 ptlrpc_request_set_replen(req
);
60 req
->rq_no_resend
= req
->rq_no_delay
= 1;
65 int ptlrpc_obd_ping(struct obd_device
*obd
)
68 struct ptlrpc_request
*req
;
70 req
= ptlrpc_prep_ping(obd
->u
.cli
.cl_import
);
74 req
->rq_send_state
= LUSTRE_IMP_FULL
;
76 rc
= ptlrpc_queue_wait(req
);
78 ptlrpc_req_finished(req
);
82 EXPORT_SYMBOL(ptlrpc_obd_ping
);
84 static int ptlrpc_ping(struct obd_import
*imp
)
86 struct ptlrpc_request
*req
;
88 req
= ptlrpc_prep_ping(imp
);
90 CERROR("OOM trying to ping %s->%s\n",
91 imp
->imp_obd
->obd_uuid
.uuid
,
92 obd2cli_tgt(imp
->imp_obd
));
96 DEBUG_REQ(D_INFO
, req
, "pinging %s->%s",
97 imp
->imp_obd
->obd_uuid
.uuid
, obd2cli_tgt(imp
->imp_obd
));
103 static void ptlrpc_update_next_ping(struct obd_import
*imp
, int soon
)
105 int time
= soon
? PING_INTERVAL_SHORT
: PING_INTERVAL
;
107 if (imp
->imp_state
== LUSTRE_IMP_DISCON
) {
108 int dtime
= max_t(int, CONNECTION_SWITCH_MIN
,
110 at_get(&imp
->imp_at
.iat_net_latency
));
111 time
= min(time
, dtime
);
113 imp
->imp_next_ping
= cfs_time_shift(time
);
116 static inline int imp_is_deactive(struct obd_import
*imp
)
118 return (imp
->imp_deactive
||
119 OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE
));
122 static inline int ptlrpc_next_reconnect(struct obd_import
*imp
)
124 if (imp
->imp_server_timeout
)
125 return cfs_time_shift(obd_timeout
/ 2);
127 return cfs_time_shift(obd_timeout
);
130 static long pinger_check_timeout(unsigned long time
)
132 struct timeout_item
*item
;
133 unsigned long timeout
= PING_INTERVAL
;
135 /* The timeout list is a increase order sorted list */
136 mutex_lock(&pinger_mutex
);
137 list_for_each_entry(item
, &timeout_list
, ti_chain
) {
138 int ti_timeout
= item
->ti_timeout
;
140 if (timeout
> ti_timeout
)
141 timeout
= ti_timeout
;
144 mutex_unlock(&pinger_mutex
);
146 return cfs_time_sub(cfs_time_add(time
, cfs_time_seconds(timeout
)),
152 void ptlrpc_pinger_ir_up(void)
154 CDEBUG(D_HA
, "IR up\n");
157 EXPORT_SYMBOL(ptlrpc_pinger_ir_up
);
159 void ptlrpc_pinger_ir_down(void)
161 CDEBUG(D_HA
, "IR down\n");
164 EXPORT_SYMBOL(ptlrpc_pinger_ir_down
);
166 static void ptlrpc_pinger_process_import(struct obd_import
*imp
,
167 unsigned long this_ping
)
174 spin_lock(&imp
->imp_lock
);
176 level
= imp
->imp_state
;
177 force
= imp
->imp_force_verify
;
178 force_next
= imp
->imp_force_next_verify
;
180 * This will be used below only if the import is "FULL".
182 suppress
= ir_up
&& OCD_HAS_FLAG(&imp
->imp_connect_data
, PINGLESS
);
184 imp
->imp_force_verify
= 0;
186 if (cfs_time_aftereq(imp
->imp_next_ping
- 5 * CFS_TICK
, this_ping
) &&
188 spin_unlock(&imp
->imp_lock
);
192 imp
->imp_force_next_verify
= 0;
194 spin_unlock(&imp
->imp_lock
);
196 CDEBUG(level
== LUSTRE_IMP_FULL
? D_INFO
: D_HA
, "%s->%s: level %s/%u force %u force_next %u deactive %u pingable %u suppress %u\n",
197 imp
->imp_obd
->obd_uuid
.uuid
, obd2cli_tgt(imp
->imp_obd
),
198 ptlrpc_import_state_name(level
), level
, force
, force_next
,
199 imp
->imp_deactive
, imp
->imp_pingable
, suppress
);
201 if (level
== LUSTRE_IMP_DISCON
&& !imp_is_deactive(imp
)) {
202 /* wait for a while before trying recovery again */
203 imp
->imp_next_ping
= ptlrpc_next_reconnect(imp
);
204 if (!imp
->imp_no_pinger_recover
)
205 ptlrpc_initiate_recovery(imp
);
206 } else if (level
!= LUSTRE_IMP_FULL
||
207 imp
->imp_obd
->obd_no_recov
||
208 imp_is_deactive(imp
)) {
209 CDEBUG(D_HA
, "%s->%s: not pinging (in recovery or recovery disabled: %s)\n",
210 imp
->imp_obd
->obd_uuid
.uuid
, obd2cli_tgt(imp
->imp_obd
),
211 ptlrpc_import_state_name(level
));
213 spin_lock(&imp
->imp_lock
);
214 imp
->imp_force_verify
= 1;
215 spin_unlock(&imp
->imp_lock
);
217 } else if ((imp
->imp_pingable
&& !suppress
) || force_next
|| force
) {
222 static int ptlrpc_pinger_main(void *arg
)
224 struct ptlrpc_thread
*thread
= arg
;
226 /* Record that the thread is running */
227 thread_set_flags(thread
, SVC_RUNNING
);
228 wake_up(&thread
->t_ctl_waitq
);
230 /* And now, loop forever, pinging as needed. */
232 unsigned long this_ping
= cfs_time_current();
233 struct l_wait_info lwi
;
234 long time_to_next_wake
;
235 struct timeout_item
*item
;
236 struct list_head
*iter
;
238 mutex_lock(&pinger_mutex
);
239 list_for_each_entry(item
, &timeout_list
, ti_chain
) {
240 item
->ti_cb(item
, item
->ti_cb_data
);
242 list_for_each(iter
, &pinger_imports
) {
243 struct obd_import
*imp
=
244 list_entry(iter
, struct obd_import
,
247 ptlrpc_pinger_process_import(imp
, this_ping
);
248 /* obd_timeout might have changed */
249 if (imp
->imp_pingable
&& imp
->imp_next_ping
&&
250 cfs_time_after(imp
->imp_next_ping
,
251 cfs_time_add(this_ping
,
252 cfs_time_seconds(PING_INTERVAL
))))
253 ptlrpc_update_next_ping(imp
, 0);
255 mutex_unlock(&pinger_mutex
);
257 /* Wait until the next ping time, or until we're stopped. */
258 time_to_next_wake
= pinger_check_timeout(this_ping
);
259 /* The ping sent by ptlrpc_send_rpc may get sent out
260 * say .01 second after this.
261 * ptlrpc_pinger_sending_on_import will then set the
262 * next ping time to next_ping + .01 sec, which means
263 * we will SKIP the next ping at next_ping, and the
264 * ping will get sent 2 timeouts from now! Beware.
266 CDEBUG(D_INFO
, "next wakeup in " CFS_DURATION_T
" (%ld)\n",
268 cfs_time_add(this_ping
,
269 cfs_time_seconds(PING_INTERVAL
)));
270 if (time_to_next_wake
> 0) {
271 lwi
= LWI_TIMEOUT(max_t(long, time_to_next_wake
,
272 cfs_time_seconds(1)),
274 l_wait_event(thread
->t_ctl_waitq
,
275 thread_is_stopping(thread
) ||
276 thread_is_event(thread
),
278 if (thread_test_and_clear_flags(thread
, SVC_STOPPING
))
280 /* woken after adding import to reset timer */
281 thread_test_and_clear_flags(thread
, SVC_EVENT
);
285 thread_set_flags(thread
, SVC_STOPPED
);
286 wake_up(&thread
->t_ctl_waitq
);
288 CDEBUG(D_NET
, "pinger thread exiting, process %d\n", current_pid());
292 static struct ptlrpc_thread pinger_thread
;
294 int ptlrpc_start_pinger(void)
296 struct l_wait_info lwi
= { 0 };
297 struct task_struct
*task
;
300 if (!thread_is_init(&pinger_thread
) &&
301 !thread_is_stopped(&pinger_thread
))
304 init_waitqueue_head(&pinger_thread
.t_ctl_waitq
);
306 strcpy(pinger_thread
.t_name
, "ll_ping");
308 task
= kthread_run(ptlrpc_pinger_main
, &pinger_thread
,
309 pinger_thread
.t_name
);
312 CERROR("cannot start pinger thread: rc = %d\n", rc
);
315 l_wait_event(pinger_thread
.t_ctl_waitq
,
316 thread_is_running(&pinger_thread
), &lwi
);
321 static int ptlrpc_pinger_remove_timeouts(void);
323 int ptlrpc_stop_pinger(void)
325 struct l_wait_info lwi
= { 0 };
328 if (thread_is_init(&pinger_thread
) ||
329 thread_is_stopped(&pinger_thread
))
332 ptlrpc_pinger_remove_timeouts();
333 thread_set_flags(&pinger_thread
, SVC_STOPPING
);
334 wake_up(&pinger_thread
.t_ctl_waitq
);
336 l_wait_event(pinger_thread
.t_ctl_waitq
,
337 thread_is_stopped(&pinger_thread
), &lwi
);
/**
 * Note that a request is being sent on \a imp: reschedule its next ping
 * using the regular (not the short) interval.
 */
void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
{
	ptlrpc_update_next_ping(imp, 0);
}
EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import);

348 void ptlrpc_pinger_commit_expected(struct obd_import
*imp
)
350 ptlrpc_update_next_ping(imp
, 1);
351 assert_spin_locked(&imp
->imp_lock
);
353 * Avoid reading stale imp_connect_data. When not sure if pings are
354 * expected or not on next connection, we assume they are not and force
355 * one anyway to guarantee the chance of updating
356 * imp_peer_committed_transno.
358 if (imp
->imp_state
!= LUSTRE_IMP_FULL
||
359 OCD_HAS_FLAG(&imp
->imp_connect_data
, PINGLESS
))
360 imp
->imp_force_next_verify
= 1;
363 int ptlrpc_pinger_add_import(struct obd_import
*imp
)
365 if (!list_empty(&imp
->imp_pinger_chain
))
368 mutex_lock(&pinger_mutex
);
369 CDEBUG(D_HA
, "adding pingable import %s->%s\n",
370 imp
->imp_obd
->obd_uuid
.uuid
, obd2cli_tgt(imp
->imp_obd
));
371 /* if we add to pinger we want recovery on this import */
372 imp
->imp_obd
->obd_no_recov
= 0;
373 ptlrpc_update_next_ping(imp
, 0);
374 /* XXX sort, blah blah */
375 list_add_tail(&imp
->imp_pinger_chain
, &pinger_imports
);
376 class_import_get(imp
);
378 ptlrpc_pinger_wake_up();
379 mutex_unlock(&pinger_mutex
);
383 EXPORT_SYMBOL(ptlrpc_pinger_add_import
);
385 int ptlrpc_pinger_del_import(struct obd_import
*imp
)
387 if (list_empty(&imp
->imp_pinger_chain
))
390 mutex_lock(&pinger_mutex
);
391 list_del_init(&imp
->imp_pinger_chain
);
392 CDEBUG(D_HA
, "removing pingable import %s->%s\n",
393 imp
->imp_obd
->obd_uuid
.uuid
, obd2cli_tgt(imp
->imp_obd
));
394 /* if we remove from pinger we don't want recovery on this import */
395 imp
->imp_obd
->obd_no_recov
= 1;
396 class_import_put(imp
);
397 mutex_unlock(&pinger_mutex
);
400 EXPORT_SYMBOL(ptlrpc_pinger_del_import
);
403 * Register a timeout callback to the pinger list, and the callback will
404 * be called when timeout happens.
406 static struct timeout_item
*ptlrpc_new_timeout(int time
,
407 enum timeout_event event
,
408 timeout_cb_t cb
, void *data
)
410 struct timeout_item
*ti
;
412 ti
= kzalloc(sizeof(*ti
), GFP_NOFS
);
416 INIT_LIST_HEAD(&ti
->ti_obd_list
);
417 INIT_LIST_HEAD(&ti
->ti_chain
);
418 ti
->ti_timeout
= time
;
419 ti
->ti_event
= event
;
421 ti
->ti_cb_data
= data
;
427 * Register timeout event on the pinger thread.
428 * Note: the timeout list is an sorted list with increased timeout value.
430 static struct timeout_item
*
431 ptlrpc_pinger_register_timeout(int time
, enum timeout_event event
,
432 timeout_cb_t cb
, void *data
)
434 struct timeout_item
*item
, *tmp
;
436 LASSERT(mutex_is_locked(&pinger_mutex
));
438 list_for_each_entry(item
, &timeout_list
, ti_chain
)
439 if (item
->ti_event
== event
)
442 item
= ptlrpc_new_timeout(time
, event
, cb
, data
);
444 list_for_each_entry_reverse(tmp
, &timeout_list
, ti_chain
) {
445 if (tmp
->ti_timeout
< time
) {
446 list_add(&item
->ti_chain
, &tmp
->ti_chain
);
450 list_add(&item
->ti_chain
, &timeout_list
);
456 /* Add a client_obd to the timeout event list, when timeout(@time)
457 * happens, the callback(@cb) will be called.
459 int ptlrpc_add_timeout_client(int time
, enum timeout_event event
,
460 timeout_cb_t cb
, void *data
,
461 struct list_head
*obd_list
)
463 struct timeout_item
*ti
;
465 mutex_lock(&pinger_mutex
);
466 ti
= ptlrpc_pinger_register_timeout(time
, event
, cb
, data
);
468 mutex_unlock(&pinger_mutex
);
471 list_add(obd_list
, &ti
->ti_obd_list
);
472 mutex_unlock(&pinger_mutex
);
475 EXPORT_SYMBOL(ptlrpc_add_timeout_client
);
477 int ptlrpc_del_timeout_client(struct list_head
*obd_list
,
478 enum timeout_event event
)
480 struct timeout_item
*ti
= NULL
, *item
;
482 if (list_empty(obd_list
))
484 mutex_lock(&pinger_mutex
);
485 list_del_init(obd_list
);
487 * If there are no obd attached to the timeout event
488 * list, remove this timeout event from the pinger
490 list_for_each_entry(item
, &timeout_list
, ti_chain
) {
491 if (item
->ti_event
== event
) {
496 if (list_empty(&ti
->ti_obd_list
)) {
497 list_del(&ti
->ti_chain
);
500 mutex_unlock(&pinger_mutex
);
503 EXPORT_SYMBOL(ptlrpc_del_timeout_client
);
505 static int ptlrpc_pinger_remove_timeouts(void)
507 struct timeout_item
*item
, *tmp
;
509 mutex_lock(&pinger_mutex
);
510 list_for_each_entry_safe(item
, tmp
, &timeout_list
, ti_chain
) {
511 LASSERT(list_empty(&item
->ti_obd_list
));
512 list_del(&item
->ti_chain
);
515 mutex_unlock(&pinger_mutex
);
519 void ptlrpc_pinger_wake_up(void)
521 thread_add_flags(&pinger_thread
, SVC_EVENT
);
522 wake_up(&pinger_thread
.t_ctl_waitq
);