/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ptlrpc/pinger.c
 *
 * Portal-RPC reconnection and replay operations, for use in recovery.
 */
41 #define DEBUG_SUBSYSTEM S_RPC
43 #include "../include/obd_support.h"
44 #include "../include/obd_class.h"
45 #include "ptlrpc_internal.h"
47 struct mutex pinger_mutex
;
48 static LIST_HEAD(pinger_imports
);
49 static struct list_head timeout_list
= LIST_HEAD_INIT(timeout_list
);
51 struct ptlrpc_request
*
52 ptlrpc_prep_ping(struct obd_import
*imp
)
54 struct ptlrpc_request
*req
;
56 req
= ptlrpc_request_alloc_pack(imp
, &RQF_OBD_PING
,
57 LUSTRE_OBD_VERSION
, OBD_PING
);
59 ptlrpc_request_set_replen(req
);
60 req
->rq_no_resend
= req
->rq_no_delay
= 1;
65 int ptlrpc_obd_ping(struct obd_device
*obd
)
68 struct ptlrpc_request
*req
;
70 req
= ptlrpc_prep_ping(obd
->u
.cli
.cl_import
);
74 req
->rq_send_state
= LUSTRE_IMP_FULL
;
76 rc
= ptlrpc_queue_wait(req
);
78 ptlrpc_req_finished(req
);
82 EXPORT_SYMBOL(ptlrpc_obd_ping
);
84 static int ptlrpc_ping(struct obd_import
*imp
)
86 struct ptlrpc_request
*req
;
88 req
= ptlrpc_prep_ping(imp
);
90 CERROR("OOM trying to ping %s->%s\n",
91 imp
->imp_obd
->obd_uuid
.uuid
,
92 obd2cli_tgt(imp
->imp_obd
));
96 DEBUG_REQ(D_INFO
, req
, "pinging %s->%s",
97 imp
->imp_obd
->obd_uuid
.uuid
, obd2cli_tgt(imp
->imp_obd
));
103 static void ptlrpc_update_next_ping(struct obd_import
*imp
, int soon
)
105 int time
= soon
? PING_INTERVAL_SHORT
: PING_INTERVAL
;
107 if (imp
->imp_state
== LUSTRE_IMP_DISCON
) {
108 int dtime
= max_t(int, CONNECTION_SWITCH_MIN
,
110 at_get(&imp
->imp_at
.iat_net_latency
));
111 time
= min(time
, dtime
);
113 imp
->imp_next_ping
= cfs_time_shift(time
);
116 static inline int imp_is_deactive(struct obd_import
*imp
)
118 return (imp
->imp_deactive
||
119 OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE
));
122 static inline int ptlrpc_next_reconnect(struct obd_import
*imp
)
124 if (imp
->imp_server_timeout
)
125 return cfs_time_shift(obd_timeout
/ 2);
127 return cfs_time_shift(obd_timeout
);
130 static long pinger_check_timeout(unsigned long time
)
132 struct timeout_item
*item
;
133 unsigned long timeout
= PING_INTERVAL
;
135 /* The timeout list is a increase order sorted list */
136 mutex_lock(&pinger_mutex
);
137 list_for_each_entry(item
, &timeout_list
, ti_chain
) {
138 int ti_timeout
= item
->ti_timeout
;
140 if (timeout
> ti_timeout
)
141 timeout
= ti_timeout
;
144 mutex_unlock(&pinger_mutex
);
146 return cfs_time_sub(cfs_time_add(time
, cfs_time_seconds(timeout
)),
152 void ptlrpc_pinger_ir_up(void)
154 CDEBUG(D_HA
, "IR up\n");
157 EXPORT_SYMBOL(ptlrpc_pinger_ir_up
);
159 void ptlrpc_pinger_ir_down(void)
161 CDEBUG(D_HA
, "IR down\n");
164 EXPORT_SYMBOL(ptlrpc_pinger_ir_down
);
166 static void ptlrpc_pinger_process_import(struct obd_import
*imp
,
167 unsigned long this_ping
)
174 spin_lock(&imp
->imp_lock
);
176 level
= imp
->imp_state
;
177 force
= imp
->imp_force_verify
;
178 force_next
= imp
->imp_force_next_verify
;
180 * This will be used below only if the import is "FULL".
182 suppress
= ir_up
&& OCD_HAS_FLAG(&imp
->imp_connect_data
, PINGLESS
);
184 imp
->imp_force_verify
= 0;
186 if (cfs_time_aftereq(imp
->imp_next_ping
- 5 * CFS_TICK
, this_ping
) &&
188 spin_unlock(&imp
->imp_lock
);
192 imp
->imp_force_next_verify
= 0;
194 spin_unlock(&imp
->imp_lock
);
196 CDEBUG(level
== LUSTRE_IMP_FULL
? D_INFO
: D_HA
, "%s->%s: level %s/%u force %u force_next %u deactive %u pingable %u suppress %u\n",
197 imp
->imp_obd
->obd_uuid
.uuid
, obd2cli_tgt(imp
->imp_obd
),
198 ptlrpc_import_state_name(level
), level
, force
, force_next
,
199 imp
->imp_deactive
, imp
->imp_pingable
, suppress
);
201 if (level
== LUSTRE_IMP_DISCON
&& !imp_is_deactive(imp
)) {
202 /* wait for a while before trying recovery again */
203 imp
->imp_next_ping
= ptlrpc_next_reconnect(imp
);
204 if (!imp
->imp_no_pinger_recover
)
205 ptlrpc_initiate_recovery(imp
);
206 } else if (level
!= LUSTRE_IMP_FULL
||
207 imp
->imp_obd
->obd_no_recov
||
208 imp_is_deactive(imp
)) {
209 CDEBUG(D_HA
, "%s->%s: not pinging (in recovery or recovery disabled: %s)\n",
210 imp
->imp_obd
->obd_uuid
.uuid
, obd2cli_tgt(imp
->imp_obd
),
211 ptlrpc_import_state_name(level
));
213 spin_lock(&imp
->imp_lock
);
214 imp
->imp_force_verify
= 1;
215 spin_unlock(&imp
->imp_lock
);
217 } else if ((imp
->imp_pingable
&& !suppress
) || force_next
|| force
) {
222 static int ptlrpc_pinger_main(void *arg
)
224 struct ptlrpc_thread
*thread
= arg
;
226 /* Record that the thread is running */
227 thread_set_flags(thread
, SVC_RUNNING
);
228 wake_up(&thread
->t_ctl_waitq
);
230 /* And now, loop forever, pinging as needed. */
232 unsigned long this_ping
= cfs_time_current();
233 struct l_wait_info lwi
;
234 long time_to_next_wake
;
235 struct timeout_item
*item
;
236 struct list_head
*iter
;
238 mutex_lock(&pinger_mutex
);
239 list_for_each_entry(item
, &timeout_list
, ti_chain
) {
240 item
->ti_cb(item
, item
->ti_cb_data
);
242 list_for_each(iter
, &pinger_imports
) {
243 struct obd_import
*imp
=
244 list_entry(iter
, struct obd_import
,
247 ptlrpc_pinger_process_import(imp
, this_ping
);
248 /* obd_timeout might have changed */
249 if (imp
->imp_pingable
&& imp
->imp_next_ping
&&
250 cfs_time_after(imp
->imp_next_ping
,
251 cfs_time_add(this_ping
,
252 cfs_time_seconds(PING_INTERVAL
))))
253 ptlrpc_update_next_ping(imp
, 0);
255 mutex_unlock(&pinger_mutex
);
257 /* Wait until the next ping time, or until we're stopped. */
258 time_to_next_wake
= pinger_check_timeout(this_ping
);
259 /* The ping sent by ptlrpc_send_rpc may get sent out
260 say .01 second after this.
261 ptlrpc_pinger_sending_on_import will then set the
262 next ping time to next_ping + .01 sec, which means
263 we will SKIP the next ping at next_ping, and the
264 ping will get sent 2 timeouts from now! Beware. */
265 CDEBUG(D_INFO
, "next wakeup in " CFS_DURATION_T
" (%ld)\n",
267 cfs_time_add(this_ping
,
268 cfs_time_seconds(PING_INTERVAL
)));
269 if (time_to_next_wake
> 0) {
270 lwi
= LWI_TIMEOUT(max_t(long, time_to_next_wake
,
271 cfs_time_seconds(1)),
273 l_wait_event(thread
->t_ctl_waitq
,
274 thread_is_stopping(thread
) ||
275 thread_is_event(thread
),
277 if (thread_test_and_clear_flags(thread
, SVC_STOPPING
))
279 /* woken after adding import to reset timer */
280 thread_test_and_clear_flags(thread
, SVC_EVENT
);
284 thread_set_flags(thread
, SVC_STOPPED
);
285 wake_up(&thread
->t_ctl_waitq
);
287 CDEBUG(D_NET
, "pinger thread exiting, process %d\n", current_pid());
291 static struct ptlrpc_thread pinger_thread
;
293 int ptlrpc_start_pinger(void)
295 struct l_wait_info lwi
= { 0 };
298 if (!thread_is_init(&pinger_thread
) &&
299 !thread_is_stopped(&pinger_thread
))
302 init_waitqueue_head(&pinger_thread
.t_ctl_waitq
);
304 strcpy(pinger_thread
.t_name
, "ll_ping");
306 rc
= PTR_ERR(kthread_run(ptlrpc_pinger_main
, &pinger_thread
,
307 "%s", pinger_thread
.t_name
));
308 if (IS_ERR_VALUE(rc
)) {
309 CERROR("cannot start thread: %d\n", rc
);
312 l_wait_event(pinger_thread
.t_ctl_waitq
,
313 thread_is_running(&pinger_thread
), &lwi
);
318 static int ptlrpc_pinger_remove_timeouts(void);
320 int ptlrpc_stop_pinger(void)
322 struct l_wait_info lwi
= { 0 };
325 if (thread_is_init(&pinger_thread
) ||
326 thread_is_stopped(&pinger_thread
))
329 ptlrpc_pinger_remove_timeouts();
330 thread_set_flags(&pinger_thread
, SVC_STOPPING
);
331 wake_up(&pinger_thread
.t_ctl_waitq
);
333 l_wait_event(pinger_thread
.t_ctl_waitq
,
334 thread_is_stopped(&pinger_thread
), &lwi
);
/* Reschedule the next ping on @imp using the normal (non-short) interval. */
void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
{
	ptlrpc_update_next_ping(imp, 0);
}
EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import);
345 void ptlrpc_pinger_commit_expected(struct obd_import
*imp
)
347 ptlrpc_update_next_ping(imp
, 1);
348 assert_spin_locked(&imp
->imp_lock
);
350 * Avoid reading stale imp_connect_data. When not sure if pings are
351 * expected or not on next connection, we assume they are not and force
352 * one anyway to guarantee the chance of updating
353 * imp_peer_committed_transno.
355 if (imp
->imp_state
!= LUSTRE_IMP_FULL
||
356 OCD_HAS_FLAG(&imp
->imp_connect_data
, PINGLESS
))
357 imp
->imp_force_next_verify
= 1;
360 int ptlrpc_pinger_add_import(struct obd_import
*imp
)
362 if (!list_empty(&imp
->imp_pinger_chain
))
365 mutex_lock(&pinger_mutex
);
366 CDEBUG(D_HA
, "adding pingable import %s->%s\n",
367 imp
->imp_obd
->obd_uuid
.uuid
, obd2cli_tgt(imp
->imp_obd
));
368 /* if we add to pinger we want recovery on this import */
369 imp
->imp_obd
->obd_no_recov
= 0;
370 ptlrpc_update_next_ping(imp
, 0);
371 /* XXX sort, blah blah */
372 list_add_tail(&imp
->imp_pinger_chain
, &pinger_imports
);
373 class_import_get(imp
);
375 ptlrpc_pinger_wake_up();
376 mutex_unlock(&pinger_mutex
);
380 EXPORT_SYMBOL(ptlrpc_pinger_add_import
);
382 int ptlrpc_pinger_del_import(struct obd_import
*imp
)
384 if (list_empty(&imp
->imp_pinger_chain
))
387 mutex_lock(&pinger_mutex
);
388 list_del_init(&imp
->imp_pinger_chain
);
389 CDEBUG(D_HA
, "removing pingable import %s->%s\n",
390 imp
->imp_obd
->obd_uuid
.uuid
, obd2cli_tgt(imp
->imp_obd
));
391 /* if we remove from pinger we don't want recovery on this import */
392 imp
->imp_obd
->obd_no_recov
= 1;
393 class_import_put(imp
);
394 mutex_unlock(&pinger_mutex
);
397 EXPORT_SYMBOL(ptlrpc_pinger_del_import
);
400 * Register a timeout callback to the pinger list, and the callback will
401 * be called when timeout happens.
403 static struct timeout_item
*ptlrpc_new_timeout(int time
,
404 enum timeout_event event
, timeout_cb_t cb
, void *data
)
406 struct timeout_item
*ti
;
408 ti
= kzalloc(sizeof(*ti
), GFP_NOFS
);
412 INIT_LIST_HEAD(&ti
->ti_obd_list
);
413 INIT_LIST_HEAD(&ti
->ti_chain
);
414 ti
->ti_timeout
= time
;
415 ti
->ti_event
= event
;
417 ti
->ti_cb_data
= data
;
423 * Register timeout event on the pinger thread.
424 * Note: the timeout list is an sorted list with increased timeout value.
426 static struct timeout_item
*
427 ptlrpc_pinger_register_timeout(int time
, enum timeout_event event
,
428 timeout_cb_t cb
, void *data
)
430 struct timeout_item
*item
, *tmp
;
432 LASSERT(mutex_is_locked(&pinger_mutex
));
434 list_for_each_entry(item
, &timeout_list
, ti_chain
)
435 if (item
->ti_event
== event
)
438 item
= ptlrpc_new_timeout(time
, event
, cb
, data
);
440 list_for_each_entry_reverse(tmp
, &timeout_list
, ti_chain
) {
441 if (tmp
->ti_timeout
< time
) {
442 list_add(&item
->ti_chain
, &tmp
->ti_chain
);
446 list_add(&item
->ti_chain
, &timeout_list
);
452 /* Add a client_obd to the timeout event list, when timeout(@time)
453 * happens, the callback(@cb) will be called.
455 int ptlrpc_add_timeout_client(int time
, enum timeout_event event
,
456 timeout_cb_t cb
, void *data
,
457 struct list_head
*obd_list
)
459 struct timeout_item
*ti
;
461 mutex_lock(&pinger_mutex
);
462 ti
= ptlrpc_pinger_register_timeout(time
, event
, cb
, data
);
464 mutex_unlock(&pinger_mutex
);
467 list_add(obd_list
, &ti
->ti_obd_list
);
468 mutex_unlock(&pinger_mutex
);
471 EXPORT_SYMBOL(ptlrpc_add_timeout_client
);
473 int ptlrpc_del_timeout_client(struct list_head
*obd_list
,
474 enum timeout_event event
)
476 struct timeout_item
*ti
= NULL
, *item
;
478 if (list_empty(obd_list
))
480 mutex_lock(&pinger_mutex
);
481 list_del_init(obd_list
);
483 * If there are no obd attached to the timeout event
484 * list, remove this timeout event from the pinger
486 list_for_each_entry(item
, &timeout_list
, ti_chain
) {
487 if (item
->ti_event
== event
) {
492 LASSERTF(ti
!= NULL
, "ti is NULL !\n");
493 if (list_empty(&ti
->ti_obd_list
)) {
494 list_del(&ti
->ti_chain
);
497 mutex_unlock(&pinger_mutex
);
500 EXPORT_SYMBOL(ptlrpc_del_timeout_client
);
502 static int ptlrpc_pinger_remove_timeouts(void)
504 struct timeout_item
*item
, *tmp
;
506 mutex_lock(&pinger_mutex
);
507 list_for_each_entry_safe(item
, tmp
, &timeout_list
, ti_chain
) {
508 LASSERT(list_empty(&item
->ti_obd_list
));
509 list_del(&item
->ti_chain
);
512 mutex_unlock(&pinger_mutex
);
516 void ptlrpc_pinger_wake_up(void)
518 thread_add_flags(&pinger_thread
, SVC_EVENT
);
519 wake_up(&pinger_thread
.t_ctl_waitq
);