Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2011, 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * lustre/ptlrpc/pinger.c | |
37 | * | |
38 | * Portal-RPC reconnection and replay operations, for use in recovery. | |
39 | */ | |
40 | ||
41 | #define DEBUG_SUBSYSTEM S_RPC | |
42 | ||
e27db149 GKH |
43 | #include "../include/obd_support.h" |
44 | #include "../include/obd_class.h" | |
d7e09d03 PT |
45 | #include "ptlrpc_internal.h" |
46 | ||
/* Module parameter; its value is returned by ptlrpc_pinger_suppress_pings(). */
static int suppress_pings;
module_param(suppress_pings, int, 0644);
MODULE_PARM_DESC(suppress_pings, "Suppress pings");

/* Held in this file while walking or modifying the two lists below. */
struct mutex pinger_mutex;
/* Imports added through ptlrpc_pinger_add_import(). */
static LIST_HEAD(pinger_imports);
/* Registered timeout items, kept sorted by increasing ti_timeout. */
static struct list_head timeout_list = LIST_HEAD_INIT(timeout_list);
54 | ||
7d46a21a | 55 | int ptlrpc_pinger_suppress_pings(void) |
d7e09d03 PT |
56 | { |
57 | return suppress_pings; | |
58 | } | |
59 | EXPORT_SYMBOL(ptlrpc_pinger_suppress_pings); | |
60 | ||
61 | struct ptlrpc_request * | |
62 | ptlrpc_prep_ping(struct obd_import *imp) | |
63 | { | |
64 | struct ptlrpc_request *req; | |
65 | ||
66 | req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, | |
67 | LUSTRE_OBD_VERSION, OBD_PING); | |
68 | if (req) { | |
69 | ptlrpc_request_set_replen(req); | |
70 | req->rq_no_resend = req->rq_no_delay = 1; | |
71 | } | |
72 | return req; | |
73 | } | |
74 | ||
75 | int ptlrpc_obd_ping(struct obd_device *obd) | |
76 | { | |
77 | int rc; | |
78 | struct ptlrpc_request *req; | |
d7e09d03 PT |
79 | |
80 | req = ptlrpc_prep_ping(obd->u.cli.cl_import); | |
81 | if (req == NULL) | |
0a3bdb00 | 82 | return -ENOMEM; |
d7e09d03 PT |
83 | |
84 | req->rq_send_state = LUSTRE_IMP_FULL; | |
85 | ||
86 | rc = ptlrpc_queue_wait(req); | |
87 | ||
88 | ptlrpc_req_finished(req); | |
89 | ||
0a3bdb00 | 90 | return rc; |
d7e09d03 PT |
91 | } |
92 | EXPORT_SYMBOL(ptlrpc_obd_ping); | |
93 | ||
f9aaa43e | 94 | static int ptlrpc_ping(struct obd_import *imp) |
d7e09d03 PT |
95 | { |
96 | struct ptlrpc_request *req; | |
d7e09d03 PT |
97 | |
98 | req = ptlrpc_prep_ping(imp); | |
99 | if (req == NULL) { | |
100 | CERROR("OOM trying to ping %s->%s\n", | |
101 | imp->imp_obd->obd_uuid.uuid, | |
102 | obd2cli_tgt(imp->imp_obd)); | |
0a3bdb00 | 103 | return -ENOMEM; |
d7e09d03 PT |
104 | } |
105 | ||
106 | DEBUG_REQ(D_INFO, req, "pinging %s->%s", | |
107 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); | |
c5c4c6fa | 108 | ptlrpcd_add_req(req); |
d7e09d03 | 109 | |
0a3bdb00 | 110 | return 0; |
d7e09d03 PT |
111 | } |
112 | ||
f9aaa43e | 113 | static void ptlrpc_update_next_ping(struct obd_import *imp, int soon) |
d7e09d03 PT |
114 | { |
115 | int time = soon ? PING_INTERVAL_SHORT : PING_INTERVAL; | |
116 | if (imp->imp_state == LUSTRE_IMP_DISCON) { | |
117 | int dtime = max_t(int, CONNECTION_SWITCH_MIN, | |
118 | AT_OFF ? 0 : | |
119 | at_get(&imp->imp_at.iat_net_latency)); | |
120 | time = min(time, dtime); | |
121 | } | |
122 | imp->imp_next_ping = cfs_time_shift(time); | |
123 | } | |
124 | ||
125 | void ptlrpc_ping_import_soon(struct obd_import *imp) | |
126 | { | |
127 | imp->imp_next_ping = cfs_time_current(); | |
128 | } | |
129 | ||
130 | static inline int imp_is_deactive(struct obd_import *imp) | |
131 | { | |
132 | return (imp->imp_deactive || | |
133 | OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE)); | |
134 | } | |
135 | ||
136 | static inline int ptlrpc_next_reconnect(struct obd_import *imp) | |
137 | { | |
138 | if (imp->imp_server_timeout) | |
139 | return cfs_time_shift(obd_timeout / 2); | |
140 | else | |
141 | return cfs_time_shift(obd_timeout); | |
142 | } | |
143 | ||
f9aaa43e | 144 | static long pinger_check_timeout(unsigned long time) |
d7e09d03 PT |
145 | { |
146 | struct timeout_item *item; | |
a649ad1d | 147 | unsigned long timeout = PING_INTERVAL; |
d7e09d03 PT |
148 | |
149 | /* The timeout list is a increase order sorted list */ | |
150 | mutex_lock(&pinger_mutex); | |
151 | list_for_each_entry(item, &timeout_list, ti_chain) { | |
152 | int ti_timeout = item->ti_timeout; | |
153 | if (timeout > ti_timeout) | |
154 | timeout = ti_timeout; | |
155 | break; | |
156 | } | |
157 | mutex_unlock(&pinger_mutex); | |
158 | ||
159 | return cfs_time_sub(cfs_time_add(time, cfs_time_seconds(timeout)), | |
160 | cfs_time_current()); | |
161 | } | |
162 | ||
d7e09d03 PT |
163 | static bool ir_up; |
164 | ||
165 | void ptlrpc_pinger_ir_up(void) | |
166 | { | |
167 | CDEBUG(D_HA, "IR up\n"); | |
168 | ir_up = true; | |
169 | } | |
170 | EXPORT_SYMBOL(ptlrpc_pinger_ir_up); | |
171 | ||
172 | void ptlrpc_pinger_ir_down(void) | |
173 | { | |
174 | CDEBUG(D_HA, "IR down\n"); | |
175 | ir_up = false; | |
176 | } | |
177 | EXPORT_SYMBOL(ptlrpc_pinger_ir_down); | |
178 | ||
/**
 * Handle one import during a pinger pass: do nothing, start recovery,
 * or send a ping, depending on the import state and its force flags.
 *
 * \a imp is the import to examine; \a this_ping is the time at which the
 * current pinger pass started.
 */
static void ptlrpc_pinger_process_import(struct obd_import *imp,
					 unsigned long this_ping)
{
	int level;
	int force;
	int force_next;
	int suppress;

	/* Take a consistent snapshot of the import under imp_lock. */
	spin_lock(&imp->imp_lock);

	level = imp->imp_state;
	force = imp->imp_force_verify;
	force_next = imp->imp_force_next_verify;
	/*
	 * This will be used below only if the import is "FULL".
	 */
	suppress = ir_up && OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS);

	/* Consume the force request; it is re-armed below if the import
	 * turns out not to be pingable right now. */
	imp->imp_force_verify = 0;

	/* The next ping is still at least 5 * CFS_TICK away and nothing
	 * forced a verify: leave this import alone for this pass. */
	if (cfs_time_aftereq(imp->imp_next_ping - 5 * CFS_TICK, this_ping) &&
	    !force) {
		spin_unlock(&imp->imp_lock);
		return;
	}

	/* Only cleared once we know the import is going to be processed. */
	imp->imp_force_next_verify = 0;

	spin_unlock(&imp->imp_lock);

	CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, "%s->%s: level %s/%u force %u force_next %u deactive %u pingable %u suppress %u\n",
	       imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
	       ptlrpc_import_state_name(level), level, force, force_next,
	       imp->imp_deactive, imp->imp_pingable, suppress);

	if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) {
		/* wait for a while before trying recovery again */
		imp->imp_next_ping = ptlrpc_next_reconnect(imp);
		if (!imp->imp_no_pinger_recover)
			ptlrpc_initiate_recovery(imp);
	} else if (level != LUSTRE_IMP_FULL ||
		   imp->imp_obd->obd_no_recov ||
		   imp_is_deactive(imp)) {
		CDEBUG(D_HA, "%s->%s: not pinging (in recovery or recovery disabled: %s)\n",
		       imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
		       ptlrpc_import_state_name(level));
		/* Re-arm the force request so it is honoured on a later
		 * pass. */
		if (force) {
			spin_lock(&imp->imp_lock);
			imp->imp_force_verify = 1;
			spin_unlock(&imp->imp_lock);
		}
	} else if ((imp->imp_pingable && !suppress) || force_next || force) {
		/* Import is FULL: ping unless suppressed, or when forced. */
		ptlrpc_ping(imp);
	}
}
234 | ||
/**
 * Body of the pinger kernel thread.
 *
 * Each pass runs every registered timeout callback, processes every
 * import on pinger_imports, then sleeps until the next pass is due or
 * the thread is woken.  The loop ends when SVC_STOPPING is observed
 * after a sleep.
 *
 * \a arg is the struct ptlrpc_thread describing this thread.
 *
 * Always returns 0.
 */
static int ptlrpc_pinger_main(void *arg)
{
	struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg;

	/* Record that the thread is running */
	thread_set_flags(thread, SVC_RUNNING);
	wake_up(&thread->t_ctl_waitq);

	/* And now, loop forever, pinging as needed. */
	while (1) {
		unsigned long this_ping = cfs_time_current();
		struct l_wait_info lwi;
		long time_to_next_wake;
		struct timeout_item *item;
		struct list_head *iter;

		/* Both lists are walked under pinger_mutex. */
		mutex_lock(&pinger_mutex);
		list_for_each_entry(item, &timeout_list, ti_chain) {
			item->ti_cb(item, item->ti_cb_data);
		}
		list_for_each(iter, &pinger_imports) {
			struct obd_import *imp =
				list_entry(iter, struct obd_import,
					   imp_pinger_chain);

			ptlrpc_pinger_process_import(imp, this_ping);
			/* obd_timeout might have changed */
			if (imp->imp_pingable && imp->imp_next_ping &&
			    cfs_time_after(imp->imp_next_ping,
					   cfs_time_add(this_ping,
							cfs_time_seconds(PING_INTERVAL))))
				ptlrpc_update_next_ping(imp, 0);
		}
		mutex_unlock(&pinger_mutex);
		/* update memory usage info */
		obd_update_maxusage();

		/* Wait until the next ping time, or until we're stopped. */
		time_to_next_wake = pinger_check_timeout(this_ping);
		/* The ping sent by ptlrpc_send_rpc may get sent out
		   say .01 second after this.
		   ptlrpc_pinger_sending_on_import will then set the
		   next ping time to next_ping + .01 sec, which means
		   we will SKIP the next ping at next_ping, and the
		   ping will get sent 2 timeouts from now!  Beware. */
		CDEBUG(D_INFO, "next wakeup in "CFS_DURATION_T" ("
		       CFS_TIME_T")\n", time_to_next_wake,
		       cfs_time_add(this_ping,
				    cfs_time_seconds(PING_INTERVAL)));
		/* When the pass overran its interval, start the next pass
		 * right away without sleeping. */
		if (time_to_next_wake > 0) {
			/* Sleep for at least one second. */
			lwi = LWI_TIMEOUT(max_t(long, time_to_next_wake,
						cfs_time_seconds(1)),
					  NULL, NULL);
			l_wait_event(thread->t_ctl_waitq,
				     thread_is_stopping(thread) ||
				     thread_is_event(thread),
				     &lwi);
			if (thread_test_and_clear_flags(thread, SVC_STOPPING))
				break;
			/* woken after adding import to reset timer */
			thread_test_and_clear_flags(thread, SVC_EVENT);
		}
	}

	/* Tell whoever is waiting on t_ctl_waitq that we are done. */
	thread_set_flags(thread, SVC_STOPPED);
	wake_up(&thread->t_ctl_waitq);

	CDEBUG(D_NET, "pinger thread exiting, process %d\n", current_pid());
	return 0;
}
305 | ||
20802057 | 306 | static struct ptlrpc_thread pinger_thread; |
d7e09d03 PT |
307 | |
308 | int ptlrpc_start_pinger(void) | |
309 | { | |
310 | struct l_wait_info lwi = { 0 }; | |
311 | int rc; | |
d7e09d03 | 312 | |
20802057 DE |
313 | if (!thread_is_init(&pinger_thread) && |
314 | !thread_is_stopped(&pinger_thread)) | |
0a3bdb00 | 315 | return -EALREADY; |
d7e09d03 | 316 | |
20802057 | 317 | init_waitqueue_head(&pinger_thread.t_ctl_waitq); |
d7e09d03 | 318 | |
20802057 | 319 | strcpy(pinger_thread.t_name, "ll_ping"); |
d7e09d03 | 320 | |
9edf0f67 KC |
321 | rc = PTR_ERR(kthread_run(ptlrpc_pinger_main, &pinger_thread, |
322 | "%s", pinger_thread.t_name)); | |
d7e09d03 PT |
323 | if (IS_ERR_VALUE(rc)) { |
324 | CERROR("cannot start thread: %d\n", rc); | |
0a3bdb00 | 325 | return rc; |
d7e09d03 | 326 | } |
20802057 DE |
327 | l_wait_event(pinger_thread.t_ctl_waitq, |
328 | thread_is_running(&pinger_thread), &lwi); | |
d7e09d03 PT |
329 | |
330 | if (suppress_pings) | |
2d00bd17 | 331 | CWARN("Pings will be suppressed at the request of the administrator. The configuration shall meet the additional requirements described in the manual. (Search for the \"suppress_pings\" kernel module parameter.)\n"); |
d7e09d03 | 332 | |
0a3bdb00 | 333 | return 0; |
d7e09d03 PT |
334 | } |
335 | ||
336 | int ptlrpc_pinger_remove_timeouts(void); | |
337 | ||
338 | int ptlrpc_stop_pinger(void) | |
339 | { | |
340 | struct l_wait_info lwi = { 0 }; | |
341 | int rc = 0; | |
d7e09d03 | 342 | |
b39f15c9 PT |
343 | if (thread_is_init(&pinger_thread) || |
344 | thread_is_stopped(&pinger_thread)) | |
0a3bdb00 | 345 | return -EALREADY; |
d7e09d03 PT |
346 | |
347 | ptlrpc_pinger_remove_timeouts(); | |
20802057 DE |
348 | thread_set_flags(&pinger_thread, SVC_STOPPING); |
349 | wake_up(&pinger_thread.t_ctl_waitq); | |
d7e09d03 | 350 | |
20802057 DE |
351 | l_wait_event(pinger_thread.t_ctl_waitq, |
352 | thread_is_stopped(&pinger_thread), &lwi); | |
d7e09d03 | 353 | |
0a3bdb00 | 354 | return rc; |
d7e09d03 PT |
355 | } |
356 | ||
/**
 * Push the next ping of \a imp out by the regular (non-short) interval.
 */
void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
{
	ptlrpc_update_next_ping(imp, 0);
}
EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import);
362 | ||
363 | void ptlrpc_pinger_commit_expected(struct obd_import *imp) | |
364 | { | |
365 | ptlrpc_update_next_ping(imp, 1); | |
5e42bc9d | 366 | assert_spin_locked(&imp->imp_lock); |
d7e09d03 PT |
367 | /* |
368 | * Avoid reading stale imp_connect_data. When not sure if pings are | |
369 | * expected or not on next connection, we assume they are not and force | |
370 | * one anyway to guarantee the chance of updating | |
371 | * imp_peer_committed_transno. | |
372 | */ | |
373 | if (imp->imp_state != LUSTRE_IMP_FULL || | |
374 | OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS)) | |
375 | imp->imp_force_next_verify = 1; | |
376 | } | |
377 | ||
378 | int ptlrpc_pinger_add_import(struct obd_import *imp) | |
379 | { | |
d7e09d03 | 380 | if (!list_empty(&imp->imp_pinger_chain)) |
0a3bdb00 | 381 | return -EALREADY; |
d7e09d03 PT |
382 | |
383 | mutex_lock(&pinger_mutex); | |
384 | CDEBUG(D_HA, "adding pingable import %s->%s\n", | |
385 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); | |
386 | /* if we add to pinger we want recovery on this import */ | |
387 | imp->imp_obd->obd_no_recov = 0; | |
388 | ptlrpc_update_next_ping(imp, 0); | |
389 | /* XXX sort, blah blah */ | |
390 | list_add_tail(&imp->imp_pinger_chain, &pinger_imports); | |
391 | class_import_get(imp); | |
392 | ||
393 | ptlrpc_pinger_wake_up(); | |
394 | mutex_unlock(&pinger_mutex); | |
395 | ||
0a3bdb00 | 396 | return 0; |
d7e09d03 PT |
397 | } |
398 | EXPORT_SYMBOL(ptlrpc_pinger_add_import); | |
399 | ||
400 | int ptlrpc_pinger_del_import(struct obd_import *imp) | |
401 | { | |
d7e09d03 | 402 | if (list_empty(&imp->imp_pinger_chain)) |
0a3bdb00 | 403 | return -ENOENT; |
d7e09d03 PT |
404 | |
405 | mutex_lock(&pinger_mutex); | |
406 | list_del_init(&imp->imp_pinger_chain); | |
407 | CDEBUG(D_HA, "removing pingable import %s->%s\n", | |
408 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); | |
409 | /* if we remove from pinger we don't want recovery on this import */ | |
410 | imp->imp_obd->obd_no_recov = 1; | |
411 | class_import_put(imp); | |
412 | mutex_unlock(&pinger_mutex); | |
0a3bdb00 | 413 | return 0; |
d7e09d03 PT |
414 | } |
415 | EXPORT_SYMBOL(ptlrpc_pinger_del_import); | |
416 | ||
417 | /** | |
418 | * Register a timeout callback to the pinger list, and the callback will | |
419 | * be called when timeout happens. | |
420 | */ | |
f9aaa43e MF |
421 | static struct timeout_item *ptlrpc_new_timeout(int time, |
422 | enum timeout_event event, timeout_cb_t cb, void *data) | |
d7e09d03 PT |
423 | { |
424 | struct timeout_item *ti; | |
425 | ||
9ae10597 | 426 | ti = kzalloc(sizeof(*ti), GFP_NOFS); |
d7e09d03 | 427 | if (!ti) |
fbe7c6c7 | 428 | return NULL; |
d7e09d03 PT |
429 | |
430 | INIT_LIST_HEAD(&ti->ti_obd_list); | |
431 | INIT_LIST_HEAD(&ti->ti_chain); | |
432 | ti->ti_timeout = time; | |
433 | ti->ti_event = event; | |
434 | ti->ti_cb = cb; | |
435 | ti->ti_cb_data = data; | |
436 | ||
437 | return ti; | |
438 | } | |
439 | ||
440 | /** | |
fa4d19c4 | 441 | * Register timeout event on the pinger thread. |
d7e09d03 PT |
442 | * Note: the timeout list is an sorted list with increased timeout value. |
443 | */ | |
444 | static struct timeout_item* | |
445 | ptlrpc_pinger_register_timeout(int time, enum timeout_event event, | |
446 | timeout_cb_t cb, void *data) | |
447 | { | |
448 | struct timeout_item *item, *tmp; | |
449 | ||
450 | LASSERT(mutex_is_locked(&pinger_mutex)); | |
451 | ||
452 | list_for_each_entry(item, &timeout_list, ti_chain) | |
453 | if (item->ti_event == event) | |
454 | goto out; | |
455 | ||
456 | item = ptlrpc_new_timeout(time, event, cb, data); | |
457 | if (item) { | |
458 | list_for_each_entry_reverse(tmp, &timeout_list, ti_chain) { | |
459 | if (tmp->ti_timeout < time) { | |
460 | list_add(&item->ti_chain, &tmp->ti_chain); | |
461 | goto out; | |
462 | } | |
463 | } | |
464 | list_add(&item->ti_chain, &timeout_list); | |
465 | } | |
466 | out: | |
467 | return item; | |
468 | } | |
469 | ||
470 | /* Add a client_obd to the timeout event list, when timeout(@time) | |
471 | * happens, the callback(@cb) will be called. | |
472 | */ | |
473 | int ptlrpc_add_timeout_client(int time, enum timeout_event event, | |
474 | timeout_cb_t cb, void *data, | |
475 | struct list_head *obd_list) | |
476 | { | |
477 | struct timeout_item *ti; | |
478 | ||
479 | mutex_lock(&pinger_mutex); | |
480 | ti = ptlrpc_pinger_register_timeout(time, event, cb, data); | |
481 | if (!ti) { | |
482 | mutex_unlock(&pinger_mutex); | |
fbe7c6c7 | 483 | return -EINVAL; |
d7e09d03 PT |
484 | } |
485 | list_add(obd_list, &ti->ti_obd_list); | |
486 | mutex_unlock(&pinger_mutex); | |
487 | return 0; | |
488 | } | |
489 | EXPORT_SYMBOL(ptlrpc_add_timeout_client); | |
490 | ||
491 | int ptlrpc_del_timeout_client(struct list_head *obd_list, | |
492 | enum timeout_event event) | |
493 | { | |
494 | struct timeout_item *ti = NULL, *item; | |
495 | ||
496 | if (list_empty(obd_list)) | |
497 | return 0; | |
498 | mutex_lock(&pinger_mutex); | |
499 | list_del_init(obd_list); | |
500 | /** | |
501 | * If there are no obd attached to the timeout event | |
502 | * list, remove this timeout event from the pinger | |
503 | */ | |
504 | list_for_each_entry(item, &timeout_list, ti_chain) { | |
505 | if (item->ti_event == event) { | |
506 | ti = item; | |
507 | break; | |
508 | } | |
509 | } | |
998d2766 | 510 | LASSERTF(ti != NULL, "ti is NULL !\n"); |
d7e09d03 PT |
511 | if (list_empty(&ti->ti_obd_list)) { |
512 | list_del(&ti->ti_chain); | |
9ae10597 | 513 | kfree(ti); |
d7e09d03 PT |
514 | } |
515 | mutex_unlock(&pinger_mutex); | |
516 | return 0; | |
517 | } | |
518 | EXPORT_SYMBOL(ptlrpc_del_timeout_client); | |
519 | ||
520 | int ptlrpc_pinger_remove_timeouts(void) | |
521 | { | |
522 | struct timeout_item *item, *tmp; | |
523 | ||
524 | mutex_lock(&pinger_mutex); | |
525 | list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) { | |
526 | LASSERT(list_empty(&item->ti_obd_list)); | |
527 | list_del(&item->ti_chain); | |
9ae10597 | 528 | kfree(item); |
d7e09d03 PT |
529 | } |
530 | mutex_unlock(&pinger_mutex); | |
531 | return 0; | |
532 | } | |
533 | ||
7d46a21a | 534 | void ptlrpc_pinger_wake_up(void) |
d7e09d03 | 535 | { |
20802057 DE |
536 | thread_add_flags(&pinger_thread, SVC_EVENT); |
537 | wake_up(&pinger_thread.t_ctl_waitq); | |
d7e09d03 PT |
538 | } |
539 | ||
/* Ping evictor thread */
/* Values stored in pet_state. */
#define PET_READY 1
#define PET_TERMINATE 2

/* ping_evictor_start() calls not yet balanced by ping_evictor_stop(). */
static int pet_refcount;
/* PET_READY once the evictor thread runs; PET_TERMINATE after the last stop. */
static int pet_state;
/* Wait queue the evictor thread sleeps on. */
static wait_queue_head_t pet_waitq;
/* obd devices queued by ping_evictor_wake(), linked via obd_evict_list. */
static LIST_HEAD(pet_list);
/* Taken around accesses to pet_list. */
static DEFINE_SPINLOCK(pet_lock);
549 | ||
550 | int ping_evictor_wake(struct obd_export *exp) | |
551 | { | |
552 | struct obd_device *obd; | |
553 | ||
554 | spin_lock(&pet_lock); | |
555 | if (pet_state != PET_READY) { | |
556 | /* eventually the new obd will call here again. */ | |
557 | spin_unlock(&pet_lock); | |
558 | return 1; | |
559 | } | |
560 | ||
561 | obd = class_exp2obd(exp); | |
562 | if (list_empty(&obd->obd_evict_list)) { | |
563 | class_incref(obd, "evictor", obd); | |
564 | list_add(&obd->obd_evict_list, &pet_list); | |
565 | } | |
566 | spin_unlock(&pet_lock); | |
567 | ||
568 | wake_up(&pet_waitq); | |
569 | return 0; | |
570 | } | |
571 | ||
/**
 * Body of the ping evictor kernel thread.
 *
 * Sleeps until an obd device is queued on pet_list or termination is
 * requested.  For each queued device it fails every export on
 * obd_exports_timed whose last request is older than PING_EVICT_TIMEOUT
 * seconds, then removes the device from pet_list and drops the "evictor"
 * reference.  The thread exits once termination has been requested and
 * pet_list is empty.
 *
 * \a arg is unused.
 *
 * Always returns 0.
 */
static int ping_evictor_main(void *arg)
{
	struct obd_device *obd;
	struct obd_export *exp;
	struct l_wait_info lwi = { 0 };
	time_t expire_time;

	unshare_fs_struct();

	CDEBUG(D_HA, "Starting Ping Evictor\n");
	pet_state = PET_READY;
	while (1) {
		l_wait_event(pet_waitq, (!list_empty(&pet_list)) ||
			     (pet_state == PET_TERMINATE), &lwi);

		/* loop until all obd's will be removed */
		if ((pet_state == PET_TERMINATE) && list_empty(&pet_list))
			break;

		/* we only get here if pet_exp != NULL, and the end of this
		 * loop is the only place which sets it NULL again, so lock
		 * is not strictly necessary.
		 * NOTE(review): no "pet_exp" is visible in this function;
		 * this comment looks stale.  What the code does here is
		 * read the first entry of pet_list under pet_lock. */
		spin_lock(&pet_lock);
		obd = list_entry(pet_list.next, struct obd_device,
				 obd_evict_list);
		spin_unlock(&pet_lock);

		/* Exports whose last request predates this are evicted. */
		expire_time = get_seconds() - PING_EVICT_TIMEOUT;

		CDEBUG(D_HA, "evicting all exports of obd %s older than %ld\n",
		       obd->obd_name, expire_time);

		/* Exports can't be deleted out of the list while we hold
		 * the obd lock (class_unlink_export), which means we can't
		 * lose the last ref on the export.  If they've already been
		 * removed from the list, we won't find them here. */
		spin_lock(&obd->obd_dev_lock);
		while (!list_empty(&obd->obd_exports_timed)) {
			exp = list_entry(obd->obd_exports_timed.next,
					 struct obd_export,
					 exp_obd_chain_timed);
			if (expire_time > exp->exp_last_request_time) {
				/* Pin the export, then drop obd_dev_lock
				 * around the warning and the eviction. */
				class_export_get(exp);
				spin_unlock(&obd->obd_dev_lock);
				LCONSOLE_WARN("%s: haven't heard from client %s (at %s) in %ld seconds. I think it's dead, and I am evicting it. exp %p, cur %ld expire %ld last %ld\n",
					      obd->obd_name,
					      obd_uuid2str(&exp->exp_client_uuid),
					      obd_export_nid2str(exp),
					      (long)(get_seconds() -
						     exp->exp_last_request_time),
					      exp, (long)get_seconds(),
					      (long)expire_time,
					      (long)exp->exp_last_request_time);
				CDEBUG(D_HA, "Last request was at %ld\n",
				       exp->exp_last_request_time);
				class_fail_export(exp);
				class_export_put(exp);
				/* Retake the lock before re-reading the
				 * list head. */
				spin_lock(&obd->obd_dev_lock);
			} else {
				/* List is sorted, so everyone below is ok */
				break;
			}
		}
		spin_unlock(&obd->obd_dev_lock);

		/* Scan finished: take the device off the evictor list. */
		spin_lock(&pet_lock);
		list_del_init(&obd->obd_evict_list);
		spin_unlock(&pet_lock);

		/* Matches the class_incref() done when it was queued. */
		class_decref(obd, "evictor", obd);
	}
	CDEBUG(D_HA, "Exiting Ping Evictor\n");

	return 0;
}
647 | ||
648 | void ping_evictor_start(void) | |
649 | { | |
68b636b6 | 650 | struct task_struct *task; |
d7e09d03 PT |
651 | |
652 | if (++pet_refcount > 1) | |
653 | return; | |
654 | ||
655 | init_waitqueue_head(&pet_waitq); | |
656 | ||
657 | task = kthread_run(ping_evictor_main, NULL, "ll_evictor"); | |
658 | if (IS_ERR(task)) { | |
659 | pet_refcount--; | |
660 | CERROR("Cannot start ping evictor thread: %ld\n", | |
661 | PTR_ERR(task)); | |
662 | } | |
663 | } | |
664 | EXPORT_SYMBOL(ping_evictor_start); | |
665 | ||
666 | void ping_evictor_stop(void) | |
667 | { | |
668 | if (--pet_refcount > 0) | |
669 | return; | |
670 | ||
671 | pet_state = PET_TERMINATE; | |
672 | wake_up(&pet_waitq); | |
673 | } | |
674 | EXPORT_SYMBOL(ping_evictor_stop); |