kernel/cpu.c

   1 /* CPU control.
   2  * (C) 2001, 2002, 2003, 2004 Rusty Russell
   3  *
   4  * This code is licenced under the GPL.
   5  */
   6 #include <linux/proc_fs.h>
   7 #include <linux/smp.h>
   8 #include <linux/init.h>
   9 #include <linux/notifier.h>
  10 #include <linux/sched.h>
  11 #include <linux/unistd.h>
  12 #include <linux/cpu.h>
  13 #include <linux/oom.h>
  14 #include <linux/rcupdate.h>
  15 #include <linux/export.h>
  16 #include <linux/bug.h>
  17 #include <linux/kthread.h>
  18 #include <linux/stop_machine.h>
  19 #include <linux/mutex.h>
  20 #include <linux/gfp.h>
  21 #include <linux/suspend.h>
  22 #include <linux/lockdep.h>
  23 #include <linux/tick.h>
  24 #include <linux/irq.h>
  25
  26 #include <trace/events/power.h>
  27 #define CREATE_TRACE_POINTS
  28 #include <trace/events/cpuhp.h>
  29
  30 #include "smpboot.h"
  31
/**
 * struct cpuhp_cpu_state - Per cpu hotplug state storage
 * @state:      The current cpu state
 * @target:     The target state
 *
 * The hotplug paths in this file move @state one step at a time towards
 * @target and invoke the startup or teardown callback of each step passed.
 */
struct cpuhp_cpu_state {
        enum cpuhp_state        state;
        enum cpuhp_state        target;
};
  41
/* Hotplug state of each CPU; accessed via per_cpu_ptr()/this_cpu_ptr() */
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
  43
/**
 * struct cpuhp_step - Hotplug state machine step
 * @name:       Name of the step
 * @startup:    Startup function of the step
 * @teardown:   Teardown function of the step
 * @skip_onerr: Do not invoke the functions on error rollback
 *              Will go away once the notifiers are gone
 *
 * @startup and @teardown may be NULL; cpuhp_invoke_callback() skips a NULL
 * callback and reports success for it.
 */
struct cpuhp_step {
        const char      *name;
        int             (*startup)(unsigned int cpu);
        int             (*teardown)(unsigned int cpu);
        bool            skip_onerr;
};
  58
/*
 * NOTE(review): not referenced in this part of the file - presumably used
 * by the state registration code further down; confirm.
 */
static DEFINE_MUTEX(cpuhp_state_mutex);
/* Forward declarations: the step tables are defined after the hotplug code */
static struct cpuhp_step cpuhp_bp_states[];
static struct cpuhp_step cpuhp_ap_states[];
  62
  63 /**
  64  * cpuhp_invoke_callback _ Invoke the callbacks for a given state
  65  * @cpu:        The cpu for which the callback should be invoked
  66  * @step:       The step in the state machine
  67  * @cb:         The callback function to invoke
  68  *
  69  * Called from cpu hotplug and from the state register machinery
  70  */
  71 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
  72                                  int (*cb)(unsigned int))
  73 {
  74         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  75         int ret = 0;
  76
  77         if (cb) {
  78                 trace_cpuhp_enter(cpu, st->target, step, cb);
  79                 ret = cb(cpu);
  80                 trace_cpuhp_exit(cpu, st->state, step, ret);
  81         }
  82         return ret;
  83 }
  84
  85 #ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
/*
 * Copy of the tasks_frozen argument of the most recent _cpu_up() or
 * _cpu_down() call.  While it is true, __cpu_notify() ORs CPU_TASKS_FROZEN
 * into every notifier event it sends.
 */
bool cpuhp_tasks_frozen;
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
  90
/*
 * The following two APIs (cpu_maps_update_begin/done) must be used when
 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 * The APIs cpu_notifier_register_begin/done() must be used to protect CPU
 * hotplug callback (un)registration performed using __register_cpu_notifier()
 * or __unregister_cpu_notifier().
 *
 * NOTE(review): the export below names cpu_notifier_register_begin rather
 * than cpu_maps_update_begin - presumably a header aliases one name to the
 * other; confirm before changing either.
 */
void cpu_maps_update_begin(void)
{
        /* Held until the matching cpu_maps_update_done() */
        mutex_lock(&cpu_add_remove_lock);
}
EXPORT_SYMBOL(cpu_notifier_register_begin);
 103
/*
 * Counterpart of cpu_maps_update_begin(): drops cpu_add_remove_lock.
 *
 * NOTE(review): the export below names cpu_notifier_register_done rather
 * than cpu_maps_update_done - presumably a header aliases one name to the
 * other; confirm before changing either.
 */
void cpu_maps_update_done(void)
{
        mutex_unlock(&cpu_add_remove_lock);
}
EXPORT_SYMBOL(cpu_notifier_register_done);
 109
/* Notifier chain that __cpu_notify() walks for every hotplug event */
static RAW_NOTIFIER_HEAD(cpu_chain);

/*
 * If non-zero, cpu_up and cpu_down will return -EBUSY and do nothing.
 * This is a nesting count: the disable paths increment it and the enable
 * paths decrement it again.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;
 116
 117 #ifdef CONFIG_HOTPLUG_CPU
 118
/*
 * Bookkeeping for the hotplug reader/writer scheme: get/put_online_cpus()
 * are the readers, cpu_hotplug_begin()/cpu_hotplug_done() the writer.
 */
static struct {
        /* task between cpu_hotplug_begin() and cpu_hotplug_done(), else NULL */
        struct task_struct *active_writer;
        /* wait queue to wake up the active_writer */
        wait_queue_head_t wq;
        /* verifies that no writer will get active while readers are active */
        struct mutex lock;
        /*
         * Number of readers currently between get_online_cpus() and
         * put_online_cpus().
         * Also blocks the new readers during
         * an ongoing cpu hotplug operation.
         */
        atomic_t refcount;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /* lockdep-only map used by the cpuhp_lock_*() annotations */
        struct lockdep_map dep_map;
#endif
} cpu_hotplug = {
        .active_writer = NULL,
        .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
        .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        .dep_map = {.name = "cpu_hotplug.lock" },
#endif
};
 142
/*
 * Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end()
 *
 * NOTE(review): cpu_hotplug.dep_map only exists under
 * CONFIG_DEBUG_LOCK_ALLOC - presumably the lock_map_*() helpers discard
 * their argument in other configurations; confirm.
 */
#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire_tryread() \
                                  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
 149
 150
 151 void get_online_cpus(void)
 152 {
 153         might_sleep();
 154         if (cpu_hotplug.active_writer == current)
 155                 return;
 156         cpuhp_lock_acquire_read();
 157         mutex_lock(&cpu_hotplug.lock);
 158         atomic_inc(&cpu_hotplug.refcount);
 159         mutex_unlock(&cpu_hotplug.lock);
 160 }
 161 EXPORT_SYMBOL_GPL(get_online_cpus);
 162
 163 void put_online_cpus(void)
 164 {
 165         int refcount;
 166
 167         if (cpu_hotplug.active_writer == current)
 168                 return;
 169
 170         refcount = atomic_dec_return(&cpu_hotplug.refcount);
 171         if (WARN_ON(refcount < 0)) /* try to fix things up */
 172                 atomic_inc(&cpu_hotplug.refcount);
 173
 174         if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
 175                 wake_up(&cpu_hotplug.wq);
 176
 177         cpuhp_lock_release();
 178
 179 }
 180 EXPORT_SYMBOL_GPL(put_online_cpus);
 181
 182 /*
 183  * This ensures that the hotplug operation can begin only when the
 184  * refcount goes to zero.
 185  *
 186  * Note that during a cpu-hotplug operation, the new readers, if any,
 187  * will be blocked by the cpu_hotplug.lock
 188  *
 189  * Since cpu_hotplug_begin() is always called after invoking
 190  * cpu_maps_update_begin(), we can be sure that only one writer is active.
 191  *
 192  * Note that theoretically, there is a possibility of a livelock:
 193  * - Refcount goes to zero, last reader wakes up the sleeping
 194  *   writer.
 195  * - Last reader unlocks the cpu_hotplug.lock.
 196  * - A new reader arrives at this moment, bumps up the refcount.
 197  * - The writer acquires the cpu_hotplug.lock finds the refcount
 198  *   non zero and goes to sleep again.
 199  *
 200  * However, this is very difficult to achieve in practice since
 201  * get_online_cpus() not an api which is called all that often.
 202  *
 203  */
 204 void cpu_hotplug_begin(void)
 205 {
 206         DEFINE_WAIT(wait);
 207
 208         cpu_hotplug.active_writer = current;
 209         cpuhp_lock_acquire();
 210
 211         for (;;) {
 212                 mutex_lock(&cpu_hotplug.lock);
 213                 prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
 214                 if (likely(!atomic_read(&cpu_hotplug.refcount)))
 215                                 break;
 216                 mutex_unlock(&cpu_hotplug.lock);
 217                 schedule();
 218         }
 219         finish_wait(&cpu_hotplug.wq, &wait);
 220 }
 221
/*
 * End the write section started by cpu_hotplug_begin(): clear the writer,
 * drop cpu_hotplug.lock (held since cpu_hotplug_begin() returned) and
 * release the lockdep annotation.
 */
void cpu_hotplug_done(void)
{
        cpu_hotplug.active_writer = NULL;
        mutex_unlock(&cpu_hotplug.lock);
        cpuhp_lock_release();
}
 228
/*
 * Wait for currently running CPU hotplug operations to complete (if any) and
 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 * hotplug path before performing hotplug operations. So acquiring that lock
 * guarantees mutual exclusion from any currently running hotplug operations.
 *
 * Calls nest: each one increments 'cpu_hotplug_disabled' and has to be
 * balanced by one cpu_hotplug_enable().
 */
void cpu_hotplug_disable(void)
{
        cpu_maps_update_begin();
        cpu_hotplug_disabled++;
        cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
 243
 244 void cpu_hotplug_enable(void)
 245 {
 246         cpu_maps_update_begin();
 247         WARN_ON(--cpu_hotplug_disabled < 0);
 248         cpu_maps_update_done();
 249 }
 250 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 251 #endif  /* CONFIG_HOTPLUG_CPU */
 252
 253 /* Need to know about CPUs going up/down? */
 254 int register_cpu_notifier(struct notifier_block *nb)
 255 {
 256         int ret;
 257         cpu_maps_update_begin();
 258         ret = raw_notifier_chain_register(&cpu_chain, nb);
 259         cpu_maps_update_done();
 260         return ret;
 261 }
 262
/*
 * Lockless variant of register_cpu_notifier(): adds @nb to cpu_chain
 * without taking cpu_add_remove_lock itself, so the caller has to provide
 * the serialization.
 */
int __register_cpu_notifier(struct notifier_block *nb)
{
        return raw_notifier_chain_register(&cpu_chain, nb);
}
 267
 268 static int __cpu_notify(unsigned long val, unsigned int cpu, int nr_to_call,
 269                         int *nr_calls)
 270 {
 271         unsigned long mod = cpuhp_tasks_frozen ? CPU_TASKS_FROZEN : 0;
 272         void *hcpu = (void *)(long)cpu;
 273
 274         int ret;
 275
 276         ret = __raw_notifier_call_chain(&cpu_chain, val | mod, hcpu, nr_to_call,
 277                                         nr_calls);
 278
 279         return notifier_to_errno(ret);
 280 }
 281
/*
 * Convenience form of __cpu_notify(): passes -1 as nr_to_call and does not
 * collect the number of calls made.
 */
static int cpu_notify(unsigned long val, unsigned int cpu)
{
        return __cpu_notify(val, cpu, -1, NULL);
}
 286
 287 /* Notifier wrappers for transitioning to state machine */
 288 static int notify_prepare(unsigned int cpu)
 289 {
 290         int nr_calls = 0;
 291         int ret;
 292
 293         ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, &nr_calls);
 294         if (ret) {
 295                 nr_calls--;
 296                 printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
 297                                 __func__, cpu);
 298                 __cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL);
 299         }
 300         return ret;
 301 }
 302
/*
 * Send CPU_ONLINE for @cpu.  The result of the notifier chain is ignored;
 * this step always reports success.
 */
static int notify_online(unsigned int cpu)
{
        cpu_notify(CPU_ONLINE, cpu);
        return 0;
}
 308
/*
 * Send CPU_STARTING for @cpu.  The result of the notifier chain is ignored;
 * this step always reports success.
 */
static int notify_starting(unsigned int cpu)
{
        cpu_notify(CPU_STARTING, cpu);
        return 0;
}
 314
 315 static int bringup_cpu(unsigned int cpu)
 316 {
 317         struct task_struct *idle = idle_thread_get(cpu);
 318         int ret;
 319
 320         /* Arch-specific enabling code. */
 321         ret = __cpu_up(cpu, idle);
 322         if (ret) {
 323                 cpu_notify(CPU_UP_CANCELED, cpu);
 324                 return ret;
 325         }
 326         BUG_ON(!cpu_online(cpu));
 327         return 0;
 328 }
 329
 330 #ifdef CONFIG_HOTPLUG_CPU
/* The registration functions are exported only with CONFIG_HOTPLUG_CPU */
EXPORT_SYMBOL(register_cpu_notifier);
EXPORT_SYMBOL(__register_cpu_notifier);
 333
/* Remove @nb from cpu_chain while holding cpu_add_remove_lock. */
void unregister_cpu_notifier(struct notifier_block *nb)
{
        cpu_maps_update_begin();
        raw_notifier_chain_unregister(&cpu_chain, nb);
        cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
 341
/*
 * Lockless variant of unregister_cpu_notifier(): removes @nb from cpu_chain
 * without taking cpu_add_remove_lock itself, so the caller has to provide
 * the serialization.
 */
void __unregister_cpu_notifier(struct notifier_block *nb)
{
        raw_notifier_chain_unregister(&cpu_chain, nb);
}
EXPORT_SYMBOL(__unregister_cpu_notifier);
 347
 348 /**
 349  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 350  * @cpu: a CPU id
 351  *
 352  * This function walks all processes, finds a valid mm struct for each one and
 353  * then clears a corresponding bit in mm's cpumask.  While this all sounds
 354  * trivial, there are various non-obvious corner cases, which this function
 355  * tries to solve in a safe manner.
 356  *
 357  * Also note that the function uses a somewhat relaxed locking scheme, so it may
 358  * be called only for an already offlined CPU.
 359  */
 360 void clear_tasks_mm_cpumask(int cpu)
 361 {
 362         struct task_struct *p;
 363
 364         /*
 365          * This function is called after the cpu is taken down and marked
 366          * offline, so its not like new tasks will ever get this cpu set in
 367          * their mm mask. -- Peter Zijlstra
 368          * Thus, we may use rcu_read_lock() here, instead of grabbing
 369          * full-fledged tasklist_lock.
 370          */
 371         WARN_ON(cpu_online(cpu));
 372         rcu_read_lock();
 373         for_each_process(p) {
 374                 struct task_struct *t;
 375
 376                 /*
 377                  * Main thread might exit, but other threads may still have
 378                  * a valid mm. Find one.
 379                  */
 380                 t = find_lock_task_mm(p);
 381                 if (!t)
 382                         continue;
 383                 cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
 384                 task_unlock(t);
 385         }
 386         rcu_read_unlock();
 387 }
 388
 389 static inline void check_for_tasks(int dead_cpu)
 390 {
 391         struct task_struct *g, *p;
 392
 393         read_lock(&tasklist_lock);
 394         for_each_process_thread(g, p) {
 395                 if (!p->on_rq)
 396                         continue;
 397                 /*
 398                  * We do the check with unlocked task_rq(p)->lock.
 399                  * Order the reading to do not warn about a task,
 400                  * which was running on this cpu in the past, and
 401                  * it's just been woken on another cpu.
 402                  */
 403                 rmb();
 404                 if (task_cpu(p) != dead_cpu)
 405                         continue;
 406
 407                 pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
 408                         p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
 409         }
 410         read_unlock(&tasklist_lock);
 411 }
 412
/*
 * Send an event whose notifiers are not allowed to fail: a non-zero result
 * from cpu_notify() triggers BUG_ON().
 */
static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
{
        BUG_ON(cpu_notify(val, cpu));
}
 417
 418 static int notify_down_prepare(unsigned int cpu)
 419 {
 420         int err, nr_calls = 0;
 421
 422         err = __cpu_notify(CPU_DOWN_PREPARE, cpu, -1, &nr_calls);
 423         if (err) {
 424                 nr_calls--;
 425                 __cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL);
 426                 pr_warn("%s: attempt to take down CPU %u failed\n",
 427                                 __func__, cpu);
 428         }
 429         return err;
 430 }
 431
/*
 * Send CPU_DYING for @cpu.  The result of the notifier chain is ignored;
 * this step always reports success.
 */
static int notify_dying(unsigned int cpu)
{
        cpu_notify(CPU_DYING, cpu);
        return 0;
}
 437
 438 /* Take this CPU down. */
 439 static int take_cpu_down(void *_param)
 440 {
 441         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 442         enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
 443         int err, cpu = smp_processor_id();
 444
 445         /* Ensure this CPU doesn't handle any more interrupts. */
 446         err = __cpu_disable();
 447         if (err < 0)
 448                 return err;
 449
 450         /* Invoke the former CPU_DYING callbacks */
 451         for (; st->state > target; st->state--) {
 452                 struct cpuhp_step *step = cpuhp_ap_states + st->state;
 453
 454                 cpuhp_invoke_callback(cpu, st->state, step->teardown);
 455         }
 456         /* Give up timekeeping duties */
 457         tick_handover_do_timer();
 458         /* Park the stopper thread */
 459         stop_machine_park(cpu);
 460         return 0;
 461 }
 462
/*
 * Teardown callback of the "cpu:teardown" step.
 *
 * Waits for RCU users, parks the smpboot threads of @cpu, runs
 * take_cpu_down() on @cpu through stop_machine(), waits until the cpu
 * flags itself in cpu_dead_idle and finally lets the architecture kill it.
 *
 * Returns 0 on success.  If stop_machine() reports an error,
 * CPU_DOWN_FAILED is sent and that error is returned.
 */
static int takedown_cpu(unsigned int cpu)
{
        int err;

        /*
         * By now we've cleared cpu_active_mask, wait for all preempt-disabled
         * and RCU users of this state to go away such that all new such users
         * will observe it.
         *
         * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
         * not imply sync_sched(), so wait for both.
         *
         * Do the sync before parking the smpboot threads to take care of the
         * rcu boost case.
         */
        if (IS_ENABLED(CONFIG_PREEMPT))
                synchronize_rcu_mult(call_rcu, call_rcu_sched);
        else
                synchronize_rcu();

        smpboot_park_threads(cpu);

        /*
         * Prevent irq alloc/free while the dying cpu reorganizes the
         * interrupt affinities.
         */
        irq_lock_sparse();

        /*
         * So now all preempt/rcu users must observe !cpu_active().
         */
        err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
        if (err) {
                /* CPU didn't die: tell everyone.  Can't complain. */
                cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
                /* Do not leave with the sparse irq lock held */
                irq_unlock_sparse();
                return err;
        }
        BUG_ON(cpu_online(cpu));

        /*
         * The migration_call() CPU_DYING callback will have removed all
         * runnable tasks from the cpu, there's only the idle task left now
         * that the migration thread is done doing the stop_machine thing.
         *
         * Wait for the stop thread to go away.
         */
        while (!per_cpu(cpu_dead_idle, cpu))
                cpu_relax();
        smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
        /* Reset the flag for the next offline operation of this cpu */
        per_cpu(cpu_dead_idle, cpu) = false;

        /* Interrupts are moved away from the dying cpu, reenable alloc/free */
        irq_unlock_sparse();

        hotplug_cpu__broadcast_tick_pull(cpu);
        /* This actually kills the CPU. */
        __cpu_die(cpu);

        tick_cleanup_dead_cpu(cpu);
        return 0;
}
 524
/*
 * Teardown callback of the "notify:prepare" step: send CPU_DEAD (which must
 * not fail) and then warn about tasks still accounted to @cpu.
 */
static int notify_dead(unsigned int cpu)
{
        cpu_notify_nofail(CPU_DEAD, cpu);
        check_for_tasks(cpu);
        return 0;
}
 531
 532 #else
/*
 * Without CONFIG_HOTPLUG_CPU the teardown callbacks do not exist; the step
 * tables then get NULL teardown entries.
 */
#define notify_down_prepare     NULL
#define takedown_cpu            NULL
#define notify_dead             NULL
#define notify_dying            NULL
 537 #endif
 538
 539 #ifdef CONFIG_HOTPLUG_CPU
 540 static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
 541 {
 542         for (st->state++; st->state < st->target; st->state++) {
 543                 struct cpuhp_step *step = cpuhp_bp_states + st->state;
 544
 545                 if (!step->skip_onerr)
 546                         cpuhp_invoke_callback(cpu, st->state, step->startup);
 547         }
 548 }
 549
 550 /* Requires cpu_add_remove_lock to be held */
 551 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 552                            enum cpuhp_state target)
 553 {
 554         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 555         int prev_state, ret = 0;
 556         bool hasdied = false;
 557
 558         if (num_online_cpus() == 1)
 559                 return -EBUSY;
 560
 561         if (!cpu_online(cpu))
 562                 return -EINVAL;
 563
 564         cpu_hotplug_begin();
 565
 566         cpuhp_tasks_frozen = tasks_frozen;
 567
 568         prev_state = st->state;
 569         st->target = target;
 570         for (; st->state > st->target; st->state--) {
 571                 struct cpuhp_step *step = cpuhp_bp_states + st->state;
 572
 573                 ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
 574                 if (ret) {
 575                         st->target = prev_state;
 576                         undo_cpu_down(cpu, st);
 577                         break;
 578                 }
 579         }
 580         hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
 581
 582         cpu_hotplug_done();
 583         /* This post dead nonsense must die */
 584         if (!ret && hasdied)
 585                 cpu_notify_nofail(CPU_POST_DEAD, cpu);
 586         return ret;
 587 }
 588
 589 static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
 590 {
 591         int err;
 592
 593         cpu_maps_update_begin();
 594
 595         if (cpu_hotplug_disabled) {
 596                 err = -EBUSY;
 597                 goto out;
 598         }
 599
 600         err = _cpu_down(cpu, 0, target);
 601
 602 out:
 603         cpu_maps_update_done();
 604         return err;
 605 }
/* Take @cpu all the way down to CPUHP_OFFLINE; see do_cpu_down(). */
int cpu_down(unsigned int cpu)
{
        return do_cpu_down(cpu, CPUHP_OFFLINE);
}
EXPORT_SYMBOL(cpu_down);
 611 #endif /*CONFIG_HOTPLUG_CPU*/
 612
 613 /*
 614  * Unpark per-CPU smpboot kthreads at CPU-online time.
 615  */
 616 static int smpboot_thread_call(struct notifier_block *nfb,
 617                                unsigned long action, void *hcpu)
 618 {
 619         int cpu = (long)hcpu;
 620
 621         switch (action & ~CPU_TASKS_FROZEN) {
 622
 623         case CPU_DOWN_FAILED:
 624         case CPU_ONLINE:
 625                 smpboot_unpark_threads(cpu);
 626                 break;
 627
 628         default:
 629                 break;
 630         }
 631
 632         return NOTIFY_OK;
 633 }
 634
/* Notifier block hooking smpboot_thread_call() into the cpu notifier chain */
static struct notifier_block smpboot_thread_notifier = {
        .notifier_call = smpboot_thread_call,
        .priority = CPU_PRI_SMPBOOT,
};
 639
/* Register smpboot_thread_notifier; the registration result is ignored. */
void smpboot_thread_init(void)
{
        register_cpu_notifier(&smpboot_thread_notifier);
}
 644
 645 /**
 646  * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
 647  * @cpu: cpu that just started
 648  *
 649  * This function calls the cpu_chain notifiers with CPU_STARTING.
 650  * It must be called by the arch code on the new cpu, before the new cpu
 651  * enables interrupts and before the "boot" cpu returns from __cpu_up().
 652  */
 653 void notify_cpu_starting(unsigned int cpu)
 654 {
 655         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 656         enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
 657
 658         while (st->state < target) {
 659                 struct cpuhp_step *step;
 660
 661                 st->state++;
 662                 step = cpuhp_ap_states + st->state;
 663                 cpuhp_invoke_callback(cpu, st->state, step->startup);
 664         }
 665 }
 666
 667 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
 668 {
 669         for (st->state--; st->state > st->target; st->state--) {
 670                 struct cpuhp_step *step = cpuhp_bp_states + st->state;
 671
 672                 if (!step->skip_onerr)
 673                         cpuhp_invoke_callback(cpu, st->state, step->teardown);
 674         }
 675 }
 676
 677 /* Requires cpu_add_remove_lock to be held */
 678 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 679 {
 680         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 681         struct task_struct *idle;
 682         int prev_state, ret = 0;
 683
 684         cpu_hotplug_begin();
 685
 686         if (cpu_online(cpu) || !cpu_present(cpu)) {
 687                 ret = -EINVAL;
 688                 goto out;
 689         }
 690
 691         /* Let it fail before we try to bring the cpu up */
 692         idle = idle_thread_get(cpu);
 693         if (IS_ERR(idle)) {
 694                 ret = PTR_ERR(idle);
 695                 goto out;
 696         }
 697
 698         cpuhp_tasks_frozen = tasks_frozen;
 699
 700         prev_state = st->state;
 701         st->target = target;
 702         while (st->state < st->target) {
 703                 struct cpuhp_step *step;
 704
 705                 st->state++;
 706                 step = cpuhp_bp_states + st->state;
 707                 ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
 708                 if (ret) {
 709                         st->target = prev_state;
 710                         undo_cpu_up(cpu, st);
 711                         break;
 712                 }
 713         }
 714 out:
 715         cpu_hotplug_done();
 716         return ret;
 717 }
 718
 719 static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
 720 {
 721         int err = 0;
 722
 723         if (!cpu_possible(cpu)) {
 724                 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
 725                        cpu);
 726 #if defined(CONFIG_IA64)
 727                 pr_err("please check additional_cpus= boot parameter\n");
 728 #endif
 729                 return -EINVAL;
 730         }
 731
 732         err = try_online_node(cpu_to_node(cpu));
 733         if (err)
 734                 return err;
 735
 736         cpu_maps_update_begin();
 737
 738         if (cpu_hotplug_disabled) {
 739                 err = -EBUSY;
 740                 goto out;
 741         }
 742
 743         err = _cpu_up(cpu, 0, target);
 744 out:
 745         cpu_maps_update_done();
 746         return err;
 747 }
 748
/* Bring @cpu all the way up to CPUHP_ONLINE; see do_cpu_up(). */
int cpu_up(unsigned int cpu)
{
        return do_cpu_up(cpu, CPUHP_ONLINE);
}
EXPORT_SYMBOL_GPL(cpu_up);
 754
 755 #ifdef CONFIG_PM_SLEEP_SMP
/*
 * CPUs taken down by disable_nonboot_cpus(); enable_nonboot_cpus() brings
 * them back up and clears the mask.
 */
static cpumask_var_t frozen_cpus;
 757
 758 int disable_nonboot_cpus(void)
 759 {
 760         int cpu, first_cpu, error = 0;
 761
 762         cpu_maps_update_begin();
 763         first_cpu = cpumask_first(cpu_online_mask);
 764         /*
 765          * We take down all of the non-boot CPUs in one shot to avoid races
 766          * with the userspace trying to use the CPU hotplug at the same time
 767          */
 768         cpumask_clear(frozen_cpus);
 769
 770         pr_info("Disabling non-boot CPUs ...\n");
 771         for_each_online_cpu(cpu) {
 772                 if (cpu == first_cpu)
 773                         continue;
 774                 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
 775                 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
 776                 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
 777                 if (!error)
 778                         cpumask_set_cpu(cpu, frozen_cpus);
 779                 else {
 780                         pr_err("Error taking CPU%d down: %d\n", cpu, error);
 781                         break;
 782                 }
 783         }
 784
 785         if (!error)
 786                 BUG_ON(num_online_cpus() > 1);
 787         else
 788                 pr_err("Non-boot CPUs are not disabled\n");
 789
 790         /*
 791          * Make sure the CPUs won't be enabled by someone else. We need to do
 792          * this even in case of failure as all disable_nonboot_cpus() users are
 793          * supposed to do enable_nonboot_cpus() on the failure path.
 794          */
 795         cpu_hotplug_disabled++;
 796
 797         cpu_maps_update_done();
 798         return error;
 799 }
 800
/*
 * Weak default, does nothing.  Called by enable_nonboot_cpus() before the
 * first frozen cpu is brought back up.
 */
void __weak arch_enable_nonboot_cpus_begin(void)
{
}
 804
/*
 * Weak default, does nothing.  Called by enable_nonboot_cpus() after the
 * last frozen cpu has been processed.
 */
void __weak arch_enable_nonboot_cpus_end(void)
{
}
 808
 809 void enable_nonboot_cpus(void)
 810 {
 811         int cpu, error;
 812
 813         /* Allow everyone to use the CPU hotplug again */
 814         cpu_maps_update_begin();
 815         WARN_ON(--cpu_hotplug_disabled < 0);
 816         if (cpumask_empty(frozen_cpus))
 817                 goto out;
 818
 819         pr_info("Enabling non-boot CPUs ...\n");
 820
 821         arch_enable_nonboot_cpus_begin();
 822
 823         for_each_cpu(cpu, frozen_cpus) {
 824                 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
 825                 error = _cpu_up(cpu, 1, CPUHP_ONLINE);
 826                 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
 827                 if (!error) {
 828                         pr_info("CPU%d is up\n", cpu);
 829                         continue;
 830                 }
 831                 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
 832         }
 833
 834         arch_enable_nonboot_cpus_end();
 835
 836         cpumask_clear(frozen_cpus);
 837 out:
 838         cpu_maps_update_done();
 839 }
 840
 841 static int __init alloc_frozen_cpus(void)
 842 {
 843         if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
 844                 return -ENOMEM;
 845         return 0;
 846 }
 847 core_initcall(alloc_frozen_cpus);
 848
 849 /*
 850  * When callbacks for CPU hotplug notifications are being executed, we must
 851  * ensure that the state of the system with respect to the tasks being frozen
 852  * or not, as reported by the notification, remains unchanged *throughout the
 853  * duration* of the execution of the callbacks.
 854  * Hence we need to prevent the freezer from racing with regular CPU hotplug.
 855  *
 856  * This synchronization is implemented by mutually excluding regular CPU
 857  * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 858  * Hibernate notifications.
 859  */
 860 static int
 861 cpu_hotplug_pm_callback(struct notifier_block *nb,
 862                         unsigned long action, void *ptr)
 863 {
 864         switch (action) {
 865
 866         case PM_SUSPEND_PREPARE:
 867         case PM_HIBERNATION_PREPARE:
 868                 cpu_hotplug_disable();
 869                 break;
 870
 871         case PM_POST_SUSPEND:
 872         case PM_POST_HIBERNATION:
 873                 cpu_hotplug_enable();
 874                 break;
 875
 876         default:
 877                 return NOTIFY_DONE;
 878         }
 879
 880         return NOTIFY_OK;
 881 }
 882
 883
/* Hook cpu_hotplug_pm_callback() into the PM notifications at boot. */
static int __init cpu_hotplug_pm_sync_init(void)
{
        /*
         * cpu_hotplug_pm_callback has higher priority than x86
         * bsp_pm_callback which depends on cpu_hotplug_pm_callback
         * to disable cpu hotplug to avoid cpu hotplug race.
         *
         * NOTE(review): the priority passed below is 0, so "higher" only
         * holds if bsp_pm_callback registers with a negative priority;
         * confirm against the x86 code.
         */
        pm_notifier(cpu_hotplug_pm_callback, 0);
        return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);
 895
 896 #endif /* CONFIG_PM_SLEEP_SMP */
 897
 898 #endif /* CONFIG_SMP */
 899
/*
 * Boot processor state steps
 *
 * Indexed by enum cpuhp_state.  _cpu_up() walks the table upwards calling
 * .startup, _cpu_down() walks it downwards calling .teardown.  Entries with
 * .skip_onerr set are left out by the undo_cpu_up()/undo_cpu_down()
 * rollback.  Without CONFIG_SMP only the two end states exist.
 */
static struct cpuhp_step cpuhp_bp_states[] = {
        [CPUHP_OFFLINE] = {
                .name                   = "offline",
                .startup                = NULL,
                .teardown               = NULL,
        },
#ifdef CONFIG_SMP
        [CPUHP_CREATE_THREADS]= {
                .name                   = "threads:create",
                .startup                = smpboot_create_threads,
                .teardown               = NULL,
        },
        /* CPU_UP_PREPARE on the way up, CPU_DEAD on the way down */
        [CPUHP_NOTIFY_PREPARE] = {
                .name                   = "notify:prepare",
                .startup                = notify_prepare,
                .teardown               = notify_dead,
                .skip_onerr             = true,
        },
        /* Bringup and teardown are separate steps with one callback each */
        [CPUHP_BRINGUP_CPU] = {
                .name                   = "cpu:bringup",
                .startup                = bringup_cpu,
                .teardown               = NULL,
        },
        [CPUHP_TEARDOWN_CPU] = {
                .name                   = "cpu:teardown",
                .startup                = NULL,
                .teardown               = takedown_cpu,
        },
        /* CPU_ONLINE on the way up, CPU_DOWN_PREPARE on the way down */
        [CPUHP_NOTIFY_ONLINE] = {
                .name                   = "notify:online",
                .startup                = notify_online,
                .teardown               = notify_down_prepare,
        },
#endif
        [CPUHP_ONLINE] = {
                .name                   = "online",
                .startup                = NULL,
                .teardown               = NULL,
        },
};
 941
 942 /* Application processor state steps, indexed by state; the table cpuhp_get_step() uses for AP states */
 943 static struct cpuhp_step cpuhp_ap_states[] = {
 944 #ifdef CONFIG_SMP
 945         [CPUHP_AP_NOTIFY_STARTING] = {
 946                 .name                   = "notify:starting",
 947                 .startup                = notify_starting,
 948                 .teardown               = notify_dying,
 949                 .skip_onerr             = true, /* callbacks not invoked on error rollback */
 950         },
 951 #endif /* CONFIG_SMP */
 952         [CPUHP_ONLINE] = {
 953                 .name                   = "online",
 954                 .startup                = NULL,
 955                 .teardown               = NULL,
 956         },
 957 };
 958
 959 static bool cpuhp_is_ap_state(enum cpuhp_state state)
 960 {
 961         return CPUHP_AP_OFFLINE < state && state < CPUHP_AP_ONLINE; /* both bounds excluded */
 962 }
 963
 964 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 965 {
 966         /* AP-range states index cpuhp_ap_states, everything else cpuhp_bp_states */
 967         if (cpuhp_is_ap_state(state))
 968                 return &cpuhp_ap_states[state];
 969         return &cpuhp_bp_states[state];
 970 }
 971
 972 #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
 973 static ssize_t show_cpuhp_state(struct device *dev,
 974                                 struct device_attribute *attr, char *buf)
 975 {
 976         /* Emit the current hotplug state of CPU dev->id as a decimal number. */
 977         return sprintf(buf, "%d\n",
 978                        per_cpu_ptr(&cpuhp_state, dev->id)->state);
 979 }
 980 static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
 981
 982 static ssize_t show_cpuhp_target(struct device *dev,
 983                                  struct device_attribute *attr, char *buf)
 984 {
 985         /* Emit the target hotplug state of CPU dev->id as a decimal number. */
 986         return sprintf(buf, "%d\n",
 987                        per_cpu_ptr(&cpuhp_state, dev->id)->target);
 988 }
 989 static DEVICE_ATTR(target, 0444, show_cpuhp_target, NULL);
 990
 991 static struct attribute *cpuhp_cpu_attrs[] = {  /* attributes attached to each cpu device */
 992         &dev_attr_state.attr,   /* "state" file, backed by show_cpuhp_state() */
 993         &dev_attr_target.attr,  /* "target" file, backed by show_cpuhp_target() */
 994         NULL                    /* terminates the list */
 995 };
 996
 997 /* Attribute group named "hotplug" holding the per-CPU "state" and "target" files */
 998 static struct attribute_group cpuhp_cpu_attr_group = {
 999         .attrs = cpuhp_cpu_attrs,
1000         .name = "hotplug",
1001 };
1002
1003 static ssize_t show_cpuhp_states(struct device *dev,
1004                                  struct device_attribute *attr, char *buf)
1005 {
1006         ssize_t res = 0;
1007         int i;
1008
1009         /* One "<index>: <name>" line per named step, never beyond the sysfs page */
1010         mutex_lock(&cpuhp_state_mutex);
1011         for (i = 0; i <= CPUHP_ONLINE; i++) {
1012                 struct cpuhp_step *sp = cpuhp_get_step(i);
1013
1014                 /* scnprintf() returns the bytes actually stored, so res < PAGE_SIZE */
1015                 if (sp->name)
1016                         res += scnprintf(buf + res, PAGE_SIZE - res,
1017                                          "%3d: %s\n", i, sp->name);
1018         }
1019         mutex_unlock(&cpuhp_state_mutex);
1020         return res;
1021 }
1022 static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
1023
1024 static struct attribute *cpuhp_cpu_root_attrs[] = {     /* attributes of the cpu root device */
1025         &dev_attr_states.attr,  /* "states" file, backed by show_cpuhp_states() */
1026         NULL                    /* terminates the list */
1027 };
1028
1029 /* Attribute group named "hotplug" holding the root-level "states" file */
1030 static struct attribute_group cpuhp_cpu_root_attr_group = {
1031         .attrs = cpuhp_cpu_root_attrs,
1032         .name = "hotplug",
1033 };
1034
1035 static int __init cpuhp_sysfs_init(void)
1036 {
1037         int cpu, err;
1038
1039         /* Root group on the cpu subsystem device first, then one group per cpu device */
1040         err = sysfs_create_group(&cpu_subsys.dev_root->kobj,
1041                                  &cpuhp_cpu_root_attr_group);
1042         if (err)
1043                 return err;
1044
1045         for_each_possible_cpu(cpu) {
1046                 struct device *cpu_dev = get_cpu_device(cpu);
1047
1048                 err = cpu_dev ? sysfs_create_group(&cpu_dev->kobj,
1049                                                    &cpuhp_cpu_attr_group) : 0;
1050                 if (err)        /* a CPU without a device just yields 0 here */
1051                         return err;
1052         }
1053         return 0;
1054 }
1055 device_initcall(cpuhp_sysfs_init);
1056 #endif
1057
1058 /*
1059  * cpu_bit_bitmap[] is a special, "compressed" data structure that
1060  * represents the single-bit values 1<<nr for all NR_CPUS bits.
1061  *
1062  * It is used by cpumask_of() to get a constant address to a CPU
1063  * mask value that has a single bit set only.
1064  */
1065
1066 /* cpu_bit_bitmap[0] is empty - so we can back into it; row x+1 holds 1UL << x in word 0 */
1067 #define MASK_DECLARE_1(x)       [x+1][0] = (1UL << (x))
1068 #define MASK_DECLARE_2(x)       MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
1069 #define MASK_DECLARE_4(x)       MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
1070 #define MASK_DECLARE_8(x)       MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
1071
1072 const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
1073
1074         MASK_DECLARE_8(0),      MASK_DECLARE_8(8),
1075         MASK_DECLARE_8(16),     MASK_DECLARE_8(24),
1076 #if BITS_PER_LONG > 32
1077         MASK_DECLARE_8(32),     MASK_DECLARE_8(40),
1078         MASK_DECLARE_8(48),     MASK_DECLARE_8(56),
1079 #endif /* BITS_PER_LONG > 32: rows for bits 32..63 */
1080 };
1081 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
1082
1083 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;     /* read-only NR_CPUS-bit bitmap */
1084 EXPORT_SYMBOL(cpu_all_bits);
1085
1086 #ifdef CONFIG_INIT_ALL_POSSIBLE
1087 struct cpumask __cpu_possible_mask __read_mostly
1088         = {CPU_BITS_ALL};       /* pre-filled from CPU_BITS_ALL */
1089 #else
1090 struct cpumask __cpu_possible_mask __read_mostly;       /* no initializer: starts all zero */
1091 #endif /* CONFIG_INIT_ALL_POSSIBLE */
1092 EXPORT_SYMBOL(__cpu_possible_mask);
1093
1094 struct cpumask __cpu_online_mask __read_mostly;         /* overwritten by init_cpu_online() */
1095 EXPORT_SYMBOL(__cpu_online_mask);
1096
1097 struct cpumask __cpu_present_mask __read_mostly;        /* overwritten by init_cpu_present() */
1098 EXPORT_SYMBOL(__cpu_present_mask);
1099
1100 struct cpumask __cpu_active_mask __read_mostly;         /* no initializer: starts all zero */
1101 EXPORT_SYMBOL(__cpu_active_mask);
1102
1103 void init_cpu_present(const struct cpumask *src)
1104 {
1105         cpumask_copy(&__cpu_present_mask, src); /* present mask becomes a copy of *src */
1106 }
1107
1108 void init_cpu_possible(const struct cpumask *src)
1109 {
1110         cpumask_copy(&__cpu_possible_mask, src);        /* possible mask becomes a copy of *src */
1111 }
1112
1113 void init_cpu_online(const struct cpumask *src)
1114 {
1115         cpumask_copy(&__cpu_online_mask, src);  /* online mask becomes a copy of *src */
1116 }
1117
1118 /*
1119  * Activate the first processor: flag the CPU running this code in each cpu mask.
1120  */
1121 void __init boot_cpu_init(void)
1122 {
1123         int cpu = smp_processor_id();
1124
1125         /* Mark the boot cpu "online", "active", "present" and "possible", for both SMP and UP */
1126         set_cpu_online(cpu, true);
1127         set_cpu_active(cpu, true);
1128         set_cpu_present(cpu, true);
1129         set_cpu_possible(cpu, true);
1130 }
1131
1132 /*
1133  * Set the calling CPU's hotplug state to CPUHP_ONLINE; call only _AFTER_ the per_cpu areas are set up
1134  */
1135 void __init boot_cpu_state_init(void)
1136 {
1137         per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
1138 }