/*
 * The "user cache".
 *
 * (C) Copyright 1991-2000 Linus Torvalds
 *
 * We have a per-user structure to keep track of how many
 * processes, files etc. the user has claimed, in order to be
 * able to have per-user limits for system resources.
 */

#include <linux/init.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/key.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/user_namespace.h>
#include "cred-internals.h"

struct user_namespace init_user_ns = {
	.kref = {
		.refcount	= ATOMIC_INIT(2),
	},
	.creator = &root_user,
};
EXPORT_SYMBOL_GPL(init_user_ns);

/*
 * UID task count cache, to get fast user lookup in "alloc_uid"
 * when changing user IDs (i.e. setuid() and friends).
 */

#define UIDHASH_MASK		(UIDHASH_SZ - 1)
#define __uidhashfn(uid)	(((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
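/*
 * __uidhashfn() folds the high bits of the uid onto the low bits before
 * masking, so uids that differ only above UIDHASH_BITS still spread
 * across buckets.  For example, assuming UIDHASH_BITS == 7 (so
 * UIDHASH_SZ == 128), uid 0 hashes to bucket 0 while uid 128 hashes to
 * bucket 1 ((128 >> 7) + 128 == 129, masked to 1) instead of colliding.
 */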
#define uidhashentry(ns, uid)	((ns)->uidhash_table + __uidhashfn((uid)))

static struct kmem_cache *uid_cachep;

/*
 * The uidhash_lock is mostly taken from process context, but it is
 * occasionally also taken from softirq/tasklet context, when
 * task-structs get RCU-freed. Hence all locking must be softirq-safe.
 * But free_uid() is also called with local interrupts disabled, and running
 * local_bh_enable() with local interrupts disabled is an error - we'll run
 * softirq callbacks, and they can unconditionally enable interrupts, and
 * the caller of free_uid() didn't expect that..
 */
static DEFINE_SPINLOCK(uidhash_lock);

/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */
struct user_struct root_user = {
	.__count	= ATOMIC_INIT(2),
	.processes	= ATOMIC_INIT(1),
	.files		= ATOMIC_INIT(0),
	.sigpending	= ATOMIC_INIT(0),
	.locked_shm	= 0,
	.user_ns	= &init_user_ns,
#ifdef CONFIG_USER_SCHED
	.tg		= &init_task_group,
#endif
};

/*
 * These routines must be called with the uidhash spinlock held!
 */
static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
{
	hlist_add_head(&up->uidhash_node, hashent);
}

static void uid_hash_remove(struct user_struct *up)
{
	hlist_del_init(&up->uidhash_node);
	put_user_ns(up->user_ns);
}

static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
	struct user_struct *user;
	struct hlist_node *h;

	hlist_for_each_entry(user, h, hashent, uidhash_node) {
		if (user->uid == uid) {
			atomic_inc(&user->__count);
			return user;
		}
	}

	return NULL;
}
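/*
 * Note that uid_hash_find() takes its reference while uidhash_lock is
 * held, so it cannot race with the final put in free_uid(): the count
 * only drops to zero under the same lock, via atomic_dec_and_lock().
 */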

#ifdef CONFIG_USER_SCHED

static void sched_destroy_user(struct user_struct *up)
{
	sched_destroy_group(up->tg);
}

static int sched_create_user(struct user_struct *up)
{
	int rc = 0;

	up->tg = sched_create_group(&root_task_group);
	if (IS_ERR(up->tg))
		rc = -ENOMEM;

	set_tg_uid(up);

	return rc;
}

#else	/* CONFIG_USER_SCHED */

static void sched_destroy_user(struct user_struct *up) { }
static int sched_create_user(struct user_struct *up) { return 0; }

#endif	/* CONFIG_USER_SCHED */

#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)

static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
static DEFINE_MUTEX(uids_mutex);

static inline void uids_mutex_lock(void)
{
	mutex_lock(&uids_mutex);
}

static inline void uids_mutex_unlock(void)
{
	mutex_unlock(&uids_mutex);
}
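/*
 * These wrappers let the common code (alloc_uid() and the cleanup path)
 * take uids_mutex unconditionally; the #else branch further down defines
 * them as empty stubs, so the locking disappears entirely when the sysfs
 * uid directories are not built.
 */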

/* uid directory attributes */
#ifdef CONFIG_FAIR_GROUP_SCHED
static ssize_t cpu_shares_show(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       char *buf)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);

	return sprintf(buf, "%lu\n", sched_group_shares(up->tg));
}

static ssize_t cpu_shares_store(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t size)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
	unsigned long shares;
	int rc;

	sscanf(buf, "%lu", &shares);

	rc = sched_group_set_shares(up->tg, shares);

	return (rc ? rc : size);
}

static struct kobj_attribute cpu_share_attr =
	__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
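/*
 * Example (from a root shell, assuming a user with UID 1000 exists and
 * has running tasks):
 *
 *	# cat /sys/kernel/uids/1000/cpu_share
 *	1024
 *	# echo 2048 > /sys/kernel/uids/1000/cpu_share
 *
 * This doubles the CPU weight of UID 1000's task group relative to the
 * default of 1024 shares.
 */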
#endif

#ifdef CONFIG_RT_GROUP_SCHED
static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   char *buf)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);

	return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
}

static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
				    struct kobj_attribute *attr,
				    const char *buf, size_t size)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
	unsigned long rt_runtime;
	int rc;

	sscanf(buf, "%ld", &rt_runtime);

	rc = sched_group_set_rt_runtime(up->tg, rt_runtime);

	return (rc ? rc : size);
}

static struct kobj_attribute cpu_rt_runtime_attr =
	__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);

static ssize_t cpu_rt_period_show(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  char *buf)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);

	return sprintf(buf, "%lu\n", sched_group_rt_period(up->tg));
}

static ssize_t cpu_rt_period_store(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   const char *buf, size_t size)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
	unsigned long rt_period;
	int rc;

	sscanf(buf, "%lu", &rt_period);

	rc = sched_group_set_rt_period(up->tg, rt_period);

	return (rc ? rc : size);
}

static struct kobj_attribute cpu_rt_period_attr =
	__ATTR(cpu_rt_period, 0644, cpu_rt_period_show, cpu_rt_period_store);
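/*
 * Both values are expressed in microseconds, mirroring the cgroup
 * cpu.rt_runtime_us / cpu.rt_period_us files: cpu_rt_runtime is the
 * slice of each period during which this user's realtime tasks may run
 * (-1 meaning no limit), and cpu_rt_period is the length of that period.
 */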
#endif

/* default attributes per uid directory */
static struct attribute *uids_attributes[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
	&cpu_share_attr.attr,
#endif
#ifdef CONFIG_RT_GROUP_SCHED
	&cpu_rt_runtime_attr.attr,
	&cpu_rt_period_attr.attr,
#endif
	NULL
};

/*
 * The lifetime of the user_struct is not managed by the kobject core
 * (for now), so there is nothing to free here.
 */
static void uids_release(struct kobject *kobj)
{
}

static struct kobj_type uids_ktype = {
	.sysfs_ops	= &kobj_sysfs_ops,
	.default_attrs	= uids_attributes,
	.release	= uids_release,
};
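/*
 * Because uids_attributes is wired up as the ktype's default_attrs,
 * sysfs creates the cpu_share/cpu_rt_* files automatically when a uid
 * kobject is added in uids_user_create() below; no explicit
 * sysfs_create_file() calls are needed.
 */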

/*
 * Create a /sys/kernel/uids/<uid>/cpu_share file for this user.
 * We do not create this file for users in a user namespace (until
 * sysfs tagging is implemented).
 *
 * See Documentation/scheduler/sched-design-CFS.txt for ramifications.
 */
static int uids_user_create(struct user_struct *up)
{
	struct kobject *kobj = &up->kobj;
	int error;

	memset(kobj, 0, sizeof(struct kobject));
	if (up->user_ns != &init_user_ns)
		return 0;
	kobj->kset = uids_kset;
	error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
	if (error) {
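		/*
		 * kobject_init_and_add() takes an initial reference even
		 * when it fails, so the error path must drop it with
		 * kobject_put() before bailing out.
		 */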
		kobject_put(kobj);
		goto done;
	}

	kobject_uevent(kobj, KOBJ_ADD);
done:
	return error;
}

/*
 * Create these entries in sysfs:
 *	"/sys/kernel/uids" directory
 *	"/sys/kernel/uids/0" directory (for root user)
 *	"/sys/kernel/uids/0/cpu_share" file (for root user)
 */
int __init uids_sysfs_init(void)
{
	uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
	if (!uids_kset)
		return -ENOMEM;

	return uids_user_create(&root_user);
}

/*
 * Work function to remove the sysfs directory for a user and free up
 * the corresponding structures.
 */
static void cleanup_user_struct(struct work_struct *w)
{
	struct user_struct *up = container_of(w, struct user_struct, work);
	unsigned long flags;
	int remove_user = 0;

	/*
	 * Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
	 * atomic.
	 */
	uids_mutex_lock();

	local_irq_save(flags);

	if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
		uid_hash_remove(up);
		remove_user = 1;
		spin_unlock_irqrestore(&uidhash_lock, flags);
	} else {
		local_irq_restore(flags);
	}

	if (!remove_user)
		goto done;

	if (up->user_ns == &init_user_ns) {
		kobject_uevent(&up->kobj, KOBJ_REMOVE);
		kobject_del(&up->kobj);
		kobject_put(&up->kobj);
	}

	sched_destroy_user(up);
	key_put(up->uid_keyring);
	key_put(up->session_keyring);
	kmem_cache_free(uid_cachep, up);

done:
	uids_mutex_unlock();
}

/*
 * IRQs are disabled and uidhash_lock is held upon function entry.
 * IRQ state (as stored in flags) is restored and uidhash_lock released
 * upon function exit.
 */
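/*
 * This version defers the actual teardown to a workqueue: free_uid() may
 * be called with interrupts disabled, but kobject_del() and uids_mutex
 * can sleep.  So the last-reference case re-arms the count and lets
 * cleanup_user_struct() drop it again from process context.
 */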
static void free_user(struct user_struct *up, unsigned long flags)
{
	/* Restore the count; cleanup_user_struct() will drop it again. */
	atomic_inc(&up->__count);
	spin_unlock_irqrestore(&uidhash_lock, flags);

	INIT_WORK(&up->work, cleanup_user_struct);
	schedule_work(&up->work);
}

#else	/* CONFIG_USER_SCHED && CONFIG_SYSFS */

int uids_sysfs_init(void) { return 0; }
static inline int uids_user_create(struct user_struct *up) { return 0; }
static inline void uids_mutex_lock(void) { }
static inline void uids_mutex_unlock(void) { }

/*
 * IRQs are disabled and uidhash_lock is held upon function entry.
 * IRQ state (as stored in flags) is restored and uidhash_lock released
 * upon function exit.
 */
static void free_user(struct user_struct *up, unsigned long flags)
{
	uid_hash_remove(up);
	spin_unlock_irqrestore(&uidhash_lock, flags);
	sched_destroy_user(up);
	key_put(up->uid_keyring);
	key_put(up->session_keyring);
	kmem_cache_free(uid_cachep, up);
}

#endif

#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED)
/*
 * We need to check if a setuid can take place. This function should be
 * called before successfully completing the setuid.
 */
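/*
 * The check exists because a realtime task may not be attached to a task
 * group that has no realtime runtime allocated: sched_rt_can_attach()
 * refuses the move in that case, and the setuid must then fail rather
 * than strand an RT task in a group that can never run it.
 */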
int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
{
	return sched_rt_can_attach(up->tg, tsk);
}
#else
int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
{
	return 1;
}
#endif

/*
 * Locate the user_struct for the passed UID. If found, take a ref on it.
 * The caller must undo that ref with free_uid().
 *
 * If the user_struct could not be found, return NULL.
 */
struct user_struct *find_user(uid_t uid)
{
	struct user_struct *ret;
	unsigned long flags;
	struct user_namespace *ns = current_user_ns();

	spin_lock_irqsave(&uidhash_lock, flags);
	ret = uid_hash_find(uid, uidhashentry(ns, uid));
	spin_unlock_irqrestore(&uidhash_lock, flags);
	return ret;
}

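/*
 * free_uid() uses atomic_dec_and_lock() so that the common case (the
 * count does not hit zero) stays lock-free; only the final put takes
 * uidhash_lock, with interrupts already disabled so the lock remains
 * softirq-safe without resorting to local_bh_disable() (see the comment
 * above uidhash_lock).
 */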
void free_uid(struct user_struct *up)
{
	unsigned long flags;

	if (!up)
		return;

	local_irq_save(flags);
	if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
		free_user(up, flags);
	else
		local_irq_restore(flags);
}

struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
{
	struct hlist_head *hashent = uidhashentry(ns, uid);
	struct user_struct *up, *new;

	/*
	 * Make uid_hash_find() + uids_user_create() + uid_hash_insert()
	 * atomic.
	 */
	uids_mutex_lock();

	spin_lock_irq(&uidhash_lock);
	up = uid_hash_find(uid, hashent);
	spin_unlock_irq(&uidhash_lock);

	if (!up) {
		new = kmem_cache_zalloc(uid_cachep, GFP_KERNEL);
		if (!new)
			goto out_unlock;

		new->uid = uid;
		atomic_set(&new->__count, 1);

		if (sched_create_user(new) < 0)
			goto out_free_user;

		new->user_ns = get_user_ns(ns);

		if (uids_user_create(new))
			goto out_destroy_sched;

		/*
		 * Before adding this, check whether we raced
		 * on adding the same user already..
		 */
		spin_lock_irq(&uidhash_lock);
		up = uid_hash_find(uid, hashent);
		if (up) {
			/*
			 * This case is not possible when CONFIG_USER_SCHED
			 * is defined, since we serialize alloc_uid() using
			 * uids_mutex. Hence no need to call
			 * sched_destroy_user() or remove_user_sysfs_dir().
			 */
			key_put(new->uid_keyring);
			key_put(new->session_keyring);
			kmem_cache_free(uid_cachep, new);
		} else {
			uid_hash_insert(new, hashent);
			up = new;
		}
		spin_unlock_irq(&uidhash_lock);
	}

	uids_mutex_unlock();

	return up;

out_destroy_sched:
	sched_destroy_user(new);
	put_user_ns(new->user_ns);
out_free_user:
	kmem_cache_free(uid_cachep, new);
out_unlock:
	uids_mutex_unlock();
	return NULL;
}

static int __init uid_cache_init(void)
{
	int n;

	uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	for (n = 0; n < UIDHASH_SZ; ++n)
		INIT_HLIST_HEAD(init_user_ns.uidhash_table + n);

	/* Insert the root user immediately (init already runs as root). */
	spin_lock_irq(&uidhash_lock);
	uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0));
	spin_unlock_irq(&uidhash_lock);

	return 0;
}
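/*
 * This file is always built in, so module_init() here simply registers
 * uid_cache_init() as a regular initcall run during boot, well before
 * the first setuid() from userspace can reach alloc_uid().
 */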

module_init(uid_cache_init);