See Documentation/prctl/seccomp_filter.txt for details.
+config HAVE_GCC_PLUGINS
+ bool
+ help
+ An arch should select this symbol if it supports building with
+ GCC plugins.
+
+menuconfig GCC_PLUGINS
+ bool "GCC plugins"
+ depends on HAVE_GCC_PLUGINS
+ depends on !COMPILE_TEST
+ help
+ GCC plugins are loadable modules that provide extra features to the
+ compiler. They are useful for runtime instrumentation and static analysis.
+
+ See Documentation/gcc-plugins.txt for details.
+
+config GCC_PLUGIN_CYC_COMPLEXITY
+ bool "Compute the cyclomatic complexity of a function"
+ depends on GCC_PLUGINS
+ help
+ The complexity M of a function's control flow graph is defined as:
+ M = E - N + 2P
+ where
+
+ E = the number of edges
+ N = the number of nodes
+ P = the number of connected components (exit nodes).
+
+config GCC_PLUGIN_SANCOV
+ bool
+ depends on GCC_PLUGINS
+ help
+ This plugin inserts a __sanitizer_cov_trace_pc() call at the start of
+ basic blocks. It supports all gcc versions with plugin support (from
+ gcc-4.5 on). It is based on the commit "Add fuzzing coverage support"
+ by Dmitry Vyukov <dvyukov@google.com>.
+
config HAVE_CC_STACKPROTECTOR
bool
help
endchoice
+ config HAVE_ARCH_WITHIN_STACK_FRAMES
+ bool
+ help
+ An architecture should select this if it can walk the kernel stack
+ frames to determine if an object is part of either the arguments
+ or local variables (i.e. that it excludes saved return addresses,
+ and similar) by implementing an inline arch_within_stack_frames(),
+ which is used by CONFIG_HARDENED_USERCOPY.
+
config HAVE_CONTEXT_TRACKING
bool
help
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
+ select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
+ select HAVE_GCC_PLUGINS
select HAVE_GENERIC_DMA_COHERENT
select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7))
select HAVE_IDE if PCI || ISA || PCMCIA
config ARCH_MULTIPLATFORM
bool "Allow multiple platforms to be selected"
depends on MMU
- select ARCH_WANT_OPTIONAL_GPIOLIB
select ARM_HAS_SG_CHAIN
select ARM_PATCH_PHYS_VIRT
select AUTO_ZRELADDR
config ARM_SINGLE_ARMV7M
bool "ARMv7-M based platforms (Cortex-M0/M3/M4)"
depends on !MMU
- select ARCH_WANT_OPTIONAL_GPIOLIB
select ARM_NVIC
select AUTO_ZRELADDR
select CLKSRC_OF
select SPARSE_IRQ
select USE_OF
-
-config ARCH_CLPS711X
- bool "Cirrus Logic CLPS711x/EP721x/EP731x-based"
- select ARCH_REQUIRE_GPIOLIB
- select AUTO_ZRELADDR
- select CLKSRC_MMIO
- select COMMON_CLK
- select CPU_ARM720T
- select GENERIC_CLOCKEVENTS
- select MFD_SYSCON
- select SOC_BUS
- help
- Support for Cirrus Logic 711x/721x/731x based boards.
-
config ARCH_GEMINI
bool "Cortina Systems Gemini"
- select ARCH_REQUIRE_GPIOLIB
select CLKSRC_MMIO
select CPU_FA526
select GENERIC_CLOCKEVENTS
+ select GPIOLIB
help
Support for the Cortina Systems Gemini family SoCs
config ARCH_EP93XX
bool "EP93xx-based"
select ARCH_HAS_HOLES_MEMORYMODEL
- select ARCH_REQUIRE_GPIOLIB
select ARM_AMBA
select ARM_PATCH_PHYS_VIRT
select ARM_VIC
select CLKSRC_MMIO
select CPU_ARM920T
select GENERIC_CLOCKEVENTS
+ select GPIOLIB
help
This enables support for the Cirrus EP93xx series of CPUs.
config ARCH_IOP32X
bool "IOP32x-based"
depends on MMU
- select ARCH_REQUIRE_GPIOLIB
select CPU_XSCALE
select GPIO_IOP
+ select GPIOLIB
select NEED_RET_TO_USER
select PCI
select PLAT_IOP
config ARCH_IOP33X
bool "IOP33x-based"
depends on MMU
- select ARCH_REQUIRE_GPIOLIB
select CPU_XSCALE
select GPIO_IOP
+ select GPIOLIB
select NEED_RET_TO_USER
select PCI
select PLAT_IOP
bool "IXP4xx-based"
depends on MMU
select ARCH_HAS_DMA_SET_COHERENT_MASK
- select ARCH_REQUIRE_GPIOLIB
select ARCH_SUPPORTS_BIG_ENDIAN
select CLKSRC_MMIO
select CPU_XSCALE
select DMABOUNCE if PCI
select GENERIC_CLOCKEVENTS
+ select GPIOLIB
select MIGHT_HAVE_PCI
select NEED_MACH_IO_H
select USB_EHCI_BIG_ENDIAN_DESC
config ARCH_DOVE
bool "Marvell Dove"
- select ARCH_REQUIRE_GPIOLIB
select CPU_PJ4
select GENERIC_CLOCKEVENTS
+ select GPIOLIB
select MIGHT_HAVE_PCI
select MULTI_IRQ_HANDLER
select MVEBU_MBUS
config ARCH_KS8695
bool "Micrel/Kendin KS8695"
- select ARCH_REQUIRE_GPIOLIB
select CLKSRC_MMIO
select CPU_ARM922T
select GENERIC_CLOCKEVENTS
+ select GPIOLIB
select NEED_MACH_MEMORY_H
help
Support for Micrel/Kendin KS8695 "Centaur" (ARM922T) based
config ARCH_W90X900
bool "Nuvoton W90X900 CPU"
- select ARCH_REQUIRE_GPIOLIB
select CLKDEV_LOOKUP
select CLKSRC_MMIO
select CPU_ARM926T
select GENERIC_CLOCKEVENTS
+ select GPIOLIB
help
Support for Nuvoton (Winbond logic dept.) ARM9 processor,
At present, the w90x900 has been renamed nuc900, regarding
config ARCH_LPC32XX
bool "NXP LPC32XX"
- select ARCH_REQUIRE_GPIOLIB
select ARM_AMBA
select CLKDEV_LOOKUP
select CLKSRC_LPC32XX
select COMMON_CLK
select CPU_ARM926T
select GENERIC_CLOCKEVENTS
+ select GPIOLIB
select MULTI_IRQ_HANDLER
select SPARSE_IRQ
select USE_OF
bool "PXA2xx/PXA3xx-based"
depends on MMU
select ARCH_MTD_XIP
- select ARCH_REQUIRE_GPIOLIB
select ARM_CPU_SUSPEND if PM
select AUTO_ZRELADDR
select COMMON_CLK
select CPU_XSCALE if !CPU_XSC3
select GENERIC_CLOCKEVENTS
select GPIO_PXA
+ select GPIOLIB
select HAVE_IDE
select IRQ_DOMAIN
select MULTI_IRQ_HANDLER
config ARCH_SA1100
bool "SA1100-based"
select ARCH_MTD_XIP
- select ARCH_REQUIRE_GPIOLIB
select ARCH_SPARSEMEM_ENABLE
select CLKDEV_LOOKUP
select CLKSRC_MMIO
select CPU_FREQ
select CPU_SA1100
select GENERIC_CLOCKEVENTS
+ select GPIOLIB
select HAVE_IDE
select IRQ_DOMAIN
select ISA
config ARCH_S3C24XX
bool "Samsung S3C24XX SoCs"
- select ARCH_REQUIRE_GPIOLIB
select ATAGS
select CLKDEV_LOOKUP
select CLKSRC_SAMSUNG_PWM
select GENERIC_CLOCKEVENTS
select GPIO_SAMSUNG
+ select GPIOLIB
select HAVE_S3C2410_I2C if I2C
select HAVE_S3C2410_WATCHDOG if WATCHDOG
select HAVE_S3C_RTC if RTC_CLASS
config ARCH_DAVINCI
bool "TI DaVinci"
select ARCH_HAS_HOLES_MEMORYMODEL
- select ARCH_REQUIRE_GPIOLIB
select CLKDEV_LOOKUP
select CPU_ARM926T
select GENERIC_ALLOCATOR
select GENERIC_CLOCKEVENTS
select GENERIC_IRQ_CHIP
+ select GPIOLIB
select HAVE_IDE
select USE_OF
select ZONE_DMA
depends on MMU
select ARCH_HAS_HOLES_MEMORYMODEL
select ARCH_OMAP
- select ARCH_REQUIRE_GPIOLIB
select CLKDEV_LOOKUP
select CLKSRC_MMIO
select GENERIC_CLOCKEVENTS
select GENERIC_IRQ_CHIP
+ select GPIOLIB
select HAVE_IDE
select IRQ_DOMAIN
select MULTI_IRQ_HANDLER
depends on ARCH_MULTI_V7
select ARM_AMBA
select ARM_GIC
- select ARM_GIC_V2M if PCI_MSI
+ select ARM_GIC_V2M if PCI
select ARM_GIC_V3
select ARM_PSCI
select HAVE_ARM_ARCH_TIMER
config ARCH_EFM32
bool "Energy Micro efm32"
depends on ARM_SINGLE_ARMV7M
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
help
Support for Energy Micro's (now Silicon Labs) efm32 Giant Gecko
processors.
default y
config ARCH_MPS2
- bool "ARM MPS2 paltform"
+ bool "ARM MPS2 platform"
depends on ARM_SINGLE_ARMV7M
select ARM_AMBA
select CLKSRC_MPS2
loop buffer may deliver incorrect instructions. This
workaround disables the loop buffer to avoid the erratum.
+config ARM_ERRATA_818325_852422
+ bool "ARM errata: A12: some seqs of opposed cond code instrs => deadlock or corruption"
+ depends on CPU_V7
+ help
+ This option enables the workaround for:
+ - Cortex-A12 818325: Execution of an UNPREDICTABLE STR or STM
+ instruction might deadlock. Fixed in r0p1.
+ - Cortex-A12 852422: Execution of a sequence of instructions might
+ lead to either a data corruption or a CPU deadlock. Not fixed in
+ any Cortex-A12 cores yet.
+ The workaround for both errata involves setting bit[12] of the
+ Feature Register. This bit disables an optimisation applied to a
+ sequence of 2 instructions that use opposing condition codes.
+
+config ARM_ERRATA_821420
+ bool "ARM errata: A12: sequence of VMOV to core registers might lead to a dead lock"
+ depends on CPU_V7
+ help
+ This option enables the workaround for the 821420 Cortex-A12
+ (all revs) erratum. In very rare timing conditions, a sequence
+ of VMOV to Core registers instructions, for which the second
+ one is in the shadow of a branch or abort, can lead to a
+ deadlock when the VMOV instructions are issued out-of-order.
+
+config ARM_ERRATA_825619
+ bool "ARM errata: A12: DMB NSHST/ISHST mixed ... might cause deadlock"
+ depends on CPU_V7
+ help
+ This option enables the workaround for the 825619 Cortex-A12
+ (all revs) erratum. Within rare timing constraints, executing a
+ DMB NSHST or DMB ISHST instruction followed by a mix of Cacheable
+ and Device/Strongly-Ordered loads and stores might cause deadlock.
+
+config ARM_ERRATA_852421
+ bool "ARM errata: A17: DMB ST might fail to create order between stores"
+ depends on CPU_V7
+ help
+ This option enables the workaround for the 852421 Cortex-A17
+ (r1p0, r1p1, r1p2) erratum. Under very rare timing conditions,
+ execution of a DMB ST instruction might fail to properly order
+ stores from GroupA and stores from GroupB.
+
+config ARM_ERRATA_852423
+ bool "ARM errata: A17: some seqs of opposed cond code instrs => deadlock or corruption"
+ depends on CPU_V7
+ help
+ This option enables the workaround for:
+ - Cortex-A17 852423: Execution of a sequence of instructions might
+ lead to either a data corruption or a CPU deadlock. Not fixed in
+ any Cortex-A17 cores yet.
+ This is identical to Cortex-A12 erratum 852422. It is a separate
+ config option from the A12 erratum due to the way errata are checked
+ for and handled.
+
endmenu
source "arch/arm/common/Kconfig"
#define segment_eq(a, b) ((a) == (b))
-#define __addr_ok(addr) ({ \
- unsigned long flag; \
- __asm__("cmp %2, %0; movlo %0, #0" \
- : "=&r" (flag) \
- : "0" (current_thread_info()->addr_limit), "r" (addr) \
- : "cc"); \
- (flag == 0); })
-
/* We use 33-bit arithmetic here... */
#define __range_ok(addr, size) ({ \
unsigned long flag, roksum; \
extern int __put_user_4(void *, unsigned int);
extern int __put_user_8(void *, unsigned long long);
-#define __put_user_x(__r2, __p, __e, __l, __s) \
- __asm__ __volatile__ ( \
- __asmeq("%0", "r0") __asmeq("%2", "r2") \
- __asmeq("%3", "r1") \
- "bl __put_user_" #__s \
- : "=&r" (__e) \
- : "0" (__p), "r" (__r2), "r" (__l) \
- : "ip", "lr", "cc")
-
-#define __put_user_check(x, p) \
+#define __put_user_check(__pu_val, __ptr, __err, __s) \
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
- const typeof(*(p)) __user *__tmp_p = (p); \
- register const typeof(*(p)) __r2 asm("r2") = (x); \
- register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \
+ register typeof(__pu_val) __r2 asm("r2") = __pu_val; \
+ register const void __user *__p asm("r0") = __ptr; \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
- unsigned int __ua_flags = uaccess_save_and_enable(); \
- switch (sizeof(*(__p))) { \
- case 1: \
- __put_user_x(__r2, __p, __e, __l, 1); \
- break; \
- case 2: \
- __put_user_x(__r2, __p, __e, __l, 2); \
- break; \
- case 4: \
- __put_user_x(__r2, __p, __e, __l, 4); \
- break; \
- case 8: \
- __put_user_x(__r2, __p, __e, __l, 8); \
- break; \
- default: __e = __put_user_bad(); break; \
- } \
- uaccess_restore(__ua_flags); \
- __e; \
+ __asm__ __volatile__ ( \
+ __asmeq("%0", "r0") __asmeq("%2", "r2") \
+ __asmeq("%3", "r1") \
+ "bl __put_user_" #__s \
+ : "=&r" (__e) \
+ : "0" (__p), "r" (__r2), "r" (__l) \
+ : "ip", "lr", "cc"); \
+ __err = __e; \
})
-#define put_user(x, p) \
- ({ \
- might_fault(); \
- __put_user_check(x, p); \
- })
-
#else /* CONFIG_MMU */
/*
}
#define get_user(x, p) __get_user(x, p)
-#define put_user(x, p) __put_user(x, p)
+#define __put_user_check __put_user_nocheck
#endif /* CONFIG_MMU */
#define __get_user_asm_word(x, addr, err) \
__get_user_asm(x, addr, err, ldr)
+
+#define __put_user_switch(x, ptr, __err, __fn) \
+ do { \
+ const __typeof__(*(ptr)) __user *__pu_ptr = (ptr); \
+ __typeof__(*(ptr)) __pu_val = (x); \
+ unsigned int __ua_flags; \
+ might_fault(); \
+ __ua_flags = uaccess_save_and_enable(); \
+ switch (sizeof(*(ptr))) { \
+ case 1: __fn(__pu_val, __pu_ptr, __err, 1); break; \
+ case 2: __fn(__pu_val, __pu_ptr, __err, 2); break; \
+ case 4: __fn(__pu_val, __pu_ptr, __err, 4); break; \
+ case 8: __fn(__pu_val, __pu_ptr, __err, 8); break; \
+ default: __err = __put_user_bad(); break; \
+ } \
+ uaccess_restore(__ua_flags); \
+ } while (0)
+
+#define put_user(x, ptr) \
+({ \
+ int __pu_err = 0; \
+ __put_user_switch((x), (ptr), __pu_err, __put_user_check); \
+ __pu_err; \
+})
+
#define __put_user(x, ptr) \
({ \
long __pu_err = 0; \
- __put_user_err((x), (ptr), __pu_err); \
+ __put_user_switch((x), (ptr), __pu_err, __put_user_nocheck); \
__pu_err; \
})
#define __put_user_error(x, ptr, err) \
({ \
- __put_user_err((x), (ptr), err); \
+ __put_user_switch((x), (ptr), (err), __put_user_nocheck); \
(void) 0; \
})
-#define __put_user_err(x, ptr, err) \
-do { \
- unsigned long __pu_addr = (unsigned long)(ptr); \
- unsigned int __ua_flags; \
- __typeof__(*(ptr)) __pu_val = (x); \
- __chk_user_ptr(ptr); \
- might_fault(); \
- __ua_flags = uaccess_save_and_enable(); \
- switch (sizeof(*(ptr))) { \
- case 1: __put_user_asm_byte(__pu_val, __pu_addr, err); break; \
- case 2: __put_user_asm_half(__pu_val, __pu_addr, err); break; \
- case 4: __put_user_asm_word(__pu_val, __pu_addr, err); break; \
- case 8: __put_user_asm_dword(__pu_val, __pu_addr, err); break; \
- default: __put_user_bad(); \
- } \
- uaccess_restore(__ua_flags); \
-} while (0)
+#define __put_user_nocheck(x, __pu_ptr, __err, __size) \
+ do { \
+ unsigned long __pu_addr = (unsigned long)__pu_ptr; \
+ __put_user_nocheck_##__size(x, __pu_addr, __err); \
+ } while (0)
+
+#define __put_user_nocheck_1 __put_user_asm_byte
+#define __put_user_nocheck_2 __put_user_asm_half
+#define __put_user_nocheck_4 __put_user_asm_word
+#define __put_user_nocheck_8 __put_user_asm_dword
#define __put_user_asm(x, __pu_addr, err, instr) \
__asm__ __volatile__( \
static inline unsigned long __must_check
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
- unsigned int __ua_flags = uaccess_save_and_enable();
+ unsigned int __ua_flags;
+
+ check_object_size(to, n, false);
+ __ua_flags = uaccess_save_and_enable();
n = arm_copy_from_user(to, from, n);
uaccess_restore(__ua_flags);
return n;
__copy_to_user(void __user *to, const void *from, unsigned long n)
{
#ifndef CONFIG_UACCESS_WITH_MEMCPY
- unsigned int __ua_flags = uaccess_save_and_enable();
+ unsigned int __ua_flags;
+
+ check_object_size(from, n, true);
+ __ua_flags = uaccess_save_and_enable();
n = arm_copy_to_user(to, from, n);
uaccess_restore(__ua_flags);
return n;
#else
+ check_object_size(from, n, true);
return arm_copy_to_user(to, from, n);
#endif
}
select ACPI_CCA_REQUIRED if ACPI
select ACPI_GENERIC_GSI if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
+ select ACPI_MCFG if ACPI
select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_KCOV
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_USE_CMPXCHG_LOCKREF
select ARM_ARCH_TIMER
select ARM_GIC
select AUDIT_ARCH_COMPAT_GENERIC
- select ARM_GIC_V2M if PCI_MSI
+ select ARM_GIC_V2M if PCI
select ARM_GIC_V3
- select ARM_GIC_V3_ITS if PCI_MSI
+ select ARM_GIC_V3_ITS if PCI
select ARM_PSCI_FW
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
+ select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_GCC_PLUGINS
select HAVE_GENERIC_DMA_COHERENT
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RCU_TABLE_FREE
select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_KPROBES
+ select HAVE_KRETPROBES if HAVE_KPROBES
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select OF_EARLY_FLATTREE
select OF_NUMA if NUMA && OF
select OF_RESERVED_MEM
+ select PCI_ECAM if ACPI
select PERF_USE_VMALLOC
select POWER_RESET
select POWER_SUPPLY
If in doubt, say N here.
+config KEXEC
+ depends on PM_SLEEP_SMP
+ select KEXEC_CORE
+ bool "kexec system call"
+ ---help---
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is independent of the system firmware. And like a reboot
+ you can start any kernel with it, not just Linux.
+
config XEN_DOM0
def_bool y
depends on XEN
config RANDOMIZE_BASE
bool "Randomize the address of the kernel image"
- select ARM64_MODULE_PLTS
+ select ARM64_MODULE_PLTS if MODULES
select RELOCATABLE
help
Randomizes the virtual address at which the kernel image is
/*
* User space memory access functions
*/
+#include <linux/kasan-checks.h>
#include <linux/string.h>
#include <linux/thread_info.h>
static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n)
{
- return __arch_copy_from_user(to, from, n);
+ kasan_check_write(to, n);
+ check_object_size(to, n, false);
+ return __arch_copy_from_user(to, from, n);
}
static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n)
{
- return __arch_copy_to_user(to, from, n);
+ kasan_check_read(from, n);
+ check_object_size(from, n, true);
+ return __arch_copy_to_user(to, from, n);
}
static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
{
- if (access_ok(VERIFY_READ, from, n))
+ kasan_check_write(to, n);
+
+ if (access_ok(VERIFY_READ, from, n)) {
+ check_object_size(to, n, false);
n = __arch_copy_from_user(to, from, n);
- else /* security hole - plug it */
+ } else /* security hole - plug it */
memset(to, 0, n);
return n;
}
static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
{
- if (access_ok(VERIFY_WRITE, to, n))
+ kasan_check_read(from, n);
+
+ if (access_ok(VERIFY_WRITE, to, n)) {
+ check_object_size(from, n, true);
n = __arch_copy_to_user(to, from, n);
+ }
return n;
}
select GENERIC_PENDING_IRQ if SMP
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_LEGACY
- select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
select GENERIC_SMP_IDLE_THREAD
select MODULES_USE_ELF_RELA
select ARCH_USE_CMPXCHG_LOCKREF
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HARDENED_USERCOPY
default y
help
The Itanium Processor Family is Intel's 64-bit successor to
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select SYSCTL_EXCEPTION_TRACE
- select ARCH_WANT_OPTIONAL_GPIOLIB
select VIRT_TO_BUS if !PPC64
select HAVE_IDE
select HAVE_IOREMAP_PROT
select IRQ_FORCED_THREADING
select HAVE_RCU_TABLE_FREE if SMP
select HAVE_SYSCALL_TRACEPOINTS
- select HAVE_CBPF_JIT if CPU_BIG_ENDIAN
+ select HAVE_CBPF_JIT if !PPC64
+ select HAVE_EBPF_JIT if PPC64
select HAVE_ARCH_JUMP_LABEL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
+ select GENERIC_CPU_AUTOPROBE
+ select HAVE_VIRT_CPU_ACCOUNTING
+ select HAVE_ARCH_HARDENED_USERCOPY
config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
interface is strongly in flux, so no good recommendation can be
made.
+config RELOCATABLE
+ bool "Build a relocatable kernel"
+ depends on (PPC64 && !COMPILE_TEST) || (FLATMEM && (44x || FSL_BOOKE))
+ select NONSTATIC_KERNEL
+ help
+ This builds a kernel image that is capable of running at the
+ location the kernel is loaded at. For ppc32, there are no
+ alignment restrictions, and this feature is a superset of
+ DYNAMIC_MEMSTART and hence overrides it. For ppc64, we should use
+ a 16k-aligned base address. The kernel is linked as a
+ position-independent executable (PIE) and contains dynamic relocations
+ which are processed early in the bootup process.
+
+ One use is for the kexec on panic case where the recovery kernel
+ must live at a different physical address than the primary
+ kernel.
+
+ Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
+ it has been loaded at and the compile time physical address
+ CONFIG_PHYSICAL_START is ignored. However CONFIG_PHYSICAL_START
+ setting can still be useful to bootwrappers that need to know the
+ load address of the kernel (eg. u-boot/mkimage).
+
config CRASH_DUMP
bool "Build a kdump crash kernel"
depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
This option is overridden by CONFIG_RELOCATABLE
-config RELOCATABLE
- bool "Build a relocatable kernel"
- depends on ADVANCED_OPTIONS && FLATMEM && (44x || FSL_BOOKE)
- select NONSTATIC_KERNEL
- help
- This builds a kernel image that is capable of running at the
- location the kernel is loaded at, without any alignment restrictions.
- This feature is a superset of DYNAMIC_MEMSTART and hence overrides it.
-
- One use is for the kexec on panic case where the recovery kernel
- must live at a different physical address than the primary
- kernel.
-
- Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
- it has been loaded at and the compile time physical addresses
- CONFIG_PHYSICAL_START is ignored. However CONFIG_PHYSICAL_START
- setting can still be useful to bootwrappers that need to know the
- load address of the kernel (eg. u-boot/mkimage).
-
-config RELOCATABLE_PPC32
- def_bool y
- depends on PPC32 && RELOCATABLE
-
config PAGE_OFFSET_BOOL
bool "Set custom page offset address"
depends on ADVANCED_OPTIONS
config PIN_TLB
bool "Pinned Kernel TLBs (860 ONLY)"
depends on ADVANCED_OPTIONS && 8xx
+
+config PIN_TLB_IMMR
+ bool "Pinned TLB for IMMR"
+ depends on PIN_TLB
+ default y
endmenu
if PPC64
-config RELOCATABLE
- bool "Build a relocatable kernel"
- depends on !COMPILE_TEST
- select NONSTATIC_KERNEL
- help
- This builds a kernel image that is capable of running anywhere
- in the RMA (real memory area) at any 16k-aligned base address.
- The kernel is linked as a position-independent executable (PIE)
- and contains dynamic relocations which are processed early
- in the bootup process.
-
- One use is for the kexec on panic case where the recovery kernel
- must live at a different physical address than the primary
- kernel.
-
# This value must have zeroes in the bottom 60 bits otherwise lots will break
config PAGE_OFFSET
hex
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_KCOV
select ARCH_HAS_SG_CHAIN
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_INLINE_READ_LOCK
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_EARLY_PFN_TO_NID
+ select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_JUMP_LABEL
select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
select HAVE_ARCH_SECCOMP_FILTER
select NO_BOOTMEM
select OLD_SIGACTION
select OLD_SIGSUSPEND3
+ select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select TTY
select VIRT_CPU_ACCOUNTING
config SCHED_BOOK
def_bool n
+config SCHED_DRAWER
+ def_bool n
+
config SCHED_TOPOLOGY
def_bool y
prompt "Topology scheduler support"
select SCHED_SMT
select SCHED_MC
select SCHED_BOOK
+ select SCHED_DRAWER
help
Topology scheduler support improves the CPU scheduler's decision
making when dealing with machines that have multi-threading,
This allows you to specify the maximum number of PCI functions which
this kernel will support.
-config PCI_NR_MSI
- int "Maximum number of MSI interrupts (64-32768)"
- range 64 32768
- default "256"
- help
- This defines the number of virtual interrupts the kernel will
- provide for MSI interrupts. If you configure your system to have
- too few drivers will fail to allocate MSI interrupts for all
- PCI devices.
-
source "drivers/pci/Kconfig"
endif # PCI
" jnm 5b\n"
" ex %4,0(%3)\n"
" j 8f\n"
- "7:slgr %0,%0\n"
+ "7: slgr %0,%0\n"
"8:\n"
EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
" jnm 6b\n"
" ex %4,0(%3)\n"
" j 9f\n"
- "8:slgr %0,%0\n"
+ "8: slgr %0,%0\n"
"9: sacf 768\n"
EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b)
EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b)
unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
{
+ check_object_size(to, n, false);
if (static_branch_likely(&have_mvcos))
return copy_from_user_mvcos(to, from, n);
return copy_from_user_mvcp(to, from, n);
unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
{
+ check_object_size(from, n, true);
if (static_branch_likely(&have_mvcos))
return copy_to_user_mvcos(to, from, n);
return copy_to_user_mvcs(to, from, n);
"3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
" slgr %0,%3\n"
" j 5f\n"
- "4:slgr %0,%0\n"
+ "4: slgr %0,%0\n"
"5:\n"
EX_TABLE(0b,2b) EX_TABLE(3b,5b)
: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
select ANON_INODES
select ARCH_CLOCKSOURCE_DATA
select ARCH_DISCARD_MEMBLOCK
+ select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_IPC_PARSE_VERSION if X86_32
- select ARCH_WANT_OPTIONAL_GPIOLIB
select BUILDTIME_EXTABLE_SORT
select CLKEVT_I8253
select CLKSRC_I8253 if X86_32
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_AOUT if X86_32
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
select HAVE_ARCH_SOFT_DIRTY if X86_64
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_EBPF_JIT if X86_64
select HAVE_CC_STACKPROTECTOR
select HAVE_CMPXCHG_DOUBLE
select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
+ select HAVE_GCC_PLUGINS
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_HW_BREAKPOINT
select HAVE_IDE
select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
select PERF_EVENTS
select RTC_LIB
+ select RTC_MC146818_LIB
select SPARSE_IRQ
select SRCU
select SYSCTL_EXCEPTION_TRACE
def_bool y
depends on X86_32 && !CC_STACKPROTECTOR
-config ARCH_HWEIGHT_CFLAGS
- string
- default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
- default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
-
config ARCH_SUPPORTS_UPROBES
def_bool y
select X86_DMA_REMAP
select SWIOTLB
select MFD_STA2X11
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
default n
---help---
This adds support for boards based on the STA2X11 IO-Hub,
attempts relying on knowledge of the location of kernel
code internals.
- The kernel physical and virtual address can be randomized
- from 16MB up to 1GB on 64-bit and 512MB on 32-bit. (Note that
- using RANDOMIZE_BASE reduces the memory space available to
- kernel modules from 1.5GB to 1GB.)
+ On 64-bit, the kernel physical and virtual addresses are
+ randomized separately. The physical address will be anywhere
+ between 16MB and the top of physical memory (up to 64TB). The
+ virtual address will be randomized from 16MB up to 1GB (9 bits
+ of entropy). Note that this also reduces the memory space
+ available to kernel modules from 1.5GB to 1GB.
+
+ On 32-bit, the kernel physical and virtual addresses are
+ randomized together. They will be randomized from 16MB up to
+ 512MB (8 bits of entropy).
Entropy is generated using the RDRAND instruction if it is
supported. If RDTSC is supported, its value is mixed into
the entropy pool as well. If neither RDRAND nor RDTSC are
- supported, then entropy is read from the i8254 timer.
-
- Since the kernel is built using 2GB addressing, and
- PHYSICAL_ALIGN must be at a minimum of 2MB, only 10 bits of
- entropy is theoretically possible. Currently, with the
- default value for PHYSICAL_ALIGN and due to page table
- layouts, 64-bit uses 9 bits of entropy and 32-bit uses 8 bits.
+ supported, then entropy is read from the i8254 timer. The
+ usable entropy is limited by the kernel being built using
+ 2GB addressing, and that PHYSICAL_ALIGN must be at a
+ minimum of 2MB. As a result, only 10 bits of entropy are
+ theoretically possible, but the implementations are further
+ limited due to memory layouts.
If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot
time. To enable it, boot with "kaslr" on the kernel command
Don't change this unless you know what you are doing.
+config RANDOMIZE_MEMORY
+ bool "Randomize the kernel memory sections"
+ depends on X86_64
+ depends on RANDOMIZE_BASE
+ default RANDOMIZE_BASE
+ ---help---
+ Randomizes the base virtual address of kernel memory sections
+ (physical memory mapping, vmalloc & vmemmap). This security feature
+ makes exploits relying on predictable memory locations less reliable.
+
+ The order of allocations remains unchanged. Entropy is generated in
+ the same way as RANDOMIZE_BASE. The current implementation in the optimal
+ configuration has on average 30,000 different possible virtual
+ addresses for each memory section.
+
+ If unsure, say N.
+
+config RANDOMIZE_MEMORY_PHYSICAL_PADDING
+ hex "Physical memory mapping padding" if EXPERT
+ depends on RANDOMIZE_MEMORY
+ default "0xa" if MEMORY_HOTPLUG
+ default "0x0"
+ range 0x1 0x40 if MEMORY_HOTPLUG
+ range 0x0 0x40
+ ---help---
+ Define the padding in terabytes added to the existing physical
+ memory size during kernel memory randomization. It is useful
+ for memory hotplug support but reduces the entropy available for
+ address randomization.
+
+ If unsure, leave at the default value.
+
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP
__u32 flags; /* low level flags */
__u32 status; /* thread synchronous flags */
__u32 cpu; /* current CPU */
- mm_segment_t addr_limit;
- unsigned int sig_on_uaccess_error:1;
- unsigned int uaccess_err:1; /* uaccess failed */
};
#define INIT_THREAD_INFO(tsk) \
.task = &tsk, \
.flags = 0, \
.cpu = 0, \
- .addr_limit = KERNEL_DS, \
}
#define init_thread_info (init_thread_union.thread_info)
return sp;
}
+ /*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * stack and stackend bound the walk: the saved frame-pointer chain is
+ * only followed while stack <= frame < stackend. obj is the start of
+ * the object being checked and len is its length in bytes.
+ *
+ * Returns:
+ * 1 if within a frame
+ * -1 if placed across a frame boundary (or outside stack)
+ * 0 unable to determine (no frame pointers, etc)
+ */
+ static inline int arch_within_stack_frames(const void * const stack,
+ const void * const stackend,
+ const void *obj, unsigned long len)
+ {
+ #if defined(CONFIG_FRAME_POINTER)
+ const void *frame = NULL;
+ const void *oldframe;
+
+ oldframe = __builtin_frame_address(1);
+ if (oldframe)
+ frame = __builtin_frame_address(2);
+ /*
+ * low ----------------------------------------------> high
+ * [saved bp][saved ip][args][local vars][saved bp][saved ip]
+ * ^----------------^
+ * allow copies only within here
+ */
+ while (stack <= frame && frame < stackend) {
+ /*
+ * If obj + len extends past the last frame, this
+ * check won't pass and the next frame will be 0,
+ * causing us to bail out and correctly report
+ * the copy as invalid.
+ *
+ * When the object does end at or below this frame, it must
+ * also start above the previous frame's saved bp and saved
+ * ip, hence the 2 * sizeof(void *) offset from oldframe.
+ */
+ if (obj + len <= frame)
+ return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
+ oldframe = frame;
+ frame = *(const void * const *)frame;
+ }
+ return -1;
+ #else
+ return 0;
+ #endif
+ }
+
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_64
# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
#endif
-/* Load thread_info address into "reg" */
-#define GET_THREAD_INFO(reg) \
- _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
- _ASM_SUB $(THREAD_SIZE),reg ;
-
/*
* ASM operand which evaluates to a 'thread_info' address of
* the current task, if it is known that "reg" is exactly "off"
* have to worry about atomic accesses.
*/
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
-#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
+#ifdef CONFIG_COMPAT
+#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
+#endif
#ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK 1
-static inline void set_restore_sigmask(void)
-{
- struct thread_info *ti = current_thread_info();
- ti->status |= TS_RESTORE_SIGMASK;
- WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
- return current_thread_info()->status & TS_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
- struct thread_info *ti = current_thread_info();
- if (!(ti->status & TS_RESTORE_SIGMASK))
- return false;
- ti->status &= ~TS_RESTORE_SIGMASK;
- return true;
-}
static inline bool in_ia32_syscall(void)
{
#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX)
#define get_ds() (KERNEL_DS)
-#define get_fs() (current_thread_info()->addr_limit)
-#define set_fs(x) (current_thread_info()->addr_limit = (x))
+#define get_fs() (current->thread.addr_limit)
+#define set_fs(x) (current->thread.addr_limit = (x))
#define segment_eq(a, b) ((a).seg == (b).seg)
-#define user_addr_max() (current_thread_info()->addr_limit.seg)
+#define user_addr_max() (current->thread.addr_limit.seg)
#define __addr_ok(addr) \
((unsigned long __force)(addr) < user_addr_max())
} while (0)
#ifdef CONFIG_X86_32
-#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad()
+/*
+ * Fetch a 64-bit value from user memory on 32-bit x86 as two 32-bit loads
+ * into %eax (low half) and %edx (high half).
+ *
+ * If either load faults, the fixup code at label 4 stores errret in retval,
+ * zeroes both halves of the result and resumes at label 3, after the loads.
+ *
+ * The result operand must be early-clobber ("=&A"): %eax is written by the
+ * first load while the memory operand of the second load has not been read
+ * yet, so the compiler must not use %eax or %edx to address either input.
+ */
+#define __get_user_asm_u64(x, ptr, retval, errret) \
+({ \
+ __typeof__(ptr) __ptr = (ptr); \
+ asm volatile(ASM_STAC "\n" \
+ "1: movl %2,%%eax\n" \
+ "2: movl %3,%%edx\n" \
+ "3: " ASM_CLAC "\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: mov %4,%0\n" \
+ " xorl %%eax,%%eax\n" \
+ " xorl %%edx,%%edx\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 4b) \
+ _ASM_EXTABLE(2b, 4b) \
+ : "=r" (retval), "=&A"(x) \
+ : "m" (__m(__ptr)), "m" (__m(((u32 *)(__ptr)) + 1)), \
+ "i" (errret), "0" (retval)); \
+})
+
#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad()
#else
#define __get_user_asm_u64(x, ptr, retval, errret) \
#define __get_user_nocheck(x, ptr, size) \
({ \
int __gu_err; \
- unsigned long __gu_val; \
+ __inttype(*(ptr)) __gu_val; \
__uaccess_begin(); \
__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
__uaccess_end(); \
* uaccess_try and catch
*/
#define uaccess_try do { \
- current_thread_info()->uaccess_err = 0; \
+ current->thread.uaccess_err = 0; \
__uaccess_begin(); \
barrier();
#define uaccess_catch(err) \
__uaccess_end(); \
- (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \
+ (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \
} while (0)
/**
* case, and do only runtime checking for non-constant sizes.
*/
- if (likely(sz < 0 || sz >= n))
+ if (likely(sz < 0 || sz >= n)) {
+ check_object_size(to, n, false);
n = _copy_from_user(to, from, n);
- else if(__builtin_constant_p(n))
+ } else if (__builtin_constant_p(n))
copy_from_user_overflow();
else
__copy_from_user_overflow(sz, n);
might_fault();
/* See the comment in copy_from_user() above. */
- if (likely(sz < 0 || sz >= n))
+ if (likely(sz < 0 || sz >= n)) {
+ check_object_size(from, n, true);
n = _copy_to_user(to, from, n);
- else if(__builtin_constant_p(n))
+ } else if (__builtin_constant_p(n))
copy_to_user_overflow();
else
__copy_to_user_overflow(sz, n);
#define user_access_begin() __uaccess_begin()
#define user_access_end() __uaccess_end()
-#define unsafe_put_user(x, ptr) \
-({ \
+#define unsafe_put_user(x, ptr, err_label) \
+do { \
int __pu_err; \
__put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \
- __builtin_expect(__pu_err, 0); \
-})
+ if (unlikely(__pu_err)) goto err_label; \
+} while (0)
-#define unsafe_get_user(x, ptr) \
-({ \
+#define unsafe_get_user(x, ptr, err_label) \
+do { \
int __gu_err; \
unsigned long __gu_val; \
__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
- __builtin_expect(__gu_err, 0); \
-})
+ if (unlikely(__gu_err)) goto err_label; \
+} while (0)
#endif /* _ASM_X86_UACCESS_H */
#ifdef CONFIG_CMA
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
+ # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
#else
# define is_migrate_cma(migratetype) false
+ # define is_migrate_cma_page(_page) false
#endif
#define for_each_migratetype_order(order, type) \
struct pglist_data;
/*
- * zone->lock and zone->lru_lock are two of the hottest locks in the kernel.
+ * zone->lock and the zone lru_lock are two of the hottest locks in the kernel.
* So add a wild amount of padding here to ensure that they fall into separate
* cachelines. There are very few zone structures in the machine, so space
* consumption is not a concern here.
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
- NR_ALLOC_BATCH,
- NR_LRU_BASE,
- NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
- NR_ACTIVE_ANON, /* " " " " " */
- NR_INACTIVE_FILE, /* " " " " " */
- NR_ACTIVE_FILE, /* " " " " " */
- NR_UNEVICTABLE, /* " " " " " */
+ NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
+ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
+ NR_ZONE_ACTIVE_ANON,
+ NR_ZONE_INACTIVE_FILE,
+ NR_ZONE_ACTIVE_FILE,
+ NR_ZONE_UNEVICTABLE,
+ NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
- NR_ANON_PAGES, /* Mapped anonymous pages */
- NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
- only modified from process context */
- NR_FILE_PAGES,
- NR_FILE_DIRTY,
- NR_WRITEBACK,
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
- NR_KERNEL_STACK,
+ NR_KERNEL_STACK_KB, /* measured in KiB */
/* Second 128 byte cacheline */
- NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_BOUNCE,
- NR_VMSCAN_WRITE,
- NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
- NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
- NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
- NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
- NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
- NR_DIRTIED, /* page dirtyings since bootup */
- NR_WRITTEN, /* page writings since bootup */
- NR_PAGES_SCANNED, /* pages scanned since last reclaim */
+#if IS_ENABLED(CONFIG_ZSMALLOC)
+ NR_ZSPAGES, /* allocated in zsmalloc */
+#endif
#ifdef CONFIG_NUMA
NUMA_HIT, /* allocated in intended node */
NUMA_MISS, /* allocated in non intended node */
NUMA_LOCAL, /* allocation from local node */
NUMA_OTHER, /* allocation from other node */
#endif
+ NR_FREE_CMA_PAGES,
+ NR_VM_ZONE_STAT_ITEMS };
+
+enum node_stat_item {
+ NR_LRU_BASE,
+ NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+ NR_ACTIVE_ANON, /* " " " " " */
+ NR_INACTIVE_FILE, /* " " " " " */
+ NR_ACTIVE_FILE, /* " " " " " */
+ NR_UNEVICTABLE, /* " " " " " */
+ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
+ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
+ NR_PAGES_SCANNED, /* pages scanned since last reclaim */
WORKINGSET_REFAULT,
WORKINGSET_ACTIVATE,
WORKINGSET_NODERECLAIM,
- NR_ANON_TRANSPARENT_HUGEPAGES,
- NR_FREE_CMA_PAGES,
- NR_VM_ZONE_STAT_ITEMS };
+ NR_ANON_MAPPED, /* Mapped anonymous pages */
+ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
+ only modified from process context */
+ NR_FILE_PAGES,
+ NR_FILE_DIRTY,
+ NR_WRITEBACK,
+ NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
+ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
+ NR_SHMEM_THPS,
+ NR_SHMEM_PMDMAPPED,
+ NR_ANON_THPS,
+ NR_UNSTABLE_NFS, /* NFS unstable pages */
+ NR_VMSCAN_WRITE,
+ NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
+ NR_DIRTIED, /* page dirtyings since bootup */
+ NR_WRITTEN, /* page writings since bootup */
+ NR_VM_NODE_STAT_ITEMS
+};
/*
* We do arithmetic on the LRU lists in various places in the code,
/* Evictions & activations on the inactive file list */
atomic_long_t inactive_age;
#ifdef CONFIG_MEMCG
- struct zone *zone;
+ struct pglist_data *pgdat;
#endif
};
#endif
};
+struct per_cpu_nodestat {
+ s8 stat_threshold; /* NOTE(review): presumably the per-cpu delta limit before folding into the node counter - confirm in vmstat code */
+ s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS]; /* one s8 slot per enum node_stat_item value */
+};
+
#endif /* !__GENERATING_BOUNDS.H */
enum zone_type {
#ifdef CONFIG_NUMA
int node;
#endif
-
- /*
- * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
- * this zone's LRU. Maintained by the pageout code.
- */
- unsigned int inactive_ratio;
-
struct pglist_data *zone_pgdat;
struct per_cpu_pageset __percpu *pageset;
- /*
- * This is a per-zone reserve of pages that are not available
- * to userspace allocations.
- */
- unsigned long totalreserve_pages;
-
#ifndef CONFIG_SPARSEMEM
/*
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
-#ifdef CONFIG_NUMA
- /*
- * zone reclaim becomes active if more unmapped pages exist.
- */
- unsigned long min_unmapped_pages;
- unsigned long min_slab_pages;
-#endif /* CONFIG_NUMA */
-
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
unsigned long wait_table_hash_nr_entries;
unsigned long wait_table_bits;
+ /* Write-intensive fields used from the page allocator */
ZONE_PADDING(_pad1_)
+
/* free areas of different sizes */
struct free_area free_area[MAX_ORDER];
/* zone flags, see below */
unsigned long flags;
- /* Write-intensive fields used from the page allocator */
+ /* Primarily protects free_area */
spinlock_t lock;
+ /* Write-intensive fields used by compaction and vmstats. */
ZONE_PADDING(_pad2_)
- /* Write-intensive fields used by page reclaim */
-
- /* Fields commonly accessed by the page reclaim scanner */
- spinlock_t lru_lock;
- struct lruvec lruvec;
-
/*
* When free pages are below this point, additional steps are taken
* when reading the number of free pages to avoid per-cpu counter
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
} ____cacheline_internodealigned_in_smp;
-enum zone_flags {
- ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */
- ZONE_OOM_LOCKED, /* zone is in OOM killer zonelist */
- ZONE_CONGESTED, /* zone has many dirty pages backed by
+enum pgdat_flags {
+ PGDAT_CONGESTED, /* pgdat has many dirty pages backed by
* a congested BDI
*/
- ZONE_DIRTY, /* reclaim scanning has recently found
+ PGDAT_DIRTY, /* reclaim scanning has recently found
* many dirty file pages at the tail
* of the LRU.
*/
- ZONE_WRITEBACK, /* reclaim scanning has recently found
+ PGDAT_WRITEBACK, /* reclaim scanning has recently found
* many pages under writeback
*/
- ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
+ PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
};
static inline unsigned long zone_end_pfn(const struct zone *zone)
wait_queue_head_t pfmemalloc_wait;
struct task_struct *kswapd; /* Protected by
mem_hotplug_begin/end() */
- int kswapd_max_order;
- enum zone_type classzone_idx;
+ int kswapd_order;
+ enum zone_type kswapd_classzone_idx;
+
#ifdef CONFIG_COMPACTION
int kcompactd_max_order;
enum zone_type kcompactd_classzone_idx;
/* Number of pages migrated during the rate limiting time interval */
unsigned long numabalancing_migrate_nr_pages;
#endif
+ /*
+ * This is a per-node reserve of pages that are not available
+ * to userspace allocations.
+ */
+ unsigned long totalreserve_pages;
+
+#ifdef CONFIG_NUMA
+ /*
+ * zone reclaim becomes active if more unmapped pages exist.
+ */
+ unsigned long min_unmapped_pages;
+ unsigned long min_slab_pages;
+#endif /* CONFIG_NUMA */
+
+ /* Write-intensive fields used by page reclaim */
+ ZONE_PADDING(_pad1_)
+ spinlock_t lru_lock;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
struct list_head split_queue;
unsigned long split_queue_len;
#endif
+
+ /* Fields commonly accessed by the page reclaim scanner */
+ struct lruvec lruvec;
+
+ /*
+ * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+ * this node's LRU. Maintained by the pageout code.
+ */
+ unsigned int inactive_ratio;
+
+ unsigned long flags;
+
+ ZONE_PADDING(_pad2_)
+
+ /* Per-node vmstats */
+ struct per_cpu_nodestat __percpu *per_cpu_nodestats;
+ atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
+static inline spinlock_t *zone_lru_lock(struct zone *zone)
+{
+ return &zone->zone_pgdat->lru_lock; /* the lock is a member of the zone's pglist_data, reached via zone_pgdat */
+}
+
+static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
+{
+ return &pgdat->lruvec; /* address of the lruvec embedded in the pglist_data */
+}
static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
{
extern void lruvec_init(struct lruvec *lruvec);
-static inline struct zone *lruvec_zone(struct lruvec *lruvec)
+static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
{
#ifdef CONFIG_MEMCG
- return lruvec->zone;
+ return lruvec->pgdat;
#else
- return container_of(lruvec, struct zone, lruvec);
+ return container_of(lruvec, struct pglist_data, lruvec);
#endif
}
void kzfree(const void *);
size_t ksize(const void *);
+ #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
+ const char *__check_heap_object(const void *ptr, unsigned long n,
+ struct page *page);
+ #else /* !CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR: no allocator-side check is available */
+ static inline const char *__check_heap_object(const void *ptr,
+ unsigned long n,
+ struct page *page)
+ {
+ return NULL; /* always NULL; none of the arguments are examined */
+ }
+ #endif /* CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR */
+
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
{
if (size != 0 && n > SIZE_MAX / size)
return NULL;
+ if (__builtin_constant_p(n) && __builtin_constant_p(size))
+ return kmalloc(n * size, flags);
return __kmalloc(n * size, flags);
}
#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
-#if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
-/*
- * An arch can define its own version of set_restore_sigmask() to get the
- * job done however works, with or without TIF_RESTORE_SIGMASK.
- */
-#define HAVE_SET_RESTORE_SIGMASK 1
-
-/**
- * set_restore_sigmask() - make sure saved_sigmask processing gets done
- *
- * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code
- * will run before returning to user mode, to process the flag. For
- * all callers, TIF_SIGPENDING is already set or it's no harm to set
- * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the
- * arch code will notice on return to user mode, in case those bits
- * are scarce. We set TIF_SIGPENDING here to ensure that the arch
- * signal code always gets run when TIF_RESTORE_SIGMASK is set.
- */
-static inline void set_restore_sigmask(void)
-{
- set_thread_flag(TIF_RESTORE_SIGMASK);
- WARN_ON(!test_thread_flag(TIF_SIGPENDING));
-}
-static inline void clear_restore_sigmask(void)
-{
- clear_thread_flag(TIF_RESTORE_SIGMASK);
-}
-static inline bool test_restore_sigmask(void)
-{
- return test_thread_flag(TIF_RESTORE_SIGMASK);
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
- return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK);
-}
-#endif /* TIF_RESTORE_SIGMASK && !HAVE_SET_RESTORE_SIGMASK */
-
-#ifndef HAVE_SET_RESTORE_SIGMASK
-#error "no set_restore_sigmask() provided and default one won't work"
-#endif
-
+ #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
+ static inline int arch_within_stack_frames(const void * const stack,
+ const void * const stackend,
+ const void *obj, unsigned long len)
+ {
+ return 0; /* fallback when the arch provides no frame walker: always 0, arguments unused */
+ }
+ #endif /* !CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES */
+
+ #ifdef CONFIG_HARDENED_USERCOPY
+ extern void __check_object_size(const void *ptr, unsigned long n,
+ bool to_user);
+
+ static inline void check_object_size(const void *ptr, unsigned long n,
+ bool to_user)
+ {
+ __check_object_size(ptr, n, to_user); /* forward all three arguments unchanged to the out-of-line checker */
+ }
+ #else /* !CONFIG_HARDENED_USERCOPY: the check compiles to nothing */
+ static inline void check_object_size(const void *ptr, unsigned long n,
+ bool to_user)
+ { }
+ #endif /* CONFIG_HARDENED_USERCOPY */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_THREAD_INFO_H */
config COMPILE_TEST
bool "Compile also drivers which will not load"
+ depends on !UML
default n
help
Some drivers can be compiled on a different platform than they are
config LOCALVERSION_AUTO
bool "Automatically append version information to the version string"
default y
+ depends on !COMPILE_TEST
help
This will try to automatically determine if the current tree is a
release tree by looking for git tags that belong to the current
If unsure, say N.
+endchoice
+
config IRQ_TIME_ACCOUNTING
bool "Fine granularity task level IRQ time accounting"
- depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL
+ depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE
help
Select this option to enable fine granularity task irq time
accounting. This is done by reading a timestamp on each
If in doubt, say N here.
-endchoice
-
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
depends on MULTIUSER
config TASKS_RCU
bool
default n
+ depends on !UML
select SRCU
help
This option enables a task-based RCU implementation that uses
used as it forces an exact (power of two) size of the ring buffer.
The number of possible CPUs is used for this computation ignoring
- hotplugging making the compuation optimal for the the worst case
- scenerio while allowing a simple algorithm to be used from bootup.
+ hotplugging making the computation optimal for the worst case
+ scenario while allowing a simple algorithm to be used from bootup.
Examples shift values and their meaning:
17 => 128 KB for each CPU
controls or device isolation.
See
- Documentation/scheduler/sched-design-CFS.txt (CFS)
- - Documentation/cgroups/ (features for grouping, isolation
+ - Documentation/cgroup-v1/ (features for grouping, isolation
and resource control)
Say N if unsure.
CONFIG_CFQ_GROUP_IOSCHED=y; for enabling throttling policy, set
CONFIG_BLK_DEV_THROTTLING=y.
- See Documentation/cgroups/blkio-controller.txt for more information.
+ See Documentation/cgroup-v1/blkio-controller.txt for more information.
config DEBUG_BLK_CGROUP
bool "IO controller debugging"
config SLAB
bool "SLAB"
+ select HAVE_HARDENED_USERCOPY_ALLOCATOR
help
The regular slab allocator that is established and known to work
well in all environments. It organizes cache hot objects in
config SLUB
bool "SLUB (Unqueued Allocator)"
+ select HAVE_HARDENED_USERCOPY_ALLOCATOR
help
SLUB is a slab allocator that minimizes cache line usage
instead of managing queues of cached objects (SLAB approach).
config SLAB_FREELIST_RANDOM
default n
- depends on SLAB
+ depends on SLAB || SLUB
bool "SLAB freelist randomization"
help
- Randomizes the freelist order used on creating new SLABs. This
+ Randomizes the freelist order used on creating new pages. This
security feature reduces the predictability of the kernel slab
allocator against heap overflows.
(especially when using LTO) for optimizing the code and reducing
binary size. This might have some security advantages as well.
- If unsure say N.
+ If unsure, or if you need to build out-of-tree modules, say N.
endif # MODULES
KCOV_INSTRUMENT_mmzone.o := n
KCOV_INSTRUMENT_vmstat.o := n
+ # Since __builtin_frame_address() does work as used here, disable the -Wframe-address warning.
+ CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address)
+
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
-obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
+ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
}
}
-#ifdef CONFIG_SLAB_FREELIST_RANDOM
-static void freelist_randomize(struct rnd_state *state, freelist_idx_t *list,
- size_t count)
-{
- size_t i;
- unsigned int rand;
-
- for (i = 0; i < count; i++)
- list[i] = i;
-
- /* Fisher-Yates shuffle */
- for (i = count - 1; i > 0; i--) {
- rand = prandom_u32_state(state);
- rand %= (i + 1);
- swap(list[i], list[rand]);
- }
-}
-
-/* Create a random sequence per cache */
-static int cache_random_seq_create(struct kmem_cache *cachep, gfp_t gfp)
-{
- unsigned int seed, count = cachep->num;
- struct rnd_state state;
-
- if (count < 2)
- return 0;
-
- /* If it fails, we will just use the global lists */
- cachep->random_seq = kcalloc(count, sizeof(freelist_idx_t), gfp);
- if (!cachep->random_seq)
- return -ENOMEM;
-
- /* Get best entropy at this stage */
- get_random_bytes_arch(&seed, sizeof(seed));
- prandom_seed_state(&state, seed);
-
- freelist_randomize(&state, cachep->random_seq, count);
- return 0;
-}
-
-/* Destroy the per-cache random freelist sequence */
-static void cache_random_seq_destroy(struct kmem_cache *cachep)
-{
- kfree(cachep->random_seq);
- cachep->random_seq = NULL;
-}
-#else
-static inline int cache_random_seq_create(struct kmem_cache *cachep, gfp_t gfp)
-{
- return 0;
-}
-static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
-#endif /* CONFIG_SLAB_FREELIST_RANDOM */
-
-
/*
* Initialisation. Called after the page allocator have been initialised and
* before smp_init().
return cpu_cache;
}
-static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
+static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
if (slab_state >= FULL)
return enable_cpucache(cachep, gfp);
union freelist_init_state {
struct {
unsigned int pos;
- freelist_idx_t *list;
+ unsigned int *list;
unsigned int count;
unsigned int rand;
};
unsigned int rand;
/* Use best entropy available to define a random shift */
- get_random_bytes_arch(&rand, sizeof(rand));
+ rand = get_random_int();
/* Use a random state if the pre-computed list is not available */
if (!cachep->random_seq) {
return (state->list[state->pos++] + state->rand) % state->count;
}
+/*
+ * Swap two freelist entries: exchange the values stored at indices @a and
+ * @b of @page->freelist, which is accessed as an array of freelist_idx_t.
+ */
+static void swap_free_obj(struct page *page, unsigned int a, unsigned int b)
+{
+ swap(((freelist_idx_t *)page->freelist)[a],
+ ((freelist_idx_t *)page->freelist)[b]);
+}
+
/*
* Shuffle the freelist initialization state based on pre-computed lists.
* return true if the list was successfully shuffled, false otherwise.
*/
static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
{
- unsigned int objfreelist = 0, i, count = cachep->num;
+ unsigned int objfreelist = 0, i, rand, count = cachep->num;
union freelist_init_state state;
bool precomputed;
* Later use a pre-computed list for speed.
*/
if (!precomputed) {
- freelist_randomize(&state.rnd_state, page->freelist, count);
+ for (i = 0; i < count; i++)
+ set_free_obj(page, i, i);
+
+ /* Fisher-Yates shuffle */
+ for (i = count - 1; i > 0; i--) {
+ rand = prandom_u32_state(&state.rnd_state);
+ rand %= (i + 1);
+ swap_free_obj(page, i, rand);
+ }
} else {
for (i = 0; i < count; i++)
set_free_obj(page, i, next_random_slot(&state));
}
for (i = 0; i < cachep->num; i++) {
+ objp = index_to_obj(cachep, page, i);
+ kasan_init_slab_obj(cachep, objp);
+
/* constructor could break poison info */
if (DEBUG == 0 && cachep->ctor) {
- objp = index_to_obj(cachep, page, i);
kasan_unpoison_object_data(cachep, objp);
cachep->ctor(objp);
kasan_poison_object_data(cachep, objp);
* critical path in kmem_cache_alloc().
*/
if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
- pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
- BUG();
+ gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
+ flags &= ~GFP_SLAB_BUG_MASK;
+ pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
+ invalid_mask, &invalid_mask, flags, &flags);
+ dump_stack();
}
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
n->free_objects -= cachep->num;
page = list_last_entry(&n->slabs_free, struct page, lru);
- list_del(&page->lru);
- list_add(&page->lru, list);
+ list_move(&page->lru, list);
}
}
int shared = 0;
int batchcount = 0;
- err = cache_random_seq_create(cachep, gfp);
+ err = cache_random_seq_create(cachep, cachep->num, gfp);
if (err)
goto end;
module_init(slab_proc_init);
#endif
+ #ifdef CONFIG_HARDENED_USERCOPY
+ /*
+ * Rejects objects that are incorrectly sized.
+ *
+ * @ptr: start of the address range being checked
+ * @n: length of the range in bytes
+ * @page: page whose ->slab_cache describes the object @ptr points into
+ *
+ * The object index is derived from @ptr with obj_to_index(); an index at
+ * or beyond cachep->num triggers BUG_ON().
+ *
+ * Returns NULL if check passes, otherwise const char * to name of cache
+ * to indicate an error.
+ */
+ const char *__check_heap_object(const void *ptr, unsigned long n,
+ struct page *page)
+ {
+ struct kmem_cache *cachep;
+ unsigned int objnr;
+ unsigned long offset;
+
+ /* Find and validate object. */
+ cachep = page->slab_cache;
+ objnr = obj_to_index(cachep, page, (void *)ptr);
+ BUG_ON(objnr >= cachep->num);
+
+ /*
+ * Find offset within object: distance of ptr from the object's start,
+ * less obj_offset(). offset is unsigned, so a ptr below that point
+ * wraps to a huge value and fails the range test below.
+ */
+ offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
+
+ /*
+ * Allow address range falling entirely within object size. n is
+ * compared against the space left after offset instead of testing
+ * offset + n, so a very large n cannot wrap around and pass.
+ */
+ if (offset <= cachep->object_size && n <= cachep->object_size - offset)
+ return NULL;
+
+ return cachep->name;
+ }
+ #endif /* CONFIG_HARDENED_USERCOPY */
+
/**
* ksize - get the actual amount of memory allocated for a given object
* @objp: Pointer to the object
#endif
}
-static inline void *fixup_red_left(struct kmem_cache *s, void *p)
+void *fixup_red_left(struct kmem_cache *s, void *p)
{
if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
p += s->red_left_pad;
*/
#if defined(CONFIG_SLUB_DEBUG_ON)
static int slub_debug = DEBUG_DEFAULT_FLAGS;
-#elif defined(CONFIG_KASAN)
-static int slub_debug = SLAB_STORE_USER;
#else
static int slub_debug;
#endif
if (s->flags & SLAB_STORE_USER)
off += 2 * sizeof(struct track);
+ off += kasan_metadata_size(s);
+
if (off != size_from_object(s))
/* Beginning of the filler is the free pointer */
print_section("Padding ", p + off, size_from_object(s) - off);
/* We also have user information there */
off += 2 * sizeof(struct track);
+ off += kasan_metadata_size(s);
+
if (size_from_object(s) == off)
return 1;
kasan_kfree_large(x);
}
-static inline void slab_free_hook(struct kmem_cache *s, void *x)
+static inline void *slab_free_hook(struct kmem_cache *s, void *x)
{
+ void *freeptr;
+
kmemleak_free_recursive(x, s->flags);
/*
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(x, s->object_size);
+ freeptr = get_freepointer(s, x);
+ /*
+ * kasan_slab_free() may put x into memory quarantine, delaying its
+ * reuse. In this case the object's freelist pointer is changed.
+ */
kasan_slab_free(s, x);
+ return freeptr;
}
static inline void slab_free_freelist_hook(struct kmem_cache *s,
void *object = head;
void *tail_obj = tail ? : head;
+ void *freeptr;
do {
- slab_free_hook(s, object);
- } while ((object != tail_obj) &&
- (object = get_freepointer(s, object)));
+ freeptr = slab_free_hook(s, object);
+ } while ((object != tail_obj) && (object = freeptr));
#endif
}
void *object)
{
setup_object_debug(s, page, object);
+ kasan_init_slab_obj(s, object);
if (unlikely(s->ctor)) {
kasan_unpoison_object_data(s, object);
s->ctor(object);
return page;
}
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+/*
+ * Pre-initialize the random sequence cache.
+ *
+ * Asks cache_random_seq_create() for a sequence of oo_objects(s->oo)
+ * entries, then multiplies every entry by s->size so that s->random_seq
+ * holds offsets rather than indices.
+ *
+ * Returns 0 on success, or the error from cache_random_seq_create() after
+ * logging it.
+ */
+static int init_cache_random_seq(struct kmem_cache *s)
+{
+ int err;
+ unsigned long i, count = oo_objects(s->oo);
+
+ err = cache_random_seq_create(s, count, GFP_KERNEL);
+ if (err) {
+ pr_err("SLUB: Unable to initialize free list for %s\n",
+ s->name);
+ return err;
+ }
+
+ /*
+ * Transform to an offset on the set of pages. The NULL test allows for
+ * cache_random_seq_create() returning 0 without setting random_seq.
+ */
+ if (s->random_seq) {
+ for (i = 0; i < count; i++)
+ s->random_seq[i] *= s->size;
+ }
+ return 0;
+}
+
+/*
+ * Initialize each random sequence freelist per cache: call
+ * init_cache_random_seq() for every cache on the slab_caches list while
+ * holding slab_mutex. The per-cache return value is not checked here.
+ */
+static void __init init_freelist_randomization(void)
+{
+ struct kmem_cache *s;
+
+ mutex_lock(&slab_mutex);
+
+ list_for_each_entry(s, &slab_caches, list)
+ init_cache_random_seq(s);
+
+ mutex_unlock(&slab_mutex);
+}
+
+/*
+ * Get the next entry of the pre-computed randomized freelist sequence.
+ *
+ * Reads s->random_seq[*pos], advances *pos (wrapping to 0 once it reaches
+ * @freelist_count) and repeats until the value read is below @page_limit.
+ * Returns @start advanced by that value in bytes. @page is not used in the
+ * body.
+ */
+static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
+ unsigned long *pos, void *start,
+ unsigned long page_limit,
+ unsigned long freelist_count)
+{
+ unsigned int idx;
+
+ /*
+ * If the target page allocation failed, the number of objects on the
+ * page might be smaller than the usual size defined by the cache.
+ * Sequence entries at or beyond page_limit are therefore skipped.
+ */
+ do {
+ idx = s->random_seq[*pos];
+ *pos += 1;
+ if (*pos >= freelist_count)
+ *pos = 0;
+ } while (unlikely(idx >= page_limit));
+
+ return (char *)start + idx;
+}
+
+/*
+ * Shuffle the single linked freelist based on a random pre-computed sequence.
+ *
+ * Starting at a random position in s->random_seq, each of the page's
+ * objects is passed to setup_object() and linked to the next entry taken
+ * from the sequence. The last object's free pointer is set to NULL and
+ * page->freelist points at the first object chosen.
+ *
+ * Returns false without touching @page when it has fewer than two objects
+ * or the cache has no random_seq; returns true once the list is built.
+ */
+static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
+{
+ void *start;
+ void *cur;
+ void *next;
+ unsigned long idx, pos, page_limit, freelist_count;
+
+ if (page->objects < 2 || !s->random_seq)
+ return false;
+
+ freelist_count = oo_objects(s->oo);
+ pos = get_random_int() % freelist_count;
+
+ page_limit = page->objects * s->size; /* upper bound on usable sequence values for this page */
+ start = fixup_red_left(s, page_address(page));
+
+ /* First entry is used as the base of the freelist */
+ cur = next_freelist_entry(s, page, &pos, start, page_limit,
+ freelist_count);
+ page->freelist = cur;
+
+ for (idx = 1; idx < page->objects; idx++) {
+ setup_object(s, page, cur);
+ next = next_freelist_entry(s, page, &pos, start, page_limit,
+ freelist_count);
+ set_freepointer(s, cur, next);
+ cur = next;
+ }
+ setup_object(s, page, cur); /* the final object still needs setting up */
+ set_freepointer(s, cur, NULL); /* and it terminates the list */
+
+ return true;
+}
+#else
+static inline int init_cache_random_seq(struct kmem_cache *s)
+{
+ return 0; /* stub for the #else branch: nothing to set up, report success */
+}
+static inline void init_freelist_randomization(void) { }
+static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
+{
+ return false; /* stub: the page is never shuffled, its arguments are unused */
+}
+#endif /* CONFIG_SLAB_FREELIST_RANDOM */
+
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
gfp_t alloc_gfp;
void *start, *p;
int idx, order;
+ bool shuffle;
flags &= gfp_allowed_mask;
kasan_poison_slab(page);
- for_each_object_idx(p, idx, s, start, page->objects) {
- setup_object(s, page, p);
- if (likely(idx < page->objects))
- set_freepointer(s, p, p + s->size);
- else
- set_freepointer(s, p, NULL);
+ shuffle = shuffle_freelist(s, page);
+
+ if (!shuffle) {
+ for_each_object_idx(p, idx, s, start, page->objects) {
+ setup_object(s, page, p);
+ if (likely(idx < page->objects))
+ set_freepointer(s, p, p + s->size);
+ else
+ set_freepointer(s, p, NULL);
+ }
+ page->freelist = fixup_red_left(s, start);
}
- page->freelist = fixup_red_left(s, start);
page->inuse = page->objects;
page->frozen = 1;
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
- pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
- BUG();
+ gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
+ flags &= ~GFP_SLAB_BUG_MASK;
+ pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
+ invalid_mask, &invalid_mask, flags, &flags);
}
return allocate_slab(s,
* same page) possible by specifying head and tail ptr, plus objects
* count (cnt). Bulk free indicated by tail pointer being set.
*/
-static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
- void *head, void *tail, int cnt,
- unsigned long addr)
+static __always_inline void do_slab_free(struct kmem_cache *s,
+ struct page *page, void *head, void *tail,
+ int cnt, unsigned long addr)
{
void *tail_obj = tail ? : head;
struct kmem_cache_cpu *c;
unsigned long tid;
-
- slab_free_freelist_hook(s, head, tail);
-
redo:
/*
* Determine the currently cpus per cpu slab.
}
+static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
+				      void *head, void *tail, int cnt,
+				      unsigned long addr)
+{
+	/* Always run the free hook on the object(s) first. */
+	slab_free_freelist_hook(s, head, tail);
+
+	/*
+	 * The hook may have placed the objects into quarantine, in which
+	 * case they must not be freed here.  Only caches without SLAB_KASAN,
+	 * or caches marked SLAB_DESTROY_BY_RCU, go on to the real free.
+	 */
+	if (!(s->flags & SLAB_KASAN) || (s->flags & SLAB_DESTROY_BY_RCU))
+		do_slab_free(s, page, head, tail, cnt, addr);
+}
+
+#ifdef CONFIG_KASAN
+/*
+ * Free the single object @x belonging to @cache by calling do_slab_free()
+ * directly; unlike slab_free(), slab_free_freelist_hook() is not invoked.
+ * @addr is passed through to do_slab_free() unchanged.
+ */
+void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
+{
+	struct page *page = virt_to_head_page(x);
+
+	/* One object only: no tail pointer and a count of 1. */
+	do_slab_free(cache, page, x, NULL, 1, addr);
+}
+#endif
+
void kmem_cache_free(struct kmem_cache *s, void *x)
{
s = cache_from_obj(s, x);
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(object);
- __free_kmem_pages(page, compound_order(page));
+ __free_pages(page, compound_order(page));
p[size] = NULL; /* mark object processed */
return size;
}
void __kmem_cache_release(struct kmem_cache *s)
{
+ cache_random_seq_destroy(s); /* helper defined outside this hunk; presumably releases what init_cache_random_seq() set up - confirm */
free_percpu(s->cpu_slab);
free_kmem_cache_nodes(s);
}
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
unsigned long flags = s->flags;
- unsigned long size = s->object_size;
+ size_t size = s->object_size;
int order;
/*
* the object.
*/
size += 2 * sizeof(struct track);
+#endif
+ kasan_cache_create(s, &size, &s->flags);
+#ifdef CONFIG_SLUB_DEBUG
if (flags & SLAB_RED_ZONE) {
/*
* Add some empty padding so that we can catch
#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
#endif
+
+ /* Initialize the pre-computed randomized freelist if slab is up */
+ if (slab_state >= UP) {
+ if (init_cache_random_seq(s))
+ goto error;
+ }
+
if (!init_kmem_cache_nodes(s))
goto error;
void *ptr = NULL;
flags |= __GFP_COMP | __GFP_NOTRACK;
- page = alloc_kmem_pages_node(node, flags, get_order(size));
+ page = alloc_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
EXPORT_SYMBOL(__kmalloc_node);
#endif
+ #ifdef CONFIG_HARDENED_USERCOPY
+/*
+ * Check that the n bytes starting at ptr fit within the usable part of
+ * one object slot of the slab cache that page belongs to.
+ *
+ * Returns NULL when the range is acceptable; otherwise returns the cache
+ * name (const char *) so the caller can report which cache was violated.
+ */
+const char *__check_heap_object(const void *ptr, unsigned long n,
+				struct page *page)
+{
+	struct kmem_cache *cache = page->slab_cache;
+	size_t usable = slab_ksize(cache);	/* usable object size */
+	unsigned long off;
+
+	/* A pointer below the start of the page cannot belong to it. */
+	if (ptr < page_address(page))
+		return cache->name;
+
+	/* Position of ptr relative to the start of its object slot. */
+	off = (ptr - page_address(page)) % cache->size;
+
+	/*
+	 * With red zoning enabled the left pad is off limits: reject a
+	 * pointer that lands in it, otherwise measure from the end of it.
+	 */
+	if (kmem_cache_debug(cache) && (cache->flags & SLAB_RED_ZONE)) {
+		if (off < cache->red_left_pad)
+			return cache->name;
+		off -= cache->red_left_pad;
+	}
+
+	/* Reject a range that starts or ends beyond the usable size. */
+	if (off > usable || n > usable - off)
+		return cache->name;
+
+	return NULL;
+}
+ #endif /* CONFIG_HARDENED_USERCOPY */
+
static size_t __ksize(const void *object)
{
struct page *page;
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(x);
- __free_kmem_pages(page, compound_order(page));
+ __free_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
setup_kmalloc_cache_index_table();
create_kmalloc_caches(0);
+ /* Setup random freelists for each cache */
+ init_freelist_randomization();
+
#ifdef CONFIG_SMP
register_cpu_notifier(&slab_notifier);
#endif