arch/x86/lib/copy_user_nocache_64.S
/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * Uses non-temporal (movnti) stores so the destination is written
 * without being pulled into the CPU cache.
 *
 * Input:
 * rdi	destination
 * rsi	source
 * rdx	count
 * rcx	zero flag: when non-zero, zero the rest of the destination on a fault
 *
 * Output:
 * eax	uncopied bytes, or 0 if successful.
 */
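
/*
 * Hypothetical caller sketch (not part of the original source): it only
 * illustrates the register interface documented above; kern_dst, user_src,
 * len and handle_short_copy are made-up symbols for illustration.
 *
 *	movq	kern_dst(%rip),%rdi	# destination buffer
 *	movq	user_src(%rip),%rsi	# source (user pointer)
 *	movq	len(%rip),%rdx		# byte count
 *	movl	$1,%ecx			# zero the destination tail on a fault
 *	call	__copy_user_nocache
 *	testl	%eax,%eax		# eax = bytes left uncopied
 *	jnz	handle_short_copy	# 0 means the whole copy succeeded
 */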
ENTRY(__copy_user_nocache)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	pushq %rcx			/* save zero flag */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rcx, 0

	xorl %eax,%eax		/* zero for the exception handler */

#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jnz .Lbad_alignment
.Lafter_bad_alignment:
#endif

	movq %rdx,%rcx

	movl $64,%ebx
	shrq $6,%rdx
	decq %rdx
	js .Lhandle_tail

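/*
 * Main copy loop: each iteration moves 64 bytes as eight quadwords,
 * loaded with ordinary movq and written with movnti (non-temporal
 * stores), so the destination data is not pulled into the cache.
 */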
	.p2align 4
.Lloop:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movnti %r11,(%rdi)
.Ld2:	movnti %r8,1*8(%rdi)
.Ld3:	movnti %r9,2*8(%rdi)
.Ld4:	movnti %r10,3*8(%rdi)

.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movnti %r11,4*8(%rdi)
.Ld6:	movnti %r8,5*8(%rdi)
.Ld7:	movnti %r9,6*8(%rdi)
.Ld8:	movnti %r10,7*8(%rdi)

	dec %rdx

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi

	jns .Lloop

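/*
 * Fewer than 64 bytes remain: copy them in 8-byte quadwords first,
 * then finish byte by byte.
 */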
	.p2align 4
.Lhandle_tail:
	movl %ecx,%edx
	andl $63,%ecx
	shrl $3,%ecx
	jz .Lhandle_7
	movl $8,%ebx
	.p2align 4
.Lloop_8:
.Ls9:	movq (%rsi),%r8
.Ld9:	movnti %r8,(%rdi)
	decl %ecx
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz .Lende
	.p2align 4
.Lloop_1:
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

	CFI_REMEMBER_STATE
.Lende:
	popq %rcx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rcx
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	sfence
	ret
	CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
	/* align destination */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d
	movl %r9d,%ecx
	cmpq %r9,%rdx
	jz .Lhandle_7
	js .Lhandle_7
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .Lalign_1
	subq %r9,%rdx
	jmp .Lafter_bad_alignment
#endif

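/*
 * Exception table: each .quad pair names an instruction that may fault
 * on the user access and the fixup label the page fault handler jumps
 * to when that fault happens.
 */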
	/* table sorted by exception address */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e	/* .Ls[1-4] - 0 bytes copied */
	.quad .Ls2,.Ls1e
	.quad .Ls3,.Ls1e
	.quad .Ls4,.Ls1e
	.quad .Ld1,.Ls1e	/* .Ld[1-4] - 0..24 bytes copied */
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e	/* .Ls[5-8] - 32 bytes copied */
	.quad .Ls6,.Ls5e
	.quad .Ls7,.Ls5e
	.quad .Ls8,.Ls5e
	.quad .Ld5,.Ls5e	/* .Ld[5-8] - 32..56 bytes copied */
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Lzero_rest
	.quad .Ld11,.Lzero_rest
#endif
	.quad .Le5,.Le_zero
	.previous

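/*
 * Fixups for faults in the main 64-byte loop.  The .Ls[1-8]e labels fall
 * through one another: a fault routed to .Ls1e executes all eight addl
 * instructions (eax += 64), one routed to .Ls8e only the last (eax += 8),
 * so eax ends up holding the bytes of the current block that were not
 * stored.  The code below then rewinds rdi to the first uncopied byte
 * and rebuilds the total remaining length in rdx.
 */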
	/* eax: zero, ebx: 64 */
.Ls1e:	addl $8,%eax		/* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */
.Ls2e:	addl $8,%eax
.Ls3e:	addl $8,%eax
.Ls4e:	addl $8,%eax
.Ls5e:	addl $8,%eax
.Ls6e:	addl $8,%eax
.Ls7e:	addl $8,%eax
.Ls8e:	addl $8,%eax
	addq %rbx,%rdi		/* +64 */
	subq %rax,%rdi		/* correct destination with computed offset */

	shlq $6,%rdx		/* loop counter * 64 (stride length) */
	addq %rax,%rdx		/* add offset to loopcnt */
	andl $63,%ecx		/* remaining bytes */
	addq %rcx,%rdx		/* add them */
	jmp .Lzero_rest

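/*
 * Fixups for faults in the tail loops: .Le_quad rebuilds the remaining
 * byte count from the outstanding quadwords (ecx) and the byte remainder
 * of the length (edx); .Lzero_rest clears the uncopied destination bytes
 * with rep stosb, but only if the zero flag saved on the stack is set;
 * .Le_zero finally returns the uncopied count in eax.
 */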
	/* exception on quad word loop in tail handling */
	/* ecx: loopcnt/8, edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx
	andl $7,%edx
	addl %ecx,%edx
	/* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
	cmpl $0,(%rsp)		/* zero flag set? */
	jz .Le_zero
	movq %rdx,%rcx
.Le_byte:
	xorl %eax,%eax
.Le5:	rep
	stosb
	/* when there is another exception while zeroing the rest, just return */
.Le_zero:
	movq %rdx,%rax
	jmp .Lende
	CFI_ENDPROC
ENDPROC(__copy_user_nocache)
