Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* Copyright 2002 Andi Kleen */ |
038b0a6d | 2 | |
8d379dad JB |
3 | #include <linux/linkage.h> |
4 | #include <asm/dwarf2.h> | |
5 | #include <asm/cpufeature.h> | |
6 | ||
1da177e4 LT |
7 | /* |
8 | * memcpy - Copy a memory block. | |
9 | * | |
10 | * Input: | |
11 | * rdi destination | |
12 | * rsi source | |
13 | * rdx count | |
14 | * | |
15 | * Output: | |
16 | * rax original destination | |
17 | */ | |
18 | ||
8d379dad JB |
19 | ALIGN |
20 | memcpy_c: | | /* rep-string variant: patched in over memcpy at boot via the .altinstructions records below, on CPUs with X86_FEATURE_REP_GOOD */
21 | CFI_STARTPROC | |
22 | movq %rdi,%rax | | /* rax = original destination (memcpy's return value) */
23 | movl %edx,%ecx | | /* NOTE(review): only the low 32 bits of the count in rdx are used — assumes count < 4 GiB; confirm against callers */
24 | shrl $3,%ecx | | /* ecx = count / 8, number of qwords to copy */
25 | andl $7,%edx | | /* edx = count % 8, leftover tail bytes */
26 | rep movsq | | /* bulk copy, one qword per iteration */
27 | movl %edx,%ecx | |
28 | rep movsb | | /* copy the remaining 0..7 tail bytes */
29 | ret | |
30 | CFI_ENDPROC | |
31 | ENDPROC(memcpy_c) | |
32 | ||
33 | ENTRY(__memcpy) |
34 | ENTRY(memcpy) | | /* open-coded copy, 64 bytes per unrolled loop pass; default implementation (first 2 bytes may be patched to jmp memcpy_c, see below) */
35 | CFI_STARTPROC |
7bcd3f34 | 36 | pushq %rbx | /* NOTE(review): rbx is saved/restored but never used in this body — looks vestigial; confirm before touching */
8d379dad JB |
37 | CFI_ADJUST_CFA_OFFSET 8 |
38 | CFI_REL_OFFSET rbx, 0 | |
7bcd3f34 AK |
39 | movq %rdi,%rax | /* rax = original destination (return value) */
40 | ||
41 | movl %edx,%ecx | | /* NOTE(review): only the low 32 bits of rdx (count) are used — assumes count < 4 GiB */
42 | shrl $6,%ecx | | /* ecx = number of whole 64-byte chunks */
43 | jz .Lhandle_tail | | /* count < 64: go straight to the tail copy */
44 | ||
45 | .p2align 4 | |
46 | .Lloop_64: | | /* copy one 64-byte chunk per iteration; loads/stores interleaved in qword pairs */
47 | decl %ecx | | /* decrement up front: every mov/lea below preserves flags, so the jnz at the bottom tests this dec */
48 | ||
49 | movq (%rsi),%r11 | |
50 | movq 8(%rsi),%r8 | |
51 | ||
52 | movq %r11,(%rdi) | |
53 | movq %r8,1*8(%rdi) | |
54 | ||
55 | movq 2*8(%rsi),%r9 | |
56 | movq 3*8(%rsi),%r10 | |
57 | ||
58 | movq %r9,2*8(%rdi) | |
59 | movq %r10,3*8(%rdi) | |
60 | ||
61 | movq 4*8(%rsi),%r11 | |
62 | movq 5*8(%rsi),%r8 | |
63 | ||
64 | movq %r11,4*8(%rdi) | |
65 | movq %r8,5*8(%rdi) | |
66 | ||
67 | movq 6*8(%rsi),%r9 | |
68 | movq 7*8(%rsi),%r10 | |
69 | ||
70 | movq %r9,6*8(%rdi) | |
71 | movq %r10,7*8(%rdi) | |
72 | ||
73 | leaq 64(%rsi),%rsi | | /* lea, not add: advances pointers without clobbering the flags from decl above */
74 | leaq 64(%rdi),%rdi | |
75 | jnz .Lloop_64 | |
76 | ||
77 | .Lhandle_tail: | | /* copy the remaining count%64 bytes, a qword at a time; edx still holds the (low 32 bits of the) count */
78 | movl %edx,%ecx | |
79 | andl $63,%ecx | |
80 | shrl $3,%ecx | | /* ecx = number of whole qwords left */
81 | jz .Lhandle_7 | |
82 | .p2align 4 | |
83 | .Lloop_8: | | /* 8 bytes per iteration */
84 | decl %ecx | |
85 | movq (%rsi),%r8 | |
86 | movq %r8,(%rdi) | |
87 | leaq 8(%rdi),%rdi | |
88 | leaq 8(%rsi),%rsi | |
89 | jnz .Lloop_8 | |
90 | ||
91 | .Lhandle_7: | | /* final count%8 bytes, one byte at a time */
92 | movl %edx,%ecx | |
93 | andl $7,%ecx | |
94 | jz .Lende | |
95 | .p2align 4 | |
96 | .Lloop_1: | |
97 | movb (%rsi),%r8b | |
98 | movb %r8b,(%rdi) | |
99 | incq %rdi | |
100 | incq %rsi | |
101 | decl %ecx | |
102 | jnz .Lloop_1 | |
103 | ||
104 | .Lende: | | /* done: restore rbx and return original destination in rax */
105 | popq %rbx | |
8d379dad JB |
106 | CFI_ADJUST_CFA_OFFSET -8 |
107 | CFI_RESTORE rbx | |
7bcd3f34 AK |
108 | ret |
109 | .Lfinal: | | /* NOTE(review): unreferenced in this file — possibly a leftover end-of-function marker; verify before removing */
8d379dad JB |
110 | CFI_ENDPROC |
111 | ENDPROC(memcpy) | |
112 | ENDPROC(__memcpy) | |
7bcd3f34 AK |
113 | |
114 | /* Some CPUs run faster using the string copy instructions. | |
115 | It is also a lot simpler. Use this when possible */ | |
116 | ||
8d379dad JB |
117 | .section .altinstr_replacement,"ax" | /* replacement code consumed by the boot-time alternatives patcher */
118 | 1: .byte 0xeb /* jmp <disp8> */ | |
119 | .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ | | /* disp8 is relative to the end of the 2-byte jmp (label 2), assuming the patched copy sits at memcpy itself */
120 | 2: | |
121 | .previous | |
7bcd3f34 AK |
122 | .section .altinstructions,"a" | /* one alternatives-table record: orig addr, repl addr, cpu feature, orig len, repl len */
123 | .align 8 | |
8d379dad JB |
124 | .quad memcpy | /* address of the code to be patched */
125 | .quad 1b | | /* address of the 2-byte jmp emitted above */
126 | .byte X86_FEATURE_REP_GOOD | | /* apply only on CPUs where rep-string copies are fast (per the comment above) */
b8d3f244 PV |
127 | /* Replace only beginning, memcpy is used to apply alternatives, so it |
128 | * is silly to overwrite itself with nops - reboot is only outcome... */ | |
129 | .byte 2b - 1b | | /* length of original code to patch: just these 2 bytes of memcpy, not the whole body */
8d379dad | 130 | .byte 2b - 1b | /* replacement length: 2 bytes (0xeb opcode + disp8) */
7bcd3f34 | 131 | .previous |