Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* Copyright 2002 Andi Kleen */ |
2 | ||
8d379dad JB |
3 | #include <linux/config.h> |
4 | #include <linux/linkage.h> | |
5 | #include <asm/dwarf2.h> | |
6 | #include <asm/cpufeature.h> | |
7 | ||
1da177e4 LT |
8 | /* |
9 | * memcpy - Copy a memory block. | |
10 | * | |
11 | * Input: | |
12 | * rdi destination | |
13 | * rsi source | |
14 | * rdx count | |
15 | * | |
16 | * Output: | |
17 | * rax original destination | |
18 | */ | |
19 | ||
8d379dad JB |
/*
 * memcpy_c - copy a memory block with the string copy instructions.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (bytes)
 *
 * Output:
 * rax original destination
 *
 * Copies count / 8 quadwords with "rep movsq", then the remaining
 * count % 8 bytes with "rep movsb".  The quadword count is computed in
 * the full 64-bit rcx so that counts of 4GB and above are not truncated.
 */
	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq %rdi,%rax		/* return value: original destination */
	movq %rdx,%rcx		/* full 64-bit byte count */
	shrq $3,%rcx		/* rcx = number of whole quadwords */
	andl $7,%edx		/* edx = leftover bytes (0..7) */
	rep movsq
	movl %edx,%ecx		/* rcx = leftover bytes, upper half zeroed */
	rep movsb
	ret
	CFI_ENDPROC
ENDPROC(memcpy_c)
33 | ||
/*
 * memcpy / __memcpy - copy a memory block with unrolled quadword moves.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (bytes)
 *
 * Output:
 * rax original destination
 *
 * The copy runs in three phases: 64-byte blocks, then 8-byte words,
 * then single bytes.  Only rcx and r8-r11 are used as scratch, so
 * nothing has to be saved on the stack.
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	movq %rdi,%rax		/* return value: original destination */

	/*
	 * rcx = number of 64-byte blocks.  Use the full 64-bit count so
	 * that sizes of 4GB and above are not truncated.
	 */
	movq %rdx,%rcx
	shrq $6,%rcx
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	/*
	 * The flags set by this decrement are consumed by the jnz at the
	 * bottom of the loop; the mov and lea instructions in between
	 * leave the flags untouched.
	 */
	decq %rcx

	movq (%rsi),%r11
	movq 8(%rsi),%r8

	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10

	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8

	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10

	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	jnz .Lloop_64

.Lhandle_tail:
	/* ecx = number of whole quadwords left in the last (count % 64) bytes */
	movl %edx,%ecx
	andl $63,%ecx
	shrl $3,%ecx
	jz .Lhandle_7
	.p2align 4
.Lloop_8:
	decl %ecx		/* flags survive the mov/lea below until jnz */
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	/* ecx = trailing bytes (count % 8) */
	movl %edx,%ecx
	andl $7,%ecx
	jz .Lende
	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	ret
	/*
	 * End-of-function marker: the .altinstructions record in this file
	 * refers to it as ".Lfinal - memcpy".
	 */
.Lfinal:
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
7bcd3f34 AK |
114 | |
/*
 * On some CPUs the string copy instructions are the faster choice, and
 * memcpy_c is also much simpler, so use it whenever possible (selected
 * by X86_FEATURE_REP_GOOD).
 */

	/* Replacement code: a two-byte short jump from memcpy to memcpy_c. */
	.section .altinstr_replacement,"ax"
.Lmemcpy_alt_begin:
	.byte 0xeb	/* jmp <disp8> */
	/* offset: relative to the end of the two-byte jump */
	.byte (memcpy_c - memcpy) - (.Lmemcpy_alt_end - .Lmemcpy_alt_begin)
.Lmemcpy_alt_end:
	.previous

	/*
	 * Record tying the replacement to memcpy: two addresses, the CPU
	 * feature number, then two one-byte sizes (presumably the layout
	 * of struct alt_instr -- confirm against its definition).
	 */
	.section .altinstructions,"a"
	.align 8
	.quad memcpy
	.quad .Lmemcpy_alt_begin
	.byte X86_FEATURE_REP_GOOD
	.byte .Lfinal - memcpy
	.byte .Lmemcpy_alt_end - .Lmemcpy_alt_begin
	.previous