[PATCH] i383 numa: fix numaq/summit apicid conflict
[deliverable/linux.git] / arch / x86_64 / lib / memcpy.S
CommitLineData
1da177e4
LT
1/* Copyright 2002 Andi Kleen */
2
8d379dad
JB
3#include <linux/config.h>
4#include <linux/linkage.h>
5#include <asm/dwarf2.h>
6#include <asm/cpufeature.h>
7
1da177e4
LT
8/*
9 * memcpy - Copy a memory block.
10 *
11 * Input:
12 * rdi destination
13 * rsi source
14 * rdx count
15 *
16 * Output:
17 * rax original destination
18 */
19
8d379dad
JB
/*
 * memcpy_c - copy a memory block with the string instructions.
 *
 * This is the variant that memcpy is redirected to by the alternatives
 * record at the end of this file (feature bit X86_FEATURE_REP_GOOD).
 *
 * Input:
 *	rdi	destination
 *	rsi	source
 *	rdx	count (bytes, full 64-bit value)
 *
 * Output:
 *	rax	original destination
 *
 * Clobbers: rcx, rdx, rsi, rdi, flags.
 */
	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq	%rdi,%rax		/* return value = original destination */

	/*
	 * Use the whole 64-bit count for the quadword pass; taking only
	 * %edx here would silently truncate copies of 4GB or more.
	 */
	movq	%rdx,%rcx
	shrq	$3,%rcx			/* rcx = number of whole quadwords */
	andl	$7,%edx			/* edx = trailing bytes (0..7) */
	rep movsq

	movl	%edx,%ecx		/* rcx = trailing byte count */
	rep movsb
	ret
	CFI_ENDPROC
ENDPROC(memcpy_c)
33
/*
 * memcpy / __memcpy - copy a memory block with unrolled 64-bit moves.
 *
 * Input:
 *	rdi	destination
 *	rsi	source
 *	rdx	count (bytes, full 64-bit value)
 *
 * Output:
 *	rax	original destination
 *
 * Clobbers: rcx, rsi, rdi, r8-r11, flags.  No stack is used.
 *
 * The copy runs in three phases: 64-byte blocks, then the remaining
 * quadwords, then the remaining bytes.  In the two quadword loops the
 * counter is decremented at the top of the body; the following movq/leaq
 * instructions leave the flags alone, so the jnz at the bottom still
 * tests the result of that decrement.
 *
 * .Lfinal marks the end of the function; the alternatives record at the
 * end of this file uses (.Lfinal - memcpy) as a one-byte length, so the
 * function body must stay shorter than 256 bytes.
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	movq	%rdi,%rax		/* return value = original destination */

	/*
	 * Block count is computed from the full 64-bit length so that
	 * copies of 4GB or more are not truncated.
	 */
	movq	%rdx,%rcx
	shrq	$6,%rcx			/* rcx = number of 64-byte blocks */
	jz	.Lhandle_tail

	.p2align 4
.Lloop_64:
	decq	%rcx			/* sets ZF for the jnz below */

	movq	0*8(%rsi),%r11
	movq	1*8(%rsi),%r8

	movq	%r11,0*8(%rdi)
	movq	%r8,1*8(%rdi)

	movq	2*8(%rsi),%r9
	movq	3*8(%rsi),%r10

	movq	%r9,2*8(%rdi)
	movq	%r10,3*8(%rdi)

	movq	4*8(%rsi),%r11
	movq	5*8(%rsi),%r8

	movq	%r11,4*8(%rdi)
	movq	%r8,5*8(%rdi)

	movq	6*8(%rsi),%r9
	movq	7*8(%rsi),%r10

	movq	%r9,6*8(%rdi)
	movq	%r10,7*8(%rdi)

	leaq	64(%rsi),%rsi		/* lea: advance without touching flags */
	leaq	64(%rdi),%rdi
	jnz	.Lloop_64

	/* Fewer than 64 bytes left: copy the remaining whole quadwords. */
.Lhandle_tail:
	movl	%edx,%ecx
	andl	$63,%ecx		/* bytes left after the 64-byte blocks */
	shrl	$3,%ecx			/* ecx = remaining quadwords (0..7) */
	jz	.Lhandle_7
	.p2align 4
.Lloop_8:
	decl	%ecx			/* sets ZF for the jnz below */
	movq	(%rsi),%r8
	movq	%r8,(%rdi)
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jnz	.Lloop_8

	/* Fewer than 8 bytes left: copy them one at a time. */
.Lhandle_7:
	movl	%edx,%ecx
	andl	$7,%ecx			/* ecx = remaining bytes (0..7) */
	jz	.Lende
	.p2align 4
.Lloop_1:
	movb	(%rsi),%r8b
	movb	%r8b,(%rdi)
	incq	%rdi
	incq	%rsi
	decl	%ecx
	jnz	.Lloop_1

.Lende:
	ret
.Lfinal:
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
7bcd3f34
AK
114
/*
 * Some CPUs run faster using the string copy instructions, and that
 * variant is also a lot simpler, so use it when possible.
 *
 * The replacement is a two-byte short jump (opcode 0xeb + disp8) that,
 * once placed at the start of memcpy, transfers control to memcpy_c.
 * The displacement is relative to the end of the jump instruction,
 * hence the subtraction of the replacement's own length.
 */
	.section .altinstr_replacement,"ax"
.Lmemcpy_alt_begin:
	.byte 0xeb					/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (.Lmemcpy_alt_end - .Lmemcpy_alt_begin) /* offset */
.Lmemcpy_alt_end:
	.previous

/*
 * Alternatives record describing the patch above.
 * NOTE(review): field order presumably mirrors struct alt_instr
 * (declared outside this file) - confirm against its definition.
 */
	.section .altinstructions,"a"
	.align 8
	.quad memcpy					/* code to be patched */
	.quad .Lmemcpy_alt_begin			/* replacement bytes */
	.byte X86_FEATURE_REP_GOOD			/* feature bit selecting the patch */
	.byte .Lfinal - memcpy				/* length of the original code */
	.byte .Lmemcpy_alt_end - .Lmemcpy_alt_begin	/* length of the replacement */
	.previous
This page took 0.14427 seconds and 5 git commands to generate.