/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>

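/*
 * copy_page_c copies one 4096-byte page as 512 quadwords with a single
 * rep movsq.  As in the unrolled version below, the destination is in
 * %rdi and the source in %rsi.  This body is only used on CPUs whose
 * string operations are fast; see the alternative-instruction patching
 * at the end of this file.
 */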
	ALIGN
copy_page_c:
	CFI_STARTPROC
	movl	$4096/8,%ecx
	rep	movsq
	ret
	CFI_ENDPROC
ENDPROC(copy_page_c)

/* Don't use streaming (non-temporal) stores; it is better when the
   target page ends up in the cache. */

/* Could vary the prefetch distance based on SMP/UP */

ENTRY(copy_page)
	CFI_STARTPROC
	subq	$3*8,%rsp
	CFI_ADJUST_CFA_OFFSET 3*8
	movq	%rbx,(%rsp)
	CFI_REL_OFFSET rbx, 0
	movq	%r12,1*8(%rsp)
	CFI_REL_OFFSET r12, 1*8
	movq	%r13,2*8(%rsp)
	CFI_REL_OFFSET r13, 2*8

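	/*
	 * Unrolled copy: the page is moved in 64-byte chunks, eight
	 * quadwords at a time through registers.  The first
	 * 4096/64 - 5 = 59 chunks go through .Loop64, which also
	 * prefetches 5*64 bytes ahead of the current source position;
	 * the last 5 chunks go through .Loop2 further down, which does
	 * no prefetch.  Of the registers spilled above, the loops only
	 * clobber %rbx and %r12.
	 */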
	movl	$(4096/64)-5,%ecx
	.p2align 4
.Loop64:
	dec	%rcx

	movq	(%rsi), %rax
	movq	8 (%rsi), %rbx
	movq	16 (%rsi), %rdx
	movq	24 (%rsi), %r8
	movq	32 (%rsi), %r9
	movq	40 (%rsi), %r10
	movq	48 (%rsi), %r11
	movq	56 (%rsi), %r12

	prefetcht0 5*64(%rsi)

	movq	%rax, (%rdi)
	movq	%rbx, 8 (%rdi)
	movq	%rdx, 16 (%rdi)
	movq	%r8, 24 (%rdi)
	movq	%r9, 32 (%rdi)
	movq	%r10, 40 (%rdi)
	movq	%r11, 48 (%rdi)
	movq	%r12, 56 (%rdi)

	leaq	64 (%rsi), %rsi
	leaq	64 (%rdi), %rdi

	jnz	.Loop64

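	/* Last 5*64 bytes of the page: the same eight-quadword pattern,
	   but with no prefetcht0, so no prefetch is issued for addresses
	   beyond the end of the source page. */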
61 | ||
62 | movl $5,%ecx | |
63 | .p2align 4 | |
64 | .Loop2: | |
65 | decl %ecx | |
66 | ||
67 | movq (%rsi), %rax | |
68 | movq 8 (%rsi), %rbx | |
69 | movq 16 (%rsi), %rdx | |
70 | movq 24 (%rsi), %r8 | |
71 | movq 32 (%rsi), %r9 | |
72 | movq 40 (%rsi), %r10 | |
73 | movq 48 (%rsi), %r11 | |
74 | movq 56 (%rsi), %r12 | |
75 | ||
76 | movq %rax, (%rdi) | |
77 | movq %rbx, 8 (%rdi) | |
78 | movq %rdx, 16 (%rdi) | |
79 | movq %r8, 24 (%rdi) | |
80 | movq %r9, 32 (%rdi) | |
81 | movq %r10, 40 (%rdi) | |
82 | movq %r11, 48 (%rdi) | |
83 | movq %r12, 56 (%rdi) | |
84 | ||
85 | leaq 64(%rdi),%rdi | |
86 | leaq 64(%rsi),%rsi | |
87 | ||
88 | jnz .Loop2 | |
89 | ||
	movq	(%rsp),%rbx
	CFI_RESTORE rbx
	movq	1*8(%rsp),%r12
	CFI_RESTORE r12
	movq	2*8(%rsp),%r13
	CFI_RESTORE r13
	addq	$3*8,%rsp
	CFI_ADJUST_CFA_OFFSET -3*8
	ret
.Lcopy_page_end:
	CFI_ENDPROC
ENDPROC(copy_page)
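
/*
 * Rough C-level sketch of the unrolled routine above, for readability
 * only; it is not part of this file.  The name "copy_page_sketch" is
 * illustrative, and the loop bounds mirror .Loop64/.Loop2 (59
 * prefetching chunks, then 5 plain chunks):
 *
 *	void copy_page_sketch(void *to, void *from)
 *	{
 *		unsigned long *d = to;
 *		const unsigned long *s = from;
 *		int i, j;
 *
 *		for (i = 0; i < 4096/64 - 5; i++) {	// .Loop64
 *			__builtin_prefetch(s + 5*64/8);	// prefetcht0 5*64(%rsi)
 *			for (j = 0; j < 8; j++)		// 64 bytes via registers
 *				d[j] = s[j];
 *			d += 8;
 *			s += 8;
 *		}
 *		for (i = 0; i < 5; i++) {		// .Loop2, no prefetch
 *			for (j = 0; j < 8; j++)
 *				d[j] = s[j];
 *			d += 8;
 *			s += 8;
 *		}
 *	}
 */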

/* Some CPUs run faster using the string copy instructions.
   It is also a lot simpler.  Use this when possible. */

#include <asm/cpufeature.h>

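/*
 * Alternative-instruction patching: when the running CPU advertises
 * X86_FEATURE_REP_GOOD, the alternatives code rewrites the start of
 * copy_page at boot with the two-byte short jump encoded below, so
 * calls to copy_page land in copy_page_c (the rep movsq version)
 * instead of the unrolled loop.
 */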
	.section .altinstr_replacement,"ax"
1:	.byte 0xeb					/* jmp <disp8> */
	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
2:
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad copy_page
	.quad 1b
	.byte X86_FEATURE_REP_GOOD
	.byte .Lcopy_page_end - copy_page
	.byte 2b - 1b
	.previous