/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
038b0a6d | 2 | |
8d379dad JB |
3 | #include <linux/linkage.h> |
4 | #include <asm/dwarf2.h> | |
59e97e4d | 5 | #include <asm/alternative-asm.h> |
8d379dad JB |
6 | |
/*
 * copy_page_rep - copy one 4K page with a single "rep movsq".
 *
 * In:  %rdi = destination page, %rsi = source page
 * Clobbers: %rcx, %rsi, %rdi (advanced past the page by movsq), flags.
 *
 * This variant is not called directly: the alternatives machinery below
 * patches the start of copy_page() to jump here on CPUs that advertise
 * X86_FEATURE_REP_GOOD (fast string operations).
 */
	ALIGN
copy_page_rep:
	CFI_STARTPROC
	movl	$4096/8, %ecx		/* 512 quadwords per 4K page */
	rep	movsq			/* copy %rcx quadwords (%rsi) -> (%rdi) */
	ret
	CFI_ENDPROC
ENDPROC(copy_page_rep)
/*
 * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
 * Could vary the prefetch distance based on SMP/UP.
 */

/*
 * void copy_page(void *to, void *from)
 *
 * In:  %rdi = destination page, %rsi = source page (4096 bytes each)
 * Unrolled copy: 8 quadwords (one 64-byte cache line) per iteration,
 * staged through %rax, %rbx, %rdx, %r8-%r12.  %rbx and %r12 are
 * callee-saved, so they are spilled to a small stack frame first.
 *
 * The page is copied in two loops:
 *   .Loop64 - (4096/64)-5 = 59 iterations, prefetching 5 lines ahead
 *   .Loop2  - the final 5 lines, with no prefetch, so that
 *             "prefetcht0 5*64(%rsi)" never targets beyond the end of
 *             the source page (last .Loop64 prefetch hits offset 63*64).
 */
ENTRY(copy_page)
	CFI_STARTPROC
	subq	$2*8, %rsp		/* frame for two callee-saved regs */
	CFI_ADJUST_CFA_OFFSET 2*8
	movq	%rbx, (%rsp)
	CFI_REL_OFFSET rbx, 0
	movq	%r12, 1*8(%rsp)
	CFI_REL_OFFSET r12, 1*8

	movl	$(4096/64)-5, %ecx	/* all lines except the last 5 */
	.p2align 4
.Loop64:
	dec	%rcx			/* early dec: ZF tested at jnz below */
	movq	0x8*0(%rsi), %rax	/* read a full 64-byte line ... */
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	prefetcht0 5*64(%rsi)		/* pull line +5 toward L1 */

	movq	%rax, 0x8*0(%rdi)	/* ... then write it out */
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8, 0x8*3(%rdi)
	movq	%r9, 0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64 (%rsi), %rsi		/* advance one cache line */
	leaq	64 (%rdi), %rdi

	jnz	.Loop64			/* ZF still from "dec %rcx" above */

	movl	$5, %ecx		/* remaining 5 lines, no prefetch */
	.p2align 4
.Loop2:
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8, 0x8*3(%rdi)
	movq	%r9, 0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi

	jnz	.Loop2

	movq	(%rsp), %rbx		/* restore callee-saved regs */
	CFI_RESTORE rbx
	movq	1*8(%rsp), %r12
	CFI_RESTORE r12
	addq	$2*8, %rsp
	CFI_ADJUST_CFA_OFFSET -2*8
	ret
.Lcopy_page_end:			/* end marker: patchable length below */
	CFI_ENDPROC
ENDPROC(copy_page)
/* Some CPUs run faster using the string copy instructions.
   It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

	/*
	 * Alternatives patching: on CPUs with X86_FEATURE_REP_GOOD, the
	 * start of copy_page() is overwritten at boot with the 2-byte
	 * replacement below - a short jmp to copy_page_rep.  The rel8
	 * displacement is computed by hand: target offset relative to
	 * copy_page, minus the replacement's own length (2f - 1b).
	 */
	.section .altinstr_replacement,"ax"
1:	.byte 0xeb					/* jmp <disp8> */
	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
2:
	.previous
	/*
	 * Patch-site descriptor: patch copy_page (instr, length
	 * .Lcopy_page_end-copy_page) with replacement 1b (length 2b-1b)
	 * when the feature bit is set.
	 */
	.section .altinstructions,"a"
	altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,	\
		.Lcopy_page_end-copy_page, 2b-1b
	.previous