Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ |
8d379dad JB |
2 | |
3 | #include <linux/config.h> | |
4 | #include <linux/linkage.h> | |
5 | #include <asm/dwarf2.h> | |
6 | ||
1da177e4 LT |
7 | /* |
8 | * ISO C memset - set a memory block to a byte value. | |
9 | * | |
10 | * rdi destination | |
11 | * rsi value (char) | |
12 | * rdx count (bytes) | |
13 | * | |
14 | * rax original destination | |
15 | */ | |
/*
 * memset_c - string-instruction variant of memset.
 *
 * Not called directly: the .altinstructions record at the bottom of this
 * file patches a short jmp to this routine over the start of memset on
 * CPUs with X86_FEATURE_REP_GOOD (fast rep string ops).
 *
 * rdi	destination
 * rsi	value (char)
 * rdx	count (bytes)
 *
 * rax	original destination (returned)
 *
 * NOTE(review): only edx (low 32 bits of the count) is used here —
 * assumes in-kernel memset counts fit in 32 bits; confirm against callers.
 */
	ALIGN
memset_c:
	CFI_STARTPROC
	movq %rdi,%r9		/* r9 = dst, saved for the return value */
	movl %edx,%r8d
	andl $7,%r8d		/* r8d = count % 8, tail bytes for stosb */
	movl %edx,%ecx
	shrl $3,%ecx		/* ecx = count / 8, qwords for stosq */
	/* expand byte value into all 8 bytes of rax */
	movzbl %sil,%esi
	movabs $0x0101010101010101,%rax
	mulq %rsi		/* with rax, clobbers rdx */
	rep stosq		/* store whole qwords at rdi */
	movl %r8d,%ecx
	rep stosb		/* store the remaining 0..7 tail bytes */
	movq %r9,%rax		/* return the original destination */
	ret
	CFI_ENDPROC
ENDPROC(memset_c)
35 | ||
/*
 * memset/__memset - generic open-coded memset.
 *
 * rdi	destination
 * rsi	value (char)
 * rdx	count (bytes)
 *
 * rax	original destination (returned)
 *
 * Register roles inside the body:
 *	rax = value expanded to 8 bytes
 *	r10 = original destination (for the return value)
 *	r11 = remaining byte count
 *	r9d = initial misalignment of dst (dst & 7)
 */
ENTRY(memset)
ENTRY(__memset)
	CFI_STARTPROC
	movq %rdi,%r10		/* r10 = original dst, returned at .Lende */
	movq %rdx,%r11		/* r11 = byte count */

	/* expand byte value into all 8 bytes of rax */
	movzbl %sil,%ecx
	movabs $0x0101010101010101,%rax
	mul %rcx		/* with rax, clobbers rdx */

	/* align dst to an 8-byte boundary */
	movl %edi,%r9d
	andl $7,%r9d		/* r9d = dst & 7 */
	jnz .Lbad_alignment
	CFI_REMEMBER_STATE
.Lafter_bad_alignment:
	/* here: rdi is 8-byte aligned, r11 = bytes remaining */

	movl %r11d,%ecx
	shrl $6,%ecx		/* ecx = number of 64-byte chunks */
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:			/* unrolled: 8 qword stores per iteration */
	decl %ecx
	movq %rax,(%rdi)
	movq %rax,8(%rdi)
	movq %rax,16(%rdi)
	movq %rax,24(%rdi)
	movq %rax,32(%rdi)
	movq %rax,40(%rdi)
	movq %rax,48(%rdi)
	movq %rax,56(%rdi)
	leaq 64(%rdi),%rdi	/* lea advances dst without touching flags */
	jnz .Lloop_64

	/* Handle tail in loops. The loops should be faster than hard
	   to predict jump tables. */
	.p2align 4
.Lhandle_tail:
	movl %r11d,%ecx
	andl $63&(~7),%ecx	/* whole qwords left: (count % 64) rounded down to 8 */
	jz .Lhandle_7
	shrl $3,%ecx
	.p2align 4
.Lloop_8:			/* one qword store per iteration */
	decl %ecx
	movq %rax,(%rdi)
	leaq 8(%rdi),%rdi
	jnz .Lloop_8

.Lhandle_7:
	movl %r11d,%ecx
	andl $7,%ecx		/* final 0..7 single bytes */
	jz .Lende
	.p2align 4
.Lloop_1:			/* one byte store per iteration */
	decl %ecx
	movb %al,(%rdi)
	leaq 1(%rdi),%rdi
	jnz .Lloop_1

.Lende:
	movq %r10,%rax		/* return the original destination */
	ret

	CFI_RESTORE_STATE
.Lbad_alignment:
	/* dst not 8-byte aligned; r9 = dst & 7 (nonzero) */
	cmpq $7,%r11
	jbe .Lhandle_7		/* < 8 bytes total: byte loop does it all */
	movq %rax,(%rdi)	/* unaligned store fills up to the boundary */
	movq $8,%r8
	subq %r9,%r8		/* r8 = 8 - (dst & 7) = bytes to alignment */
	addq %r8,%rdi
	subq %r8,%r11
	jmp .Lafter_bad_alignment
.Lfinal:			/* end marker: .Lfinal - memset = patchable length */
	CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)
7bcd3f34 AK |
116 | |
	/* Some CPUs run faster using the string instructions.
	   It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

	/*
	 * Replacement code: a 2-byte short jmp from memset to memset_c.
	 * Patched over the start of memset at boot when the feature
	 * bit below is set.
	 */
	.section .altinstr_replacement,"ax"
1:	.byte 0xeb	/* jmp <disp8> */
	.byte (memset_c - memset) - (2f - 1b)	/* offset */
2:
	.previous
	/*
	 * Alternative-instruction descriptor:
	 *	original instruction address, replacement address,
	 *	required CPU feature bit, original length, replacement length.
	 */
	.section .altinstructions,"a"
	.align 8
	.quad memset				/* instruction to patch */
	.quad 1b				/* replacement (short jmp above) */
	.byte X86_FEATURE_REP_GOOD		/* feature: fast rep string ops */
	.byte .Lfinal - memset			/* length of original memset */
	.byte 2b - 1b				/* length of replacement */
	.previous