/* * Copyright 2002, 2003 Andi Kleen, SuSE Labs. * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive * for more details. No warranty for anything given at all. */ #include #include #include /* * Checksum copy with exception handling. * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the * destination is zeroed. * * Input * rdi source * rsi destination * edx len (32bit) * ecx sum (32bit) * r8 src_err_ptr (int) * r9 dst_err_ptr (int) * * Output * eax 64bit sum. undefined in case of exception. * * Wrappers need to take care of valid exception sum and zeroing. * They also should align source or destination to 8 bytes. */ .macro source 10: _ASM_EXTABLE(10b, .Lbad_source) .endm .macro dest 20: _ASM_EXTABLE(20b, .Lbad_dest) .endm .macro ignore L=.Lignore 30: _ASM_EXTABLE(30b, \L) .endm ENTRY(csum_partial_copy_generic) cmpl $3*64, %edx jle .Lignore .Lignore: subq $7*8, %rsp movq %rbx, 2*8(%rsp) movq %r12, 3*8(%rsp) movq %r14, 4*8(%rsp) movq %r13, 5*8(%rsp) movq %r15, 6*8(%rsp) movq %r8, (%rsp) movq %r9, 1*8(%rsp) movl %ecx, %eax movl %edx, %ecx xorl %r9d, %r9d movq %rcx, %r12 shrq $6, %r12 jz .Lhandle_tail /* < 64 */ clc /* main loop. clear in 64 byte blocks */ /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ /* r11: temp3, rdx: temp4, r12 loopcnt */ /* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */ .p2align 4 .Lloop: source movq (%rdi), %rbx source movq 8(%rdi), %r8 source movq 16(%rdi), %r11 source movq 24(%rdi), %rdx source movq 32(%rdi), %r10 source movq 40(%rdi), %r15 source movq 48(%rdi), %r14 source movq 56(%rdi), %r13 ignore 2f prefetcht0 5*64(%rdi) 2: adcq %rbx, %rax adcq %r8, %rax adcq %r11, %rax adcq %rdx, %rax adcq %r10, %rax adcq %r15, %rax adcq %r14, %rax adcq %r13, %rax decl %r12d dest movq %rbx, (%rsi) dest movq %r8, 8(%rsi) dest movq %r11, 16(%rsi) dest movq %rdx, 24(%rsi) dest movq %r10, 32(%rsi) dest movq %r15, 40(%rsi) dest movq %r14, 48(%rsi) dest movq %r13, 56(%rsi) 3: leaq 64(%rdi), %rdi leaq 64(%rsi), %rsi jnz .Lloop adcq %r9, %rax /* do last up to 56 bytes */ .Lhandle_tail: /* ecx: count */ movl %ecx, %r10d andl $63, %ecx shrl $3, %ecx jz .Lfold clc .p2align 4 .Lloop_8: source movq (%rdi), %rbx adcq %rbx, %rax decl %ecx dest movq %rbx, (%rsi) leaq 8(%rsi), %rsi /* preserve carry */ leaq 8(%rdi), %rdi jnz .Lloop_8 adcq %r9, %rax /* add in carry */ .Lfold: /* reduce checksum to 32bits */ movl %eax, %ebx shrq $32, %rax addl %ebx, %eax adcl %r9d, %eax /* do last up to 6 bytes */ .Lhandle_7: movl %r10d, %ecx andl $7, %ecx shrl $1, %ecx jz .Lhandle_1 movl $2, %edx xorl %ebx, %ebx clc .p2align 4 .Lloop_1: source movw (%rdi), %bx adcl %ebx, %eax decl %ecx dest movw %bx, (%rsi) leaq 2(%rdi), %rdi leaq 2(%rsi), %rsi jnz .Lloop_1 adcl %r9d, %eax /* add in carry */ /* handle last odd byte */ .Lhandle_1: testb $1, %r10b jz .Lende xorl %ebx, %ebx source movb (%rdi), %bl dest movb %bl, (%rsi) addl %ebx, %eax adcl %r9d, %eax /* carry */ .Lende: movq 2*8(%rsp), %rbx movq 3*8(%rsp), %r12 movq 4*8(%rsp), %r14 movq 5*8(%rsp), %r13 movq 6*8(%rsp), %r15 addq $7*8, %rsp ret /* Exception handlers. Very simple, zeroing is done in the wrappers */ .Lbad_source: movq (%rsp), %rax testq %rax, %rax jz .Lende movl $-EFAULT, (%rax) jmp .Lende .Lbad_dest: movq 8(%rsp), %rax testq %rax, %rax jz .Lende movl $-EFAULT, (%rax) jmp .Lende ENDPROC(csum_partial_copy_generic)