/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 */
#include <asm/assembler.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 *
 * Purpose: copy len bytes from src to dst and, in the same pass,
 * accumulate a checksum of the copied data in 'sum' with add-with-carry
 * (adcs) arithmetic.  The carry flag is part of the running sum, so the
 * order of the flag-setting instructions below matters.
 *
 * NOTE(review): the entry code executes "mov sum, #-1" before 'sum' is
 * ever read, so any value the caller passes in r3 is overwritten and the
 * accumulation always starts from 0xffffffff.  The "int sum" argument and
 * "r3 = sum" shown above therefore look stale - confirm against the C
 * prototypes of the functions built from this file.
 *
 * Names used here but not defined in this file: FN_ENTRY, FN_EXIT,
 * save_regs, load_regs, load1b, load2b, load1l, load2l, load4l,
 * put_byte_0/1, get_byte_0/1/2, lspush, lspull, ret, reteq.  They have to
 * come from <asm/assembler.h> or from whatever file includes this one.
 * The comments below assume (TODO confirm):
 *   - save_regs / load_regs save and restore the working registers, with
 *     load_regs also returning to the caller;
 *   - loadNb / loadNl fetch N bytes / N 32-bit words from src and advance
 *     src;
 *   - put_byte_N / get_byte_N / lspush / lspull are endian-dependent
 *     shift operands.
 *
 * Register usage visible in this file:
 *   r0 = src, r1 = dst, r2 = len, r3 = sum  (see the .req aliases)
 *   ip, r4 - r8 = scratch / data being copied
 */

src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

		/*
		 * Zero-length exit, reached from .Lless8 when len == 0:
		 * hand back the current value of sum as it is.  This path
		 * skips the carry fold and the rotate done at .Ldone.
		 */
.Lzero:		mov	r0, sum
		load_regs

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.
		 *
		 * Called with "blne" from the entry code and returns
		 * through lr.  Depending on dst it copies and sums 1, 2
		 * or 3 bytes: one byte if dst is odd, then two more if
		 * dst is still not a multiple of 4.  The carry out of the
		 * last adcs is handed back to the caller in C.
		 */
.Ldst_unaligned:
		tst	dst, #1
		beq	.Ldst_16bit

		/* dst is odd: move a single byte to reach 16-bit alignment */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		reteq	lr			@ dst is now 32bit aligned

		/* dst is 2 mod 4: move two bytes to reach 32-bit alignment */
.Ldst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		ret	lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 *
		 * Everything on this path is done with byte loads and
		 * byte stores: an optional leading byte when dst is odd,
		 * then byte pairs while (len & 6) != 0, then an optional
		 * trailing byte.
		 */
.Lless8:	teq	len, #0			@ check for zero count
		beq	.Lzero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.Lless8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6
		beq	.Lless8_byteonly

		/* copy and sum one pair of bytes per iteration */
1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		/* loop while at least two bytes remain (len is < 8 here) */
		tst	len, #6
		bne	1b
.Lless8_byteonly:
		/* at most one byte is left */
		tst	len, #1
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		strb	r8, [dst], #1
		b	.Ldone

/*
 * Function entry point.  The symbol itself is emitted by FN_ENTRY.
 *
 * Outline:
 *   1. len < 8           -> .Lless8 (byte-wise copy)
 *   2. dst not 4-aligned -> .Ldst_unaligned copies 1-3 bytes first
 *   3. src 4-aligned     -> word copy below (16, 8, 4 byte steps)
 *      src misaligned    -> .Lsrc_not_aligned (aligned loads + shifting)
 *   4. 1-3 trailing bytes, then .Ldone folds the carry and returns.
 */
FN_ENTRY
		save_regs
		/* start the accumulation from 0xffffffff (see header note) */
		mov	sum, #-1

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

		/*
		 * ip = len rounded down to a multiple of 16, i.e. the
		 * number of bytes handled by the 4-word loop.
		 * NOTE(review): the carry chain runs across the bics/ands
		 * instructions in this routine, so they are evidently
		 * expected to leave C untouched just like tst/teq.
		 */
		bics	ip, len, #15
		beq	2f

		/* main loop: copy and sum four words (16 bytes) per pass */
1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

		/* ip = len & 12: 0, 4, 8 or 12 bytes of whole words left */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		/* bit 3 set: copy and sum two words */
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

		/* bit 2 set: copy and sum one word */
3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * Trailing 1-3 bytes.  A whole word is fetched into r4 and
		 * only the wanted bytes are stored and summed.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0
		/* len == 1: only the byte now in r5 is left */
		beq	.Lexit
		/* len is 2 or 3: sum and store the first two bytes */
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		/* candidate third byte; .Lexit uses it only if len is odd */
		mov	r5, r4, get_byte_2

		/*
		 * Common tail: when len is odd, r5 carries the last byte
		 * in its low 8 bits.  Store it, mask it to 8 bits and add
		 * it to the sum.
		 */
.Lexit:		tst	len, #1
		strbne	r5, [dst], #1
		andne	r5, r5, #255
		adcsne	sum, sum, r5, put_byte_0

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 *
		 * The adc folds the outstanding carry into the result.
		 * The original dst is then re-read from the stack; this
		 * relies on save_regs having stored it at [sp, #0]
		 * (TODO confirm against the save_regs definition).
		 */
.Ldone:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs

		/*
		 * dst is word aligned but src is not.  src is rounded down
		 * to a word boundary and read with word loads.  r4 always
		 * holds the source bytes that have been loaded but not yet
		 * stored, and each output word is built by merging r4 with
		 * the start of the next loaded word.  There is one copy of
		 * the routine per source offset (1, 2 or 3), differing
		 * only in the shift amounts.
		 */
.Lsrc_not_aligned:
		/* the cmp below overwrites C, so fold it into sum first */
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned

		/*
		 * Source offset 1: r4 keeps the 3 wanted bytes of the
		 * first word (assuming lspull/lspush are the shifts that
		 * move bytes toward/away from byte lane 0 - TODO confirm).
		 */
		mov	r4, r5, lspull #8		@ C = 0
		bics	ip, len, #15
		beq	2f
		/* 16 bytes per pass */
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		mov	r6, r6, lspull #8
		orr	r6, r6, r7, lspush #24
		mov	r7, r7, lspull #8
		orr	r7, r7, r8, lspush #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		/* leftover bytes of the last loaded word become pending */
		mov	r4, r8, lspull #8
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
		/* 8 bytes if bit 3 of len is set */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #8
		tst	ip, #4
		beq	4f
		/* 4 bytes if bit 2 of len is set */
3:		load1l	r5
		orr	r4, r4, r5, lspush #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #8
		/*
		 * Trailing 1-3 bytes: all of them are taken from r4, so
		 * no further load is issued on this path.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit

		/*
		 * Source offset 2: same scheme with 16-bit shifts.  The
		 * "adds sum, sum, #0" cannot carry, so it resets C to 0
		 * after the cmp in .Lsrc_not_aligned.
		 */
.Lsrc2_aligned:	mov	r4, r5, lspull #16
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
		/* 16 bytes per pass */
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		mov	r6, r6, lspull #16
		orr	r6, r6, r7, lspush #16
		mov	r7, r7, lspull #16
		orr	r7, r7, r8, lspush #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #16
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
		/* 8 bytes if bit 3 of len is set */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #16
		tst	ip, #4
		beq	4f
		/* 4 bytes if bit 2 of len is set */
3:		load1l	r5
		orr	r4, r4, r5, lspush #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #16
		/*
		 * Trailing 1-3 bytes: the first two come from r4; a third
		 * byte, when needed, is fetched with a byte load and left
		 * for .Lexit to store and sum.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5
		b	.Lexit

		/*
		 * Source offset 3: same scheme with 24-bit / 8-bit shifts.
		 * As in the offset-2 routine, the adds resets C to 0.
		 */
.Lsrc3_aligned:	mov	r4, r5, lspull #24
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
		/* 16 bytes per pass */
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		mov	r6, r6, lspull #24
		orr	r6, r6, r7, lspush #8
		mov	r7, r7, lspull #24
		orr	r7, r7, r8, lspush #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #24
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
		/* 8 bytes if bit 3 of len is set */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #24
		tst	ip, #4
		beq	4f
		/* 4 bytes if bit 2 of len is set */
3:		load1l	r5
		orr	r4, r4, r5, lspush #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #24
		/*
		 * Trailing 1-3 bytes: the first comes from r4; for two or
		 * three bytes another word is loaded to supply the rest.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, lspush #24
		/* candidate third byte; .Lexit uses it only if len is odd */
		mov	r5, r4, get_byte_1
		b	.Lexit
FN_EXIT