// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>

asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);

void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
{
	/* The asm init routine clamps and stores r; s is kept in dctx->s. */
	poly1305_init_arm64(&dctx->h, key);
	dctx->s[0] = get_unaligned_le32(key + 16);
	dctx->s[1] = get_unaligned_le32(key + 20);
	dctx->s[2] = get_unaligned_le32(key + 24);
	dctx->s[3] = get_unaligned_le32(key + 28);
	dctx->buflen = 0;
}
EXPORT_SYMBOL(poly1305_init_arch);

static int neon_poly1305_init(struct shash_desc *desc)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	dctx->buflen = 0;
	dctx->rset = 0;
	dctx->sset = false;

	return 0;
}

static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit, bool do_neon)
{
	/* The first one or two blocks of input carry the key: r first, then s. */
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			poly1305_init_arm64(&dctx->h, src);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}
		if (len >= POLY1305_BLOCK_SIZE) {
			dctx->s[0] = get_unaligned_le32(src +  0);
			dctx->s[1] = get_unaligned_le32(src +  4);
			dctx->s[2] = get_unaligned_le32(src +  8);
			dctx->s[3] = get_unaligned_le32(src + 12);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->sset = true;
		}
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	len &= ~(POLY1305_BLOCK_SIZE - 1);

	if (static_branch_likely(&have_neon) && likely(do_neon))
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
	else
		poly1305_blocks(&dctx->h, src, len, hibit);
}

static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				    const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		len -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			neon_poly1305_blocks(dctx, dctx->buf,
					     POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += round_down(len, POLY1305_BLOCK_SIZE);
		len %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(len)) {
		dctx->buflen = len;
		memcpy(dctx->buf, src, len);
	}
}

static int neon_poly1305_update(struct shash_desc *desc,
				const u8 *src, unsigned int srclen)
{
	/* NEON only pays off for larger inputs and when the SIMD unit is usable. */
	bool do_neon = crypto_simd_usable() && srclen > 128;
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_begin();
	neon_poly1305_do_update(dctx, src, srclen, do_neon);
	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_end();

	return 0;
}

void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		nbytes -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
			do {
				/*
				 * Process at most 4 KiB per NEON section so
				 * the SIMD unit is not claimed for too long
				 * at a time.
				 */
				unsigned int todo = min_t(unsigned int, len, SZ_4K);

				kernel_neon_begin();
				poly1305_blocks_neon(&dctx->h, src, todo, 1);
				kernel_neon_end();

				len -= todo;
				src += todo;
			} while (len);
		} else {
			poly1305_blocks(&dctx->h, src, len, 1);
			src += len;
		}
		nbytes %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(nbytes)) {
		dctx->buflen = nbytes;
		memcpy(dctx->buf, src, nbytes);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);

void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		/* Pad the final partial block: append a 1 byte, zero-fill,
		 * and process it with the high bit clear. */
		dctx->buf[dctx->buflen++] = 1;
		memset(dctx->buf + dctx->buflen, 0,
		       POLY1305_BLOCK_SIZE - dctx->buflen);
		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit(&dctx->h, dst, dctx->s);
	memzero_explicit(dctx, sizeof(*dctx));
}
EXPORT_SYMBOL(poly1305_final_arch);

static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (unlikely(!dctx->sset))
		return -ENOKEY;

	poly1305_final_arch(dctx, dst);
	return 0;
}

static struct shash_alg neon_poly1305_alg = {
	.init			= neon_poly1305_init,
	.update			= neon_poly1305_update,
	.final			= neon_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),
	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
};

static int __init neon_poly1305_mod_init(void)
{
	if (!cpu_have_named_feature(ASIMD))
		return 0;

	static_branch_enable(&have_neon);

	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
		crypto_register_shash(&neon_poly1305_alg) : 0;
}

static void __exit neon_poly1305_mod_exit(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
		crypto_unregister_shash(&neon_poly1305_alg);
}

module_init(neon_poly1305_mod_init);
module_exit(neon_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-neon");
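
/*
 * Illustrative usage sketch of the library interface exported above. The
 * function name and the key/message buffers below are placeholders, not part
 * of this driver; a typical kernel user would normally go through the generic
 * poly1305_init()/poly1305_update()/poly1305_final() wrappers (or a higher
 * level construction such as the ChaCha20-Poly1305 AEAD) rather than calling
 * the _arch routines directly.
 *
 *	static void poly1305_usage_example(void)
 *	{
 *		struct poly1305_desc_ctx desc;
 *		u8 key[POLY1305_KEY_SIZE] = { 0 };	// one-time key (r || s), placeholder
 *		u8 msg[64] = { 0 };			// message to authenticate, placeholder
 *		u8 tag[POLY1305_DIGEST_SIZE];
 *
 *		poly1305_init_arch(&desc, key);		// clamp r, stash s
 *		poly1305_update_arch(&desc, msg, sizeof(msg));
 *		poly1305_final_arch(&desc, tag);	// emit 16-byte tag, wipe desc
 *	}
 */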