/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 */
#ifndef _ASM_LOONGARCH_XOR_H
#define _ASM_LOONGARCH_XOR_H

#include <asm/cpu-features.h>
#include <asm/xor_simd.h>

#ifdef CONFIG_CPU_HAS_LSX
/*
 * XOR block template backed by the LSX vector routines from
 * <asm/xor_simd.h>, covering the 2- to 5-source variants.
 */
static struct xor_block_template xor_block_lsx = {
	.name = "lsx",
	.do_2 = xor_lsx_2,
	.do_3 = xor_lsx_3,
	.do_4 = xor_lsx_4,
	.do_5 = xor_lsx_5,
};

/*
 * Enter the LSX template into the xor_speed() benchmark, but only when
 * the running CPU actually implements LSX (kernel support alone is not
 * enough -- hence the runtime cpu_has_lsx check).
 */
#define XOR_SPEED_LSX()					\
	do {						\
		if (cpu_has_lsx)			\
			xor_speed(&xor_block_lsx);	\
	} while (0)
#else /* CONFIG_CPU_HAS_LSX */
/* LSX support compiled out: benchmarking it becomes a no-op. */
#define XOR_SPEED_LSX()
#endif /* CONFIG_CPU_HAS_LSX */

#ifdef CONFIG_CPU_HAS_LASX
/*
 * XOR block template backed by the LASX vector routines from
 * <asm/xor_simd.h>, covering the 2- to 5-source variants.
 */
static struct xor_block_template xor_block_lasx = {
	.name = "lasx",
	.do_2 = xor_lasx_2,
	.do_3 = xor_lasx_3,
	.do_4 = xor_lasx_4,
	.do_5 = xor_lasx_5,
};

/*
 * Enter the LASX template into the xor_speed() benchmark, guarded by a
 * runtime cpu_has_lasx check so kernels built with LASX support still
 * behave on CPUs lacking it.
 */
#define XOR_SPEED_LASX()					\
	do {							\
		if (cpu_has_lasx)				\
			xor_speed(&xor_block_lasx);		\
	} while (0)
#else /* CONFIG_CPU_HAS_LASX */
/* LASX support compiled out: benchmarking it becomes a no-op. */
#define XOR_SPEED_LASX()
#endif /* CONFIG_CPU_HAS_LASX */

/*
 * For grins, also test the generic routines.
 *
 * More importantly: it cannot be ruled out at this point of time, that some
 * future (maybe reduced) models could run the vector algorithms slower than
 * the scalar ones, maybe for errata or micro-op reasons. It may be
 * appropriate to revisit this after one or two more uarch generations.
 */
#include <asm-generic/xor.h>

/*
 * Replace the XOR_TRY_TEMPLATES definition pulled in from
 * <asm-generic/xor.h> above: benchmark the four generic scalar
 * implementations first, then the vector ones (each vector entry is a
 * no-op when compiled out, and self-guards on CPU capability at runtime).
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES				\
do {							\
	xor_speed(&xor_block_8regs);			\
	xor_speed(&xor_block_8regs_p);			\
	xor_speed(&xor_block_32regs);			\
	xor_speed(&xor_block_32regs_p);			\
	XOR_SPEED_LSX();				\
	XOR_SPEED_LASX();				\
} while (0)

#endif /* _ASM_LOONGARCH_XOR_H */