/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * FP/SIMD state saving and restoring macros
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */

#include <asm/assembler.h>

/*
 * Save q0-q31 followed by fpsr/fpcr.
 *
 *   \state - GPR holding the base of the save area.  NOTE: the final
 *            stp uses pre-index writeback ('!'), so \state is advanced
 *            by 16 * 30 bytes; the fpsr/fpcr stores below are relative
 *            to the *updated* \state (i.e. they land just after q30/q31,
 *            at byte offsets 512 and 516 from the original base).
 *   \tmpnr - *number* of a scratch general register, used as both
 *            x\tmpnr and w\tmpnr.
 */
.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	stp	q30, q31, [\state, #16 * 30]!	// writeback: \state += 16 * 30
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]	// fpsr word, right after q31
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]	// fpcr word, 4 bytes later
.endm

/*
 * Restore fpcr from a value already loaded into a GPR.
 *
 *   \state - GPR holding the new fpcr value (despite the name; it is
 *            the value, not a pointer)
 *   \tmp   - scratch GPR, receives the current fpcr for comparison
 */
.macro fpsimd_restore_fpcr state, tmp
	/*
	 * Writes to fpcr may be self-synchronising, so avoid restoring
	 * the register if it hasn't changed.
	 */
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f
	msr	fpcr, \state
9999:
.endm

/*
 * Inverse of fpsimd_save: reload q0-q31 and fpsr/fpcr from the same
 * layout.  As above, the final ldp writes back \state += 16 * 30, so
 * the fpsr/fpcr loads use the updated base.
 *
 * Clobbers \state (writeback, then reused as the fpcr value for
 * fpsimd_restore_fpcr).
 */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	ldp	q30, q31, [\state, #16 * 30]!	// writeback: \state += 16 * 30
	ldr	w\tmpnr, [\state, #16 * 2]
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state	// \state reused as scratch here
.endm

/* Sanity-check macros to help avoid encoding garbage instructions */

/* General-purpose register number: x0-x30 (31 == sp/xzr, not allowed) */
.macro _check_general_reg nr
	.if (\nr) < 0 || (\nr) > 30
		.error "Bad register number \nr."
	.endif
.endm

/* SVE Z (vector) register number: z0-z31 */
.macro _sve_check_zreg znr
	.if (\znr) < 0 || (\znr) > 31
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm

/* SVE P (predicate) register number: p0-p15 */
.macro _sve_check_preg pnr
	.if (\pnr) < 0 || (\pnr) > 15
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm

/* Generic range check for an immediate */
.macro _check_num n, min, max
	.if (\n) < (\min) || (\n) > (\max)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm

/* SME ZA vector-select register: only w12-w15 are accepted */
.macro _sme_check_wv v
	.if (\v) < 12 || (\v) > 15
		.error "Bad vector select register \v."
	.endif
.endm

/* SVE instruction encodings for non-SVE-capable assemblers */
/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
/*
 * Each macro below ORs the operand fields into a fixed opcode and emits
 * the result with .inst.  \offset is a signed 9-bit VL-scaled (MUL VL)
 * immediate, split into a low 3-bit field (bits 12:10) and a high 6-bit
 * field (bits 21:16, hence the "& 0x1f8) << 13").
 */

/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* RDVL X\nx, #\imm */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm

/* RDFFR (unpredicated): RDFFR P\np.B */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000			\
		| (\np)
.endm

/* WRFFR P\np.B */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000			\
		| ((\np) << 5)
.endm

/* PFALSE P\np.B */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400			\
		| (\np)
.endm

/* SME instruction encodings for non-SME-capable assemblers */
/* (pre binutils 2.38/LLVM 13) */

/* RDSVL X\nx, #\imm */
.macro _sme_rdsvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5800			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm

/*
 * STR (vector from ZA array):
 *	STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 *
 * Only 2 bits of \nw are encoded ("& 3"): the base register is
 * architecturally fixed to the w12-w15 range enforced by _sme_check_wv.
 */
.macro _sme_str_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1200000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 7)
.endm

/*
 * LDR (vector to ZA array):
 *	LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 */
.macro _sme_ldr_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1000000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 7)
.endm

/*
 * LDR (ZT0)
 *
 *	LDR ZT0, nx
 */
.macro _ldr_zt nx
	_check_general_reg \nx
	.inst	0xe11f8000	\
		| (\nx << 5)
.endm

/*
 * STR (ZT0)
 *
 *	STR ZT0, nx
 */
.macro _str_zt nx
	_check_general_reg \nx
	.inst	0xe13f8000	\
		| (\nx << 5)
.endm

/*
 * Zero the entire ZA array
 *	ZERO ZA
 */
.macro zero_za
	.inst	0xc00800ff
.endm

/*
 * Recursive helper for _for below: expands _for__body once for each
 * integer in [\from, \to].  It splits the range in half and recurses on
 * each half (rather than peeling one element at a time) so the
 * assembler's macro nesting depth stays logarithmic in the range size.
 * The '%' prefixes force numeric evaluation under .altmacro.
 */
.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body %\from
	.else
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm

/*
 * _for var, from, to, insn: expand \insn once for each value of \var in
 * [\from, \to] — a compile-time loop used to unroll the per-register
 * SVE save/restore sequences below.  A temporary one-parameter macro
 * (_for__body) is defined, driven via __for under .altmacro, then
 * purged.  altmacro mode is switched off around each \insn expansion so
 * the loop body is assembled with normal macro semantics.
 */
.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm
	.altmacro
	__for \from, \to
	.noaltmacro
	.purgem _for__body
.endm

/* Update ZCR_EL1.LEN with the new VQ */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
		mrs_s		\xtmp, SYS_ZCR_EL1
		bic		\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
		orr		\xtmp2, \xtmp2, \xvqminus1
		cmp		\xtmp2, \xtmp
		b.eq		921f		// skip the write if LEN is unchanged
		msr_s		SYS_ZCR_EL1, \xtmp2	//self-synchronising
921:
.endm

/* Update SMCR_EL1.LEN with the new VQ (same skip-if-unchanged pattern) */
.macro sme_load_vq xvqminus1, xtmp, xtmp2
		mrs_s		\xtmp, SYS_SMCR_EL1
		bic		\xtmp2, \xtmp, SMCR_ELx_LEN_MASK
		orr		\xtmp2, \xtmp2, \xvqminus1
		cmp		\xtmp2, \xtmp
		b.eq		921f		// skip the write if LEN is unchanged
		msr_s		SYS_SMCR_EL1, \xtmp2	//self-synchronising
921:
.endm

/*
 * Preserve the first 128-bits of Znz and zero the rest.
 * (A write to the V view of a Z register leaves only the low 128 bits.)
 */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b
.endm

/* Flush all Z registers: keep low 128 bits of each, zero the rest */
.macro sve_flush_z
 _for n, 0, 31, _sve_flush_z	\n
.endm

/* Zero all predicate registers */
.macro sve_flush_p
 _for n, 0, 15, _sve_pfalse	\n
.endm

/* Zero the FFR by writing the (zeroed) P0 into it */
.macro sve_flush_ffr
		_sve_wrffr	0
.endm

/*
 * Save the SVE register state: z0-z31, p0-p15, optionally the FFR, and
 * fpsr/fpcr.
 *
 *   \nxbase   - *number* of the GPR addressing the save area.  Layout
 *               (all offsets VL-scaled, MUL VL): Z regs at -34..-3,
 *               P regs at -16..-1, FFR slot at offset 0.
 *   \xpfpsr   - GPR addressing the fpsr (offset 0) / fpcr (offset 4) words
 *   \save_ffr - if zero, a zeroed value is stored in the FFR slot
 *               instead of the live FFR
 *   \nxtmp    - *number* of a scratch GPR
 *
 * P0 is used to stage the FFR value (RDFFR, or PFALSE when \save_ffr is
 * clear) into the FFR slot, then P0's own saved value is reloaded from
 * its slot at offset -16, so P0 is preserved overall.
 */
.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
 _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
 _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
		cbz		\save_ffr, 921f
		_sve_rdffr	0
		b		922f
921:
		_sve_pfalse	0			// Zero out FFR
922:
		_sve_str_p	0, \nxbase
		_sve_ldr_p	0, \nxbase, -16		// restore P0 from its slot
		mrs		x\nxtmp, fpsr
		str		w\nxtmp, [\xpfpsr]
		mrs		x\nxtmp, fpcr
		str		w\nxtmp, [\xpfpsr, #4]
.endm

/*
 * Inverse of sve_save: reload z0-z31, optionally the FFR, p0-p15 and
 * fpsr/fpcr from the same layout.
 *
 * P0 is loaded from the FFR slot and written to the FFR first; its real
 * value is then restored by the ordinary p0-p15 reload that follows.
 */
.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
 _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
		cbz		\restore_ffr, 921f
		_sve_ldr_p	0, \nxbase		// FFR image, via P0
		_sve_wrffr	0
921:
 _for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16
		ldr		w\nxtmp, [\xpfpsr]
		msr		fpsr, x\nxtmp
		ldr		w\nxtmp, [\xpfpsr, #4]
		msr		fpcr, x\nxtmp
.endm

/*
 * Save the ZA array, one horizontal vector at a time.
 *
 *   \nxbase - *number* of the destination GPR; advanced by \xvl bytes
 *             per iteration (clobbered)
 *   \xvl    - per-slice stride in bytes and the loop count: the loop
 *             runs until x\nw == \xvl (NOTE(review): consistent with a
 *             VL x VL-byte ZA array — confirm against callers)
 *   \nw     - *number* of the W register used as the ZA slice selector;
 *             must be 12-15 (enforced by _sme_check_wv); clobbered
 */
.macro sme_save_za nxbase, xvl, nw
	mov	w\nw, #0

423:
	_sme_str_zav \nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw
	bne	423b
.endm

/* Load the ZA array: exact inverse of sme_save_za, same clobbers */
.macro sme_load_za nxbase, xvl, nw
	mov	w\nw, #0

423:
	_sme_ldr_zav \nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw
	bne	423b
.endm