#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
#define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
#define GAUDI_MAX_STRING_LEN 20
#define GAUDI_CB_POOL_CB_CNT 512
#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
BIT(GAUDI_ENGINE_ID_MME_0) |\
BIT(GAUDI_ENGINE_ID_MME_2) |\
GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
#define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
#define GAUDI_PLL_MAX 10
#define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
#define MONITOR_SOB_STRING_SIZE 256
/* Queue IDs of the stream masters: the four streams of DMA 0 and of DMA 1 */
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
GAUDI_QUEUE_ID_DMA_0_0,
GAUDI_QUEUE_ID_DMA_0_1,
GAUDI_QUEUE_ID_DMA_0_2,
GAUDI_QUEUE_ID_DMA_0_3,
GAUDI_QUEUE_ID_DMA_1_0,
GAUDI_QUEUE_ID_DMA_1_1,
GAUDI_QUEUE_ID_DMA_1_2,
GAUDI_QUEUE_ID_DMA_1_3
};
/*
 * Interrupt names, one fixed-size string per MSI entry: twelve completion
 * queue names followed by the CPU event queue name. Rows that are not
 * explicitly initialized are empty strings.
 */
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
"gaudi cpu eq"
};
/*
 * Maps each logical DMA channel (two PCI channels, six HBM channels) to the
 * engine ID of the DMA engine that serves it.
 */
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};
/* Queue ID associated with each completion queue, indexed by CQ number */
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
[0] = GAUDI_QUEUE_ID_DMA_0_0,
[1] = GAUDI_QUEUE_ID_DMA_0_1,
[2] = GAUDI_QUEUE_ID_DMA_0_2,
[3] = GAUDI_QUEUE_ID_DMA_0_3,
[4] = GAUDI_QUEUE_ID_DMA_1_0,
[5] = GAUDI_QUEUE_ID_DMA_1_1,
[6] = GAUDI_QUEUE_ID_DMA_1_2,
[7] = GAUDI_QUEUE_ID_DMA_1_3,
};
/*
 * Size in bytes of the structure of each packet type, indexed by packet ID.
 * Indices without an explicit initializer hold 0.
 */
static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
[PACKET_WREG_32] = sizeof(struct packet_wreg32),
[PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
[PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
[PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
[PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
[PACKET_REPEAT] = sizeof(struct packet_repeat),
[PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
[PACKET_FENCE] = sizeof(struct packet_fence),
[PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
[PACKET_NOP] = sizeof(struct packet_nop),
[PACKET_STOP] = sizeof(struct packet_stop),
[PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
[PACKET_WAIT] = sizeof(struct packet_wait),
[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
};
/**
 * validate_packet_id() - check that a packet ID is one of the known packets.
 * @id: packet ID to check.
 *
 * The accepted IDs are exactly the ones that have an entry in
 * gaudi_packet_sizes[].
 *
 * Return: true if @id is a known packet ID, false for any other value.
 */
static inline bool validate_packet_id(enum packet_id id)
{
	bool known = false;

	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		known = true;
		break;
	default:
		break;
	}

	return known;
}
/*
 * Names of the TPC interrupt causes.
 * NOTE(review): presumably indexed by the bit position in the TPC interrupt
 * cause register - confirm against the code that reads this table.
 */
static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
"tpc_address_exceed_slm",
"tpc_div_by_0",
"tpc_spu_mac_overflow",
"tpc_spu_addsub_overflow",
"tpc_spu_abs_overflow",
"tpc_spu_fp_dst_nan_inf",
"tpc_spu_fp_dst_denorm",
"tpc_vpu_mac_overflow",
"tpc_vpu_addsub_overflow",
"tpc_vpu_abs_overflow",
"tpc_vpu_fp_dst_nan_inf",
"tpc_vpu_fp_dst_denorm",
"tpc_assertions",
"tpc_illegal_instruction",
"tpc_pc_wrap_around",
"tpc_qm_sw_err",
"tpc_hbw_rresp_err",
"tpc_hbw_bresp_err",
"tpc_lbw_rresp_err",
"tpc_lbw_bresp_err"
};
/*
 * Descriptions of the QMAN error causes.
 * NOTE(review): presumably indexed by the bit position in the QMAN error
 * cause register - confirm against the code that reads this table.
 */
static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
"PQ AXI HBW error",
"CQ AXI HBW error",
"CP AXI HBW error",
"CP error due to undefined OPCODE",
"CP encountered STOP OPCODE",
"CP AXI LBW error",
"CP WRREG32 or WRBULK returned error",
"N/A",
"FENCE 0 inc over max value and clipped",
"FENCE 1 inc over max value and clipped",
"FENCE 2 inc over max value and clipped",
"FENCE 3 inc over max value and clipped",
"FENCE 0 dec under min value and clipped",
"FENCE 1 dec under min value and clipped",
"FENCE 2 dec under min value and clipped",
"FENCE 3 dec under min value and clipped"
};
/* Descriptions of the QMAN arbiter error causes */
static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
"Choice push while full error",
"Choice Q watchdog error",
"MSG AXI LBW returned with error"
};
/*
 * Sync manager SEI causes: sync object overflow, unaligned 4-byte LBW access
 * and AXI response error.
 */
enum gaudi_sm_sei_cause {
GAUDI_SM_SEI_SO_OVERFLOW,
GAUDI_SM_SEI_LBW_4B_UNALIGNED,
GAUDI_SM_SEI_AXI_RESPONSE_ERR
};
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
QUEUE_TYPE_EXT,
QUEUE_TYPE_EXT,
QUEUE_TYPE_EXT,
QUEUE_TYPE_EXT,
QUEUE_TYPE_EXT,
QUEUE_TYPE_EXT,
QUEUE_TYPE_EXT,
QUEUE_TYPE_EXT,
QUEUE_TYPE_CPU,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
QUEUE_TYPE_INT,
};
/* Symbolic names of sync objects 0..26, keyed by sync object ID */
static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
{ .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
{ .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
{ .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
{ .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
{ .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
{ .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
{ .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
{ .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
{ .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
{ .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};
/*
 * Symbolic names of monitors, keyed by monitor ID. The IDs are not
 * contiguous: there is no entry for ID 202.
 */
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
/*
 * Gaudi values for the state dump properties, indexed by the SP_* keys:
 * register addresses, address strides between consecutive engines/objects,
 * register offsets relative to the QMAN GLBL_CFG0 register, and engine/object
 * counts.
 */
static s64 gaudi_state_dump_specs_props[] = {
[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
[SP_MON_OBJ_WR_ADDR_LOW] =
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
[SP_MON_OBJ_WR_ADDR_HIGH] =
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
/* Stride between the register blocks of consecutive TPCs */
[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
/* Stride is measured from MME0 to MME2, not MME1 */
[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
[SP_FENCE0_CNT_OFFSET] =
mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
[SP_FENCE0_RDATA_OFFSET] =
mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
[SP_NUM_CORES] = 1,
};
/* Names of the four sync managers; the list is terminated by a NULL entry */
static const char * const gaudi_sync_manager_names[] = {
"SYNC_MGR_E_N",
"SYNC_MGR_W_N",
"SYNC_MGR_E_S",
"SYNC_MGR_W_S",
NULL
};
/*
 * Parameters describing where ECC information is extracted from.
 * NOTE(review): the users of this structure are outside this part of the
 * file; the field descriptions below are inferred from the names - confirm.
 */
struct ecc_info_extract_params {
/* presumably the base address of the block to inspect */
u64 block_address;
/* presumably the number of memories in that block */
u32 num_memories;
/* presumably selects double-error (true) vs single-error (false) handling */
bool derr;
};
/*
 * Forward declarations of static functions that are referenced before their
 * definition in this file.
 */
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
struct hl_gen_wait_properties *prop);
/**
 * get_collective_mode() - get the collective role of a H/W queue.
 * @hdev: habanalabs device structure (not used by this function).
 * @queue_id: queue ID; used as an index into gaudi_queue_type[] without a
 *            bounds check, so it must be smaller than GAUDI_QUEUE_ID_SIZE.
 *
 * External queues are collective masters. The DMA5 queues, the TPC7 queues
 * and all NIC queues are collective slaves.
 *
 * Return: HL_COLLECTIVE_MASTER, HL_COLLECTIVE_SLAVE or
 * HL_COLLECTIVE_NOT_SUPPORTED.
 */
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	bool slave_queue;

	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	slave_queue =
		(queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3) ||
		(queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3) ||
		(queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3);

	return slave_queue ? HL_COLLECTIVE_SLAVE : HL_COLLECTIVE_NOT_SUPPORTED;
}
/**
 * set_default_power_values() - set the default max and DC power properties.
 * @hdev: habanalabs device structure.
 *
 * PMC cards get the PMC defaults, where the DC power default additionally
 * depends on whether F/W security is enabled. Any other card type gets the
 * PCI defaults.
 */
static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *props = &hdev->asic_prop;

	if (hdev->card_type != cpucp_card_type_pmc) {
		props->max_power_default = MAX_POWER_DEFAULT_PCI;
		props->dc_power_default = DC_POWER_DEFAULT_PCI;
		return;
	}

	props->max_power_default = MAX_POWER_DEFAULT_PMC;

	if (props->fw_security_enabled)
		props->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
	else
		props->dc_power_default = DC_POWER_DEFAULT_PMC;
}
static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 num_sync_stream_queues = 0;
int i;
prop->max_queues = GAUDI_QUEUE_ID_SIZE;
prop->hw_queues_props = kcalloc(prop->max_queues,
sizeof(struct hw_queue_properties),
GFP_KERNEL);
if (!prop->hw_queues_props)
return -ENOMEM;
for (i = 0 ; i < prop->max_queues ; i++) {
if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
prop->hw_queues_props[i].driver_only = 0;
prop->hw_queues_props[i].supports_sync_stream = 1;
prop->hw_queues_props[i].cb_alloc_flags =
CB_ALLOC_KERNEL;
num_sync_stream_queues++;
} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
prop->hw_queues_props[i].driver_only = 1;
prop->hw_queues_props[i].supports_sync_stream = 0;
prop->hw_queues_props[i].cb_alloc_flags =
CB_ALLOC_KERNEL;
} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
prop->hw_queues_props[i].driver_only = 0;
prop->hw_queues_props[i].supports_sync_stream = 0;
prop->hw_queues_props[i].cb_alloc_flags =
CB_ALLOC_USER;
}
prop->hw_queues_props[i].collective_mode =
get_collective_mode(hdev, i);
}
prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
prop->collective_first_sob = 0;
prop->collective_first_mon = 0;
prop->sync_stream_first_sob =
ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
* QMAN_STREAMS * HL_RSVD_SOBS;
prop->sync_stream_first_mon =
(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
(NUMBER_OF_EXT_HW_QUEUES * 2);
prop->dram_base_address = DRAM_PHYS_BASE;
prop->dram_size = GAUDI_HBM_SIZE_32GB;
prop->dram_end_address = prop->dram_base_address +
prop->dram_size;
prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
prop->sram_base_address = SRAM_BASE_ADDR;
prop->sram_size = SRAM_SIZE;
prop->sram_end_address = prop->sram_base_address +
prop->sram_size;
prop->sram_user_base_address = prop->sram_base_address +
SRAM_USER_BASE_OFFSET;
prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
if (hdev->pldm)
prop->mmu_pgt_size = 0x800000;
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->dram_supports_virtual_memory = false;
prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT;
prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT;
prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT;
prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT;
prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT;
prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK;
prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK;
prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK;
prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK;
prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK;
prop->pmmu.start_addr = VA_HOST_SPACE_START;
prop->pmmu.end_addr =
(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
prop->dmmu.end_addr = VA_HOST_SPACE_END;
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
prop->num_of_events = GAUDI_EVENT_SIZE;
prop->tpc_enabled_mask = TPC_ENABLED_MASK;
set_default_power_values(hdev);
prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
prop->sync_stream_first_sob +
(num_sync_stream_queues * HL_RSVD_SOBS);
prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
prop->sync_stream_first_mon +
(num_sync_stream_queues * HL_RSVD_MONS);
prop->first_available_user_msix_interrupt = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
prop->fw_cpu_boot_dev_sts0_valid = false;
prop->fw_cpu_boot_dev_sts1_valid = false;
prop->hard_reset_done_by_fw = false;
prop->gic_interrupts_enable = true;
prop->server_type = HL_SERVER_TYPE_UNKNOWN;
prop->clk_pll_index = HL_GAUDI_MME_PLL;
prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
prop->use_get_power_for_reset_history = true;
prop->configurable_stop_on_err = true;
prop->set_max_power_on_device_init = true;
return 0;
}
/**
 * gaudi_pci_bars_map() - map the device PCI BARs.
 * @hdev: habanalabs device structure.
 *
 * Maps the SRAM, CFG and HBM BARs (only the HBM BAR is flagged in the is_wc
 * array) and, on success, sets hdev->rmmio to point inside the CFG BAR at
 * offset (CFG_BASE - SPI_FLASH_BASE_ADDR).
 *
 * Return: 0 on success, otherwise the error returned by hl_pci_bars_map().
 */
static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const bar_names[] = {"SRAM", "CFG", "HBM"};
	bool bar_is_wc[3] = {false, false, true};
	int rc = hl_pci_bars_map(hdev, bar_names, bar_is_wc);

	if (!rc)
		hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
				(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return rc;
}
/**
 * gaudi_set_hbm_bar_base() - point the HBM BAR at a new device address.
 * @hdev: habanalabs device structure.
 * @addr: device address the HBM BAR should map.
 *
 * Reprograms inbound region 2 (HBM BAR, BAR match mode) unless the BAR is
 * already known to point at @addr. The current address is tracked in the
 * ASIC-specific structure when that structure exists.
 *
 * Return: the address the BAR pointed to before the call; @addr itself if the
 * BAR already pointed there or if there is no ASIC-specific structure to read
 * the previous address from; U64_MAX if the iATU is configured by the F/W or
 * if setting the inbound region failed.
 */
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region hbm_region;
	u64 prev_addr;

	/* Nothing to do if the BAR already points at the requested address */
	if (gaudi && gaudi->hbm_bar_cur_addr == addr)
		return addr;

	/* The driver must not touch the iATU when the F/W owns it */
	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	hbm_region.mode = PCI_BAR_MATCH_MODE;
	hbm_region.bar = HBM_BAR_ID;
	hbm_region.addr = addr;

	if (hl_pci_set_inbound_region(hdev, 2, &hbm_region))
		return U64_MAX;

	if (!gaudi)
		return addr;

	prev_addr = gaudi->hbm_bar_cur_addr;
	gaudi->hbm_bar_cur_addr = addr;

	return prev_addr;
}
static int gaudi_init_iatu(struct hl_device *hdev)
{
struct hl_inbound_pci_region inbound_region;
struct hl_outbound_pci_region outbound_region;
int rc;
if (hdev->asic_prop.iatu_done_by_fw)
return 0;
inbound_region.mode = PCI_BAR_MATCH_MODE;
inbound_region.bar = SRAM_BAR_ID;
inbound_region.addr = SRAM_BASE_ADDR;
rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
if (rc)
goto done;
inbound_region.mode = PCI_BAR_MATCH_MODE;
inbound_region.bar = CFG_BAR_ID;
inbound_region.addr = SPI_FLASH_BASE_ADDR;
rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
if (rc)
goto done;
inbound_region.mode = PCI_BAR_MATCH_MODE;
inbound_region.bar = HBM_BAR_ID;
inbound_region.addr = DRAM_PHYS_BASE;
rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
if (rc)
goto done;
hdev->asic_funcs->set_dma_mask_from_fw(hdev);
outbound_region.addr = HOST_PHYS_BASE;
outbound_region.size = HOST_PHYS_SIZE;
rc = hl_pci_set_outbound_region(hdev, &outbound_region);
done:
return rc;
}
static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
return RREG32(mmHW_STATE);
}
/**
 * gaudi_early_init() - early stage of the device initialization.
 * @hdev: habanalabs device structure.
 *
 * Sets the fixed properties, verifies the sizes of the SRAM and CFG BARs,
 * decides whether the iATU is configured by the F/W, initializes the PCI
 * controller and reads the preboot status. If the H/W state is dirty, the
 * device is reset through the hw_fini() ASIC callback.
 *
 * On failure, everything that was set up here is released again.
 *
 * Return: 0 on success, -ENODEV if a BAR has an unexpected size, otherwise
 * the error code of the step that failed.
 */
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Sanity check of the BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto err_free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto err_free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	if (prop->fw_security_enabled) {
		/* With F/W security the iATU is always owned by the F/W */
		prop->iatu_done_by_fw = true;
		prop->gic_interrupts_enable = false;
	} else {
		/* Otherwise ask the F/W whether it configured the iATU */
		rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
					&fw_boot_status);
		if (rc)
			goto err_free_queue_props;

		if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
				(fw_boot_status &
					CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
			prop->iatu_done_by_fw = true;
	}

	rc = hl_pci_init(hdev);
	if (rc)
		goto err_free_queue_props;

	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
					mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1,
					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto err_pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true, false);
	}

	return 0;

err_pci_fini:
	hl_pci_fini(hdev);
err_free_queue_props:
	kfree(prop->hw_queues_props);
	return rc;
}
static int gaudi_early_fini(struct hl_device *hdev)
{
kfree(hdev->asic_prop.hw_queues_props);
hl_pci_fini(hdev);
return 0;
}
/**
 * gaudi_fetch_psoc_frequency() - retrieve the PSOC timestamp frequency.
 * @hdev: habanalabs device structure.
 *
 * When F/W security is enabled, the frequency is taken from the PLL
 * information reported by the F/W for the CPU PLL (output index 2); this is
 * skipped, leaving the properties untouched, if the CPU queue is not
 * initialized yet. In that mode the nr/nf/od/div-factor properties are stored
 * as 0.
 *
 * Otherwise the CPU PLL registers are read directly and the frequency is
 * derived from the divider selection. An unknown selection is reported with a
 * warning and results in a frequency of 0.
 *
 * Return: 0 on success (including the skipped case), otherwise the error
 * returned by hl_fw_cpucp_pll_info_get().
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		/* The F/W can only be queried through the CPU queue */
		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			/* Output is the reference clock, optionally divided */
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			/* Output is the PLL clock, optionally divided */
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			/* div_sel is unsigned; terminate the message properly */
			dev_warn(hdev->dev,
				"Received invalid div select value: %u\n",
				div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}
/*
 * _gaudi_init_tpc_mem() - copy the TPC kernel to the device and run it on all
 *                         TPC engines.
 * @hdev: habanalabs device structure.
 * @tpc_kernel_src_addr: DMA address of the host buffer holding the TPC kernel.
 * @tpc_kernel_size: size of the TPC kernel in bytes.
 *
 * Builds a single LIN_DMA packet in a kernel command buffer that copies the
 * kernel from the host buffer to the user base address of the SRAM, sends it
 * as a job on QMAN0 and then calls gaudi_run_tpc_kernel() for every TPC
 * engine, stopping at the first failure.
 *
 * Return: 0 on success, -EFAULT if the command buffer could not be created,
 * -ENOMEM if the job could not be allocated, otherwise the error returned by
 * gaudi_send_job_on_qman0() or gaudi_run_tpc_kernel().
 */
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct packet_lin_dma *init_tpc_mem_pkt;
struct hl_cs_job *job;
struct hl_cb *cb;
u64 dst_addr;
u32 cb_size, ctl;
u8 tpc_id;
int rc;
cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
if (!cb)
return -EFAULT;
/* The command buffer holds exactly one LIN_DMA packet */
init_tpc_mem_pkt = cb->kernel_address;
cb_size = sizeof(*init_tpc_mem_pkt);
memset(init_tpc_mem_pkt, 0, cb_size);
init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
/* Packet control word: LIN_DMA opcode with the LIN, RB and MB bits set */
ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
/* Destination is the SRAM user base address, masked/shifted to the field */
dst_addr = (prop->sram_user_base_address &
GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
if (!job) {
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
goto release_cb;
}
job->id = 0;
job->user_cb = cb;
/* Mark the CB as in use by a CS; dropped again at free_job */
atomic_inc(&job->user_cb->cs_cnt);
job->user_cb_size = cb_size;
job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
job->patched_cb = job->user_cb;
/*
 * NOTE(review): the extra packet_msg_prot is presumably the fence packet
 * appended by gaudi_send_job_on_qman0() - confirm there.
 */
job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
hl_debugfs_add_job(hdev, job);
rc = gaudi_send_job_on_qman0(hdev, job);
if (rc)
goto free_job;
/* The kernel is in SRAM now; run it on each TPC, stop on first error */
for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
if (rc)
break;
}
/* Reached on success as well: the job and CB are always released */
free_job:
hl_userptr_delete_list(hdev, &job->userptr_list);
hl_debugfs_remove_job(hdev, job);
kfree(job);
atomic_dec(&cb->cs_cnt);
release_cb:
hl_cb_put(cb);
hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
return rc;
}
/**
 * gaudi_init_tpc_mem() - load the TPC kernel file and initialize TPC memory.
 * @hdev: habanalabs device structure.
 *
 * Requests GAUDI_TPC_FW_FILE from the firmware loader (an interrupted request
 * is retried up to five more times, 50ms apart), copies it to a zeroed
 * DMA-coherent buffer and passes that buffer to _gaudi_init_tpc_mem(). The
 * buffer and the firmware object are released before returning.
 *
 * Return: 0 on success, -ENOMEM if the DMA buffer could not be allocated,
 * otherwise the error of request_firmware() or _gaudi_init_tpc_mem().
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	dma_addr_t dma_handle;
	void *cpu_addr;
	size_t fw_size;
	int rc, count = 5;

	for (;;) {
		rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
		if (rc != -EINTR || count-- <= 0)
			break;

		msleep(50);
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto release_fw;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);

	if (cpu_addr) {
		memcpy(cpu_addr, fw->data, fw_size);

		rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

		hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size,
				cpu_addr, dma_handle);
	} else {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
	}

release_fw:
	release_firmware(fw);
	return rc;
}
/**
 * gaudi_collective_map_sobs() - assign the collective SOBs of a stream.
 * @hdev: habanalabs device structure.
 * @stream: stream index.
 *
 * Takes the current SOB group of @stream and gives every NIC queue of that
 * stream its own SOB, starting at the base SOB of the group. The DMA5 and
 * TPC7 queues of the stream both get the same SOB, the one that follows the
 * NIC SOBs.
 */
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
	u32 nic, group_idx, base_sob, nic_queue_id;
	struct hl_hw_queue *q;

	group_idx = stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
	base_sob = cprop->hw_sob_group[group_idx].base_sob_id;

	/* Consecutive NIC engines are 4 queue IDs apart */
	nic_queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (nic = 0 ; nic < NIC_NUMBER_OF_ENGINES ; nic++) {
		q = &hdev->kernel_queues[nic_queue_id + (4 * nic)];
		q->sync_stream_prop.collective_sob_id = base_sob + nic;
	}

	/* DMA5 and TPC7 share the SOB right after the NIC ones */
	q = &hdev->kernel_queues[GAUDI_QUEUE_ID_DMA_5_0 + stream];
	q->sync_stream_prop.collective_sob_id =
			base_sob + NIC_NUMBER_OF_ENGINES;

	q = &hdev->kernel_queues[GAUDI_QUEUE_ID_TPC_7_0 + stream];
	q->sync_stream_prop.collective_sob_id =
			base_sob + NIC_NUMBER_OF_ENGINES;
}
/**
 * gaudi_sob_group_hw_reset() - reset all SOBs of a SOB group.
 * @ref: the kref embedded in the SOB group.
 *
 * Writes 0 to every sync object of the group and re-initializes the kref of
 * the group. Has the signature of a kref release callback.
 */
static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = group->hdev;
	int sob = 0;

	/* Consecutive sync object registers are 4 bytes apart */
	while (sob < NUMBER_OF_SOBS_IN_GRP) {
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(group->base_sob_id * 4) + (sob * 4)), 0);
		sob++;
	}

	kref_init(&group->kref);
}
/**
 * gaudi_sob_group_reset_error() - kref release callback that must never run.
 * @ref: the kref embedded in the SOB group.
 *
 * Only logs a critical message with the base SOB ID of the group; it does not
 * reset anything.
 */
static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *group;
	struct hl_device *hdev;

	group = container_of(ref, struct gaudi_hw_sob_group, kref);
	hdev = group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		group->base_sob_id);
}
/**
 * gaudi_collective_mstr_sob_mask_set() - build the master SOB masks.
 * @gaudi: ASIC-specific device structure.
 *
 * Clears the masks, sets one bit for every NIC engine whose capability bit is
 * set in hw_cap_initialized, and then unconditionally sets the bit that
 * follows the last NIC bit. Each mask element holds
 * HL_MAX_SOBS_PER_MONITOR bits.
 */
static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
	int bit;

	memset(cprop->mstr_sob_mask, 0, sizeof(cprop->mstr_sob_mask));

	for (bit = 0 ; bit < NIC_NUMBER_OF_ENGINES ; bit++) {
		if (!(gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + bit)))
			continue;

		cprop->mstr_sob_mask[bit / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(bit % HL_MAX_SOBS_PER_MONITOR);
	}

	/*
	 * Here bit == NIC_NUMBER_OF_ENGINES, i.e. the position right after the
	 * NIC bits: this one is always set.
	 */
	cprop->mstr_sob_mask[bit / HL_MAX_SOBS_PER_MONITOR] |=
			BIT(bit % HL_MAX_SOBS_PER_MONITOR);
}
/**
 * gaudi_collective_init() - initialize the collective SOB groups.
 * @hdev: habanalabs device structure.
 *
 * Gives every SOB group a base SOB ID (groups are spaced by the group size
 * aligned to HL_MAX_SOBS_PER_MONITOR, starting at collective_first_sob) and
 * resets it in H/W. Then, for every stream, sets the initial SOB group value
 * and index, maps the SOBs to the queues, and finally builds the master SOB
 * masks.
 *
 * Return: always 0.
 */
static int gaudi_collective_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
	u32 group, stream, next_base_sob, group_stride;

	group_stride = ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
	next_base_sob = hdev->asic_prop.collective_first_sob;

	for (group = 0 ; group < NUM_SOB_GROUPS ; group++) {
		cprop->hw_sob_group[group].hdev = hdev;
		cprop->hw_sob_group[group].base_sob_id = next_base_sob;
		next_base_sob += group_stride;
		gaudi_sob_group_hw_reset(&cprop->hw_sob_group[group].kref);
	}

	for (stream = 0 ; stream < QMAN_STREAMS ; stream++) {
		cprop->next_sob_group_val[stream] = 1;
		cprop->curr_sob_group_idx[stream] = 0;
		gaudi_collective_map_sobs(hdev, stream);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}
/**
 * gaudi_reset_sob_group() - drop a reference to a SOB group.
 * @hdev: habanalabs device structure.
 * @sob_group: index of the SOB group.
 *
 * The group is reset in H/W by gaudi_sob_group_hw_reset() when the last
 * reference is dropped.
 */
static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct kref *group_ref =
		&gaudi->collective_props.hw_sob_group[sob_group].kref;

	kref_put(group_ref, gaudi_sob_group_hw_reset);
}
/**
 * gaudi_collective_master_init_job() - generate the wait CBs of a collective
 *                                      master job.
 * @hdev: habanalabs device structure.
 * @job: the master job; the packets are written to its patched CB.
 * @stream: stream index, selects the SOB value to wait for.
 * @sob_group_offset: index of the SOB group to wait on.
 *
 * Records the queue ID of the job in the SOB group and generates two wait
 * CBs back to back, one per master monitor. The second one covers the SOBs
 * that start HL_MAX_SOBS_PER_MONITOR after the base SOB of the group and uses
 * the second master SOB mask.
 */
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
	u32 queue_id = job->hw_queue_id;
	struct hl_sync_stream_properties *prop =
			&hdev->kernel_queues[queue_id].sync_stream_prop;
	struct hl_gen_wait_properties wait_prop;
	u32 sob_base, mon_id, mon, cb_size = 0;

	sob_base = cprop->hw_sob_group[sob_group_offset].base_sob_id;
	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	/* These fields are common to both wait CBs */
	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.q_idx = queue_id;

	/* One wait CB per master monitor; there are exactly two of them */
	for (mon = 0 ; mon < 2 ; mon++) {
		mon_id = prop->collective_mstr_mon_id[mon];

		dev_dbg(hdev->dev,
			"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
			sob_base, cprop->mstr_sob_mask[mon],
			cprop->next_sob_group_val[stream],
			mon_id, queue_id);

		wait_prop.sob_base = sob_base;
		wait_prop.sob_mask = cprop->mstr_sob_mask[mon];
		wait_prop.mon_id = mon_id;
		wait_prop.size = cb_size;
		cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

		sob_base += HL_MAX_SOBS_PER_MONITOR;
	}
}
/**
 * gaudi_collective_slave_init_job() - generate the CBs of a collective slave
 *                                     job.
 * @hdev: habanalabs device structure.
 * @job: the slave job; the packets are written to its user CB.
 * @cs_cmpl: completion object that holds the SOB to wait on and its value.
 *
 * For a CS with encapsulated signals, the SOB information of @cs_cmpl is
 * first set by hl_hw_queue_encaps_sig_set_sob_info(). Then a wait CB on that
 * SOB is generated using the slave monitor of the queue, followed by a signal
 * CB on the collective SOB of the queue.
 */
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	u32 queue_id = job->hw_queue_id;
	struct hl_sync_stream_properties *prop =
			&hdev->kernel_queues[queue_id].sync_stream_prop;
	struct hl_gen_wait_properties wait_prop;
	u32 wait_cb_size;

	if (job->cs->encaps_signals) {
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* The wait CB is placed at the start of the user CB */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = 0;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	wait_cb_size = gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	/* The signal CB follows right after the wait CB */
	gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, wait_cb_size, false);
}
/*
 * gaudi_collective_wait_init_cs() - prepare all jobs of a collective wait CS
 * @cs: the collective wait command submission
 *
 * Binds the CS completion object to the SOB that is being waited on, takes a
 * reference on that SOB, generates the CB content of every job in the CS
 * (master or slave flavour, per the job's queue) and then advances the
 * per-stream SOB group value, switching SOB group on wraparound.
 *
 * Return: 0 on success, -EINVAL if the signal fence has already completed.
 */
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
struct hl_cs_compl *signal_cs_cmpl =
container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
struct hl_cs_compl *cs_cmpl =
container_of(cs->fence, struct hl_cs_compl, base_fence);
struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
struct gaudi_collective_properties *cprop;
u32 stream, queue_id, sob_group_offset;
struct gaudi_device *gaudi;
struct hl_device *hdev;
struct hl_cs_job *job;
struct hl_ctx *ctx;
ctx = cs->ctx;
hdev = ctx->hdev;
gaudi = hdev->asic_specific;
cprop = &gaudi->collective_props;
if (cs->encaps_signals) {
/*
 * Only the SOB pointer is taken from the encapsulated signals handle
 * here; the value is zeroed. NOTE(review): the real value is presumably
 * filled per job by the slave init path - confirm.
 */
cs_cmpl->hw_sob = handle->hw_sob;
cs_cmpl->sob_val = 0;
} else {
/* Inherit the SOB and value from the signal CS completion object */
cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
}
/*
 * Under the signal completion lock: if the signal fence is already done,
 * bail out without taking a reference; otherwise take a reference on the
 * SOB before the lock is released.
 */
spin_lock(&signal_cs_cmpl->lock);
if (completion_done(&cs->signal_fence->completion)) {
spin_unlock(&signal_cs_cmpl->lock);
return -EINVAL;
}
kref_get(&cs_cmpl->hw_sob->kref);
spin_unlock(&signal_cs_cmpl->lock);
/* The stream is derived from the hw queue id of the first job in the list */
job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
stream = job->hw_queue_id % 4;
sob_group_offset =
stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
/* Generate the CB of each job according to its queue's collective mode */
list_for_each_entry(job, &cs->job_list, cs_node) {
queue_id = job->hw_queue_id;
if (hdev->kernel_queues[queue_id].collective_mode ==
HL_COLLECTIVE_MASTER)
gaudi_collective_master_init_job(hdev, job, stream,
sob_group_offset);
else
gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
}
cs_cmpl->sob_group = sob_group_offset;
/* Take a reference on the SOB group and advance its next value */
kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
cprop->next_sob_group_val[stream]++;
if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
/*
 * Value wrapped: drop one reference on the current group, restart the
 * value at 1, move to the next group index and remap the SOBs for this
 * stream. The index is wrapped with a mask, which assumes HL_RSVD_SOBS
 * is a power of two - TODO confirm.
 */
kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
gaudi_sob_group_reset_error);
cprop->next_sob_group_val[stream] = 1;
cprop->curr_sob_group_idx[stream] =
(cprop->curr_sob_group_idx[stream] + 1) &
(HL_RSVD_SOBS - 1);
gaudi_collective_map_sobs(hdev, stream);
dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
cprop->curr_sob_group_idx[stream], stream);
}
/* Full memory barrier before the signal fence reference is dropped */
mb();
hl_fence_put(cs->signal_fence);
cs->signal_fence = NULL;
return 0;
}
/*
 * gaudi_collective_wait_create_job() - create one job of a collective wait CS
 * @hdev: habanalabs device structure
 * @ctx: context whose drop counters are updated on allocation failure
 * @cs: command submission the job is added to
 * @mode: HL_COLLECTIVE_MASTER for the master job, anything else is a slave
 * @queue_id: hw queue the job is submitted on
 * @wait_queue_id: not used by this function
 * @encaps_signal_offset: stored in the job when @cs uses encapsulated signals
 *
 * Allocates the job and a kernel CB sized for the packets of the requested
 * mode, links the CB to the job and appends the job to the CS job list.
 *
 * Return: 0 on success, -ENOMEM if the job allocation failed, -EFAULT if the
 * CB creation failed.
 */
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hl_cs_counters_atomic *agg_cntr = &hdev->aggregated_cs_counters;
	struct hw_queue_properties *q_prop =
				&hdev->asic_prop.hw_queues_props[queue_id];
	bool is_master = (mode == HL_COLLECTIVE_MASTER);
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;

	/* Master: 8 msg_short + 2 fence + 2 msg_prot; slave: 5 msg_short + 1 fence */
	if (is_master)
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
	else
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);

	job = hl_cs_allocate_job(hdev, q_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&agg_cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* The last argument is true only for slave CBs when the MMU is enabled */
	cb = hl_cb_kernel_create(hdev, cb_size,
				hdev->mmu_enable && !is_master);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&agg_cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/* For the master the user CB doubles as the patched CB */
	job->patched_cb = is_master ? job->user_cb : NULL;
	job->job_cb_size = job->user_cb_size;

	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* Jobs on external queues take an extra reference on the CS */
	if (q_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
	list_add_tail(&job->cs_node, &cs->job_list);
	hl_debugfs_add_job(hdev, job);

	return 0;
}
/*
 * gaudi_collective_wait_create_jobs() - create all jobs of a collective wait
 * @hdev: habanalabs device structure
 * @ctx: context passed through to the per-job creation
 * @cs: command submission the jobs are added to
 * @wait_queue_id: queue of the collective master; must be configured as such
 * @collective_engine_id: GAUDI_ENGINE_ID_DMA_5 or GAUDI_ENGINE_ID_TPC_7
 * @encaps_signal_offset: passed through to the per-job creation
 *
 * The first job is the master job on @wait_queue_id. Each following iteration
 * targets the next NIC queue of the same stream (NICs whose capability bit is
 * not set are skipped without creating a job) and, once all NICs were
 * visited, the queue of the requested collective engine.
 *
 * Return: 0 on success, -EINVAL on a bad queue/engine, or the error returned
 * by gaudi_collective_wait_create_job().
 */
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *master_prop =
			&hdev->asic_prop.hw_queues_props[wait_queue_id];
	u32 stream = wait_queue_id % 4;
	u32 engine_queue, next_nic_queue, target_queue, total_jobs;
	u32 nic_idx = 0;
	int job_idx, rc = 0;

	if (master_prop->collective_mode != HL_COLLECTIVE_MASTER) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	switch (collective_engine_id) {
	case GAUDI_ENGINE_ID_DMA_5:
		engine_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
		break;
	case GAUDI_ENGINE_ID_TPC_7:
		engine_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
		break;
	default:
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	total_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	next_nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	for (job_idx = 0 ; job_idx < total_jobs ; job_idx++) {
		enum hl_collective_mode mode = HL_COLLECTIVE_SLAVE;

		if (job_idx == 0) {
			/* Master job always comes first */
			mode = HL_COLLECTIVE_MASTER;
			target_queue = wait_queue_id;
		} else if (nic_idx >= NIC_NUMBER_OF_ENGINES) {
			/* All NICs were visited: use the collective engine */
			target_queue = engine_queue;
		} else {
			bool nic_up = gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx);

			/* Same stream on the next NIC is 4 queue ids away */
			target_queue = next_nic_queue;
			next_nic_queue += 4;
			nic_idx++;

			if (!nic_up)
				continue;
		}

		rc = gaudi_collective_wait_create_job(hdev, ctx, cs, mode,
				target_queue, wait_queue_id,
				encaps_signal_offset);
		if (rc)
			break;
	}

	return rc;
}
/*
 * gaudi_late_init() - late stage of the device initialization
 * @hdev: habanalabs device structure
 *
 * Fetches the CPU-CP info, adjusts the NIC configuration for PCI cards,
 * enables PCI access from the device CPU and then runs the remaining
 * one-time initializations. If any step after PCI access was enabled fails,
 * a "disable PCI access" message is sent before returning.
 *
 * Return: 0 on success, otherwise the error code of the failing step.
 */
static int gaudi_late_init(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
int rc;
rc = gaudi->cpucp_info_get(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to get cpucp info\n");
return rc;
}
/* PCI cards: drop the two lowest NIC ports if any of them is enabled */
if ((hdev->card_type == cpucp_card_type_pci) &&
(hdev->nic_ports_mask & 0x3)) {
dev_info(hdev->dev,
"PCI card detected, only 8 ports are enabled\n");
hdev->nic_ports_mask &= ~0x3;
/*
 * Set the PQF/CQF/CP stop bits of both NIC0 QMANs and clear their
 * GLBL_CFG0 - presumably this stops and disables the unused QMANs.
 */
WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
/* Mark those two NICs as not initialized for the rest of the driver */
gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
}
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
if (rc) {
dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
return rc;
}
/*
 * From here on every failure must undo the PCI access enable.
 * NOTE(review): the meaning of the (0, 0) arguments is defined by the
 * scrub_device_mem callback, not visible here. No message is printed by
 * this function if the scrub fails.
 */
rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
if (rc)
goto disable_pci_access;
rc = gaudi_fetch_psoc_frequency(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
goto disable_pci_access;
}
rc = gaudi_mmu_clear_pgt_range(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
goto disable_pci_access;
}
rc = gaudi_init_tpc_mem(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize TPC memories\n");
goto disable_pci_access;
}
rc = gaudi_collective_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to init collective\n");
goto disable_pci_access;
}
/* NOTE(review): the constant 1 is presumably the ASID to prepare - confirm */
gaudi_mmu_prepare(hdev, 1);
hl_fw_set_pll_profile(hdev);
return 0;
disable_pci_access:
/* Best effort: the return value of the disable message is ignored */
hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
return rc;
}
/*
 * gaudi_late_fini() - release the hwmon channel info built during late init
 * @hdev: habanalabs device structure
 *
 * Walks the NULL-terminated array of channel info pointers, freeing each
 * entry's config and the entry itself, then frees the array and clears the
 * pointer in the chip info. Does nothing if no array is registered.
 */
static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **info_arr = hdev->hl_chip_info->info;
	const struct hwmon_channel_info **entry;

	if (!info_arr)
		return;

	for (entry = info_arr ; *entry ; entry++) {
		kfree((*entry)->config);
		kfree(*entry);
	}

	kfree(info_arr);
	hdev->hl_chip_info->info = NULL;
}
/*
 * gaudi_alloc_cpu_accessible_dma_mem() - allocate the CPU accessible DMA area
 * @hdev: habanalabs device structure
 *
 * Allocates a coherent buffer of HL_CPU_ACCESSIBLE_MEM_SIZE bytes whose first
 * and last byte yield the same GAUDI_CPU_PCI_MSB_ADDR() value. Up to
 * GAUDI_ALLOC_CPU_MEM_RETRY_CNT allocations are attempted; rejected buffers
 * are kept until the end so that a retry cannot return the same range, and
 * are freed before returning.
 *
 * Return: 0 on success, -ENOMEM if an allocation failed, -EFAULT if no
 * attempt produced a suitable buffer.
 */
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
int i, j, rc = 0;
for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
virt_addr_arr[i] =
hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
HL_CPU_ACCESSIBLE_MEM_SIZE,
&dma_addr_arr[i],
GFP_KERNEL | __GFP_ZERO);
if (!virt_addr_arr[i]) {
rc = -ENOMEM;
goto free_dma_mem_arr;
}
/* Accept the buffer only if both ends share the same MSB value */
end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
GAUDI_CPU_PCI_MSB_ADDR(end_addr))
break;
}
/* Loop ran to completion: every attempt was rejected, free them all */
if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
dev_err(hdev->dev,
"MSB of CPU accessible DMA memory are not identical in all range\n");
rc = -EFAULT;
goto free_dma_mem_arr;
}
/* Index i holds the accepted buffer */
hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
hdev->cpu_accessible_dma_address = dma_addr_arr[i];
hdev->cpu_pci_msb_addr =
GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
/* NOTE(review): the macro presumably rewrites the address in place - confirm */
if (!hdev->asic_prop.fw_security_enabled)
GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
/*
 * Reached on success as well: only indices below i are freed, i.e. the
 * rejected (or, on failure, all previously allocated) buffers. The accepted
 * buffer at index i is never freed here.
 */
free_dma_mem_arr:
for (j = 0 ; j < i ; j++)
hdev->asic_funcs->asic_dma_free_coherent(hdev,
HL_CPU_ACCESSIBLE_MEM_SIZE,
virt_addr_arr[j],
dma_addr_arr[j]);
return rc;
}
/*
 * gaudi_free_internal_qmans_pq_mem() - free the PQ buffers of internal QMANs
 * @hdev: habanalabs device structure
 *
 * Every queue slot that has a PQ kernel address gets its coherent buffer
 * released; slots without one are left alone.
 */
static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 qid;

	for (qid = 0 ; qid < GAUDI_QUEUE_ID_SIZE ; qid++) {
		struct gaudi_internal_qman_info *qman =
						&gaudi->internal_qmans[qid];

		if (qman->pq_kernel_addr)
			hdev->asic_funcs->asic_dma_free_coherent(hdev,
							qman->pq_size,
							qman->pq_kernel_addr,
							qman->pq_dma_addr);
	}
}
/*
 * gaudi_alloc_internal_qmans_pq_mem() - allocate PQ buffers of internal QMANs
 * @hdev: habanalabs device structure
 *
 * For each queue of type QUEUE_TYPE_INT the PQ size is picked according to
 * the queue id range (DMA 2-7, MME, TPC or NIC) and a zeroed coherent buffer
 * of that size is allocated. On any failure everything allocated so far is
 * released through gaudi_free_internal_qmans_pq_mem().
 *
 * Return: 0 on success, -EINVAL for an internal queue outside the known
 * ranges, -ENOMEM if an allocation failed.
 */
static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *qman;
	int qid, rc;

	for (qid = 0 ; qid < GAUDI_QUEUE_ID_SIZE ; qid++) {
		if (gaudi_queue_type[qid] != QUEUE_TYPE_INT)
			continue;

		qman = &gaudi->internal_qmans[qid];

		switch (qid) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			qman->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			qman->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			qman->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			qman->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", qid);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		qman->pq_kernel_addr =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						qman->pq_size,
						&qman->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!qman->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}
/*
 * gaudi_set_pci_memory_regions() - describe the PCI-visible memory regions
 * @hdev: habanalabs device structure
 *
 * Fills the CFG, SRAM, DRAM and scratchpad (SP SRAM) entries of
 * hdev->pci_mem_region and marks each of them as used. CFG and the
 * scratchpad share the CFG BAR and are located relative to
 * SPI_FLASH_BASE_ADDR; SRAM and DRAM start at offset 0 of their own BARs.
 */
static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *cfg = &hdev->pci_mem_region[PCI_REGION_CFG];
	struct pci_mem_region *sram = &hdev->pci_mem_region[PCI_REGION_SRAM];
	struct pci_mem_region *dram = &hdev->pci_mem_region[PCI_REGION_DRAM];
	struct pci_mem_region *sp_sram =
				&hdev->pci_mem_region[PCI_REGION_SP_SRAM];

	/* Configuration space */
	cfg->region_base = CFG_BASE;
	cfg->region_size = CFG_SIZE;
	cfg->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
	cfg->bar_size = CFG_BAR_SIZE;
	cfg->bar_id = CFG_BAR_ID;
	cfg->used = 1;

	/* SRAM */
	sram->region_base = SRAM_BASE_ADDR;
	sram->region_size = SRAM_SIZE;
	sram->offset_in_bar = 0;
	sram->bar_size = SRAM_BAR_SIZE;
	sram->bar_id = SRAM_BAR_ID;
	sram->used = 1;

	/* DRAM: sizes come from the ASIC properties */
	dram->region_base = DRAM_PHYS_BASE;
	dram->region_size = prop->dram_size;
	dram->offset_in_bar = 0;
	dram->bar_size = prop->dram_pci_bar_size;
	dram->bar_id = HBM_BAR_ID;
	dram->used = 1;

	/* PSOC scratchpad */
	sp_sram->region_base = PSOC_SCRATCHPAD_ADDR;
	sp_sram->region_size = PSOC_SCRATCHPAD_SIZE;
	sp_sram->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
	sp_sram->bar_size = CFG_BAR_SIZE;
	sp_sram->bar_id = CFG_BAR_ID;
	sp_sram->used = 1;
}
/*
 * gaudi_sw_init() - allocate and initialize the ASIC-specific software state
 * @hdev: habanalabs device structure
 *
 * Allocates the gaudi device structure, builds its event id table from the
 * valid entries of gaudi_irq_map_table, creates the DMA pool, the CPU
 * accessible DMA memory and its gen_pool, and the internal QMANs PQ buffers.
 * Resources are released in reverse order if a step fails.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 idx, nr_events = 0;
	int rc;

	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	/* Copy the fc_id of every valid IRQ map entry into the events table */
	for (idx = 0 ; idx < ARRAY_SIZE(gaudi_irq_map_table) ; idx++) {
		if (!gaudi_irq_map_table[idx].valid)
			continue;

		if (nr_events == GAUDI_EVENT_SIZE) {
			dev_err(hdev->dev,
				"Event array exceeds the limit of %u events\n",
				GAUDI_EVENT_SIZE);
			rc = -EINVAL;
			goto free_gaudi_device;
		}

		gaudi->events[nr_events] = gaudi_irq_map_table[idx].fc_id;
		nr_events++;
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
	hdev->asic_specific = gaudi;

	/* DMA pool of GAUDI_DMA_POOL_BLK_SIZE blocks, 8 bytes aligned */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
					&hdev->pdev->dev,
					GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	/* gen_pool with 32 bytes granularity over the CPU accessible memory */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);

	/* Features supported by this ASIC */
	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);

	hdev->stream_master_qid_arr =
			hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	/* Convert the address back before handing it to the free routine */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}
static int gaudi_sw_fini(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
gaudi_free_internal_qmans_pq_mem(hdev);
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
if (!hdev->asic_prop.fw_security_enabled)
GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
hdev->cpu_pci_msb_addr);
hdev->asic_funcs->asic_dma_free_coherent(hdev,
HL_CPU_ACCESSIBLE_MEM_SIZE,
hdev->cpu_accessible_dma_mem,
hdev->cpu_accessible_dma_address);
dma_pool_destroy(hdev->dma_pool);
kfree(gaudi);
return 0;
}
/*
 * gaudi_irq_handler_single() - interrupt handler for single MSI mode
 * @irq: IRQ number
 * @arg: the habanalabs device structure
 *
 * With one vector shared by everything, each completion queue handler and
 * then the event queue handler are invoked. Nothing is done while the device
 * is disabled.
 *
 * Return: always IRQ_HANDLED.
 */
static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int cq;

	if (!hdev->disabled) {
		for (cq = 0 ; cq < hdev->asic_prop.completion_queues_count ;
									cq++)
			hl_irq_handler_cq(irq, &hdev->completion_queue[cq]);

		hl_irq_handler_eq(irq, &hdev->event_queue);
	}

	return IRQ_HANDLED;
}
/*
 * gaudi_pci_irq_vector() - translate a driver interrupt index to a Linux IRQ
 * @hdev: habanalabs device structure
 * @nr: driver interrupt index
 * @cpu_eq: true when the vector is requested for the CPU event queue
 *
 * Indices below GAUDI_EVENT_QUEUE_MSI_IDX, and the CPU EQ index, map to the
 * MSI vector of the same number. Any other index is shifted up by
 * NIC_NUMBER_OF_ENGINES + 1. A critical message is logged if the CPU EQ is
 * requested with an index other than GAUDI_EVENT_QUEUE_MSI_IDX.
 *
 * Return: the value returned by pci_irq_vector() for the computed vector.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if (cpu_eq && nr != GAUDI_EVENT_QUEUE_MSI_IDX)
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	if (cpu_eq || nr < GAUDI_EVENT_QUEUE_MSI_IDX)
		msi_vec = nr;
	else
		msi_vec = nr + NIC_NUMBER_OF_ENGINES + 1;

	return pci_irq_vector(hdev->pdev, msi_vec);
}
/*
 * gaudi_enable_msi_single() - request the one shared IRQ of single MSI mode
 * @hdev: habanalabs device structure
 *
 * Return: 0 on success, otherwise the error returned by request_irq().
 */
static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int irq, rc;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	/* Vector 0 serves all completion queues and the event queue */
	irq = gaudi_pci_irq_vector(hdev, 0, false);

	rc = request_irq(irq, gaudi_irq_handler_single, 0,
				"gaudi single msi", hdev);
	if (!rc)
		return 0;

	dev_err(hdev->dev,
		"Failed to request single MSI IRQ\n");
	return rc;
}
/*
 * gaudi_enable_msi_multi() - request one IRQ per completion queue plus the EQ
 * @hdev: habanalabs device structure
 *
 * Completion queue IRQs are requested first, in index order, followed by the
 * event queue IRQ. If any request fails, the completion queue IRQs obtained
 * so far are freed.
 *
 * Return: 0 on success, otherwise the error returned by request_irq().
 */
static int gaudi_enable_msi_multi(struct hl_device *hdev)
{
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int requested, irq, rc, i;

	for (requested = 0 ; requested < cq_cnt ; requested++) {
		irq = gaudi_pci_irq_vector(hdev, requested, false);
		rc = request_irq(irq, hl_irq_handler_cq, 0,
					gaudi_irq_name[requested],
					&hdev->completion_queue[requested]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	/* The event queue name follows the completion queue names */
	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
				&hdev->event_queue);
	if (!rc)
		return 0;

	dev_err(hdev->dev, "Failed to request IRQ %d", irq);

free_irqs:
	for (i = 0 ; i < requested ; i++)
		free_irq(gaudi_pci_irq_vector(hdev, i, false),
				&hdev->completion_queue[i]);
	return rc;
}
/*
 * gaudi_enable_msi() - allocate MSI vectors and request the driver IRQs
 * @hdev: habanalabs device structure
 *
 * No-op if MSI was already enabled. Exactly one vector is asked for
 * (min = max = 1); single or multi MSI mode is then chosen by comparing the
 * number of vectors actually obtained with NUMBER_OF_INTERRUPTS. The
 * vectors are released again if requesting the IRQs fails.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int nvec, rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	nvec = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
	if (nvec < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", nvec);
		return nvec;
	}

	if (nvec < NUMBER_OF_INTERRUPTS) {
		gaudi->multi_msi_mode = false;
		rc = gaudi_enable_msi_single(hdev);
	} else {
		gaudi->multi_msi_mode = true;
		rc = gaudi_enable_msi_multi(hdev);
	}

	if (rc) {
		pci_free_irq_vectors(hdev->pdev);
		return rc;
	}

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;
}
/*
 * gaudi_sync_irqs() - wait for in-flight interrupt handlers to finish
 * @hdev: habanalabs device structure
 *
 * Does nothing if MSI is not enabled. In single MSI mode only vector 0 is
 * synchronized; in multi MSI mode every completion queue vector and then the
 * event queue vector are.
 */
static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int cq;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	if (!gaudi->multi_msi_mode) {
		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
		return;
	}

	for (cq = 0 ; cq < cq_cnt ; cq++)
		synchronize_irq(gaudi_pci_irq_vector(hdev, cq, false));

	synchronize_irq(gaudi_pci_irq_vector(hdev,
					GAUDI_EVENT_QUEUE_MSI_IDX, true));
}
/*
 * gaudi_disable_msi() - free the driver IRQs and the MSI vectors
 * @hdev: habanalabs device structure
 *
 * Does nothing if MSI is not enabled. Pending handlers are synchronized
 * first, then the IRQs are freed (event queue before completion queues in
 * multi MSI mode), the vectors are released and the MSI capability bit is
 * cleared.
 */
static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int cq, irq;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);

	if (!gaudi->multi_msi_mode) {
		/* Single mode: the device itself is the IRQ cookie */
		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	} else {
		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
						true);
		free_irq(irq, &hdev->event_queue);

		for (cq = 0 ; cq < cq_cnt ; cq++) {
			irq = gaudi_pci_irq_vector(hdev, cq, false);
			free_irq(irq, &hdev->completion_queue[cq]);
		}
	}

	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
if (hdev->asic_prop.fw_security_enabled)
return;
if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
return;
if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
return;
if (!hdev->sram_scrambler_enable)
return;
WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
if (hdev->asic_prop.fw_security_enabled)
return;
if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
return;
if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
return;
if (!hdev->dram_scrambler_enable)
return;
WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}
static void gaudi_init_e2e(struct hl_device *hdev)
{
if (hdev->asic_prop.fw_security_enabled)
return;
if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_E2E_CRED_EN)
return;
WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
if (!hdev->dram_scrambler_enable) {
WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
}
WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}
/*
 * gaudi_init_hbm_cred() - program the HBM credit registers of the DMA_IF blocks
 * @hdev: pointer to the habanalabs device structure
 *
 * Writes the HBM0/HBM1 write and read credit counts of the E_N, E_S, W_N and
 * W_S DMA_IF blocks, then sets the read-credit and write-credit enable bits in
 * HBM_CRED_EN_0 and HBM_CRED_EN_1 of each of those blocks.
 *
 * Nothing is written when firmware security is enabled, or when the boot-fit
 * status word has CPU_BOOT_DEV_STS0_HBM_CRED_EN set (presumably meaning the
 * firmware has already done this configuration - confirm against the F/W
 * interface definition).
 */
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	const u32 hbm0_wr_cred = 0x33333333;
	const u32 hbm0_rd_cred = 0x77777777;
	const u32 hbm1_wr_cred = 0x55555555;
	const u32 hbm1_rd_cred = 0xDDDDDDDD;
	/* The same enable value goes into every HBM_CRED_EN register */
	const u32 cred_en =
		(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
		(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT);

	if (hdev->asic_prop.fw_security_enabled ||
	    (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_HBM_CRED_EN))
		return;

	/* Credit counts, one DMA_IF block at a time */
	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr_cred);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr_cred);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd_cred);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd_cred);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr_cred);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr_cred);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd_cred);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd_cred);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr_cred);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr_cred);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd_cred);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd_cred);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr_cred);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr_cred);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd_cred);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd_cred);

	/* Enable read and write credits: first CRED_EN_0 of every block ... */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, cred_en);
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, cred_en);
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, cred_en);
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, cred_en);

	/* ... then CRED_EN_1 of every block */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, cred_en);
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, cred_en);
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, cred_en);
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, cred_en);
}
/*
 * gaudi_init_golden_registers() - apply the fixed initial register settings
 * @hdev: pointer to the habanalabs device structure
 *
 * Runs the E2E and HBM credit initialization, programs the interrupt mask and
 * the ICACHE_FETCH_LINE_NUM field of every TPC, zeroes the first 128 bytes
 * behind the SRAM BAR and sets EUS_ROLLUP_CNT_ADD of the four MME controllers.
 */
static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 cfg_off = 0;
	int engine, byte_off;

	gaudi_init_e2e(hdev);
	gaudi_init_hbm_cred(hdev);

	/* Per-TPC settings; consecutive TPCs are TPC_CFG_OFFSET apart */
	for (engine = 0 ; engine < TPC_NUMBER_OF_ENGINES ; engine++) {
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + cfg_off, 0x8FFE);
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, cfg_off,
				ICACHE_FETCH_LINE_NUM, 2);

		cfg_off += TPC_CFG_OFFSET;
	}

	/* Clear the first 128 bytes of the SRAM BAR, 8 bytes per write */
	for (byte_off = 0 ; byte_off < 128 ; byte_off += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + byte_off);

	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
}
/*
 * gaudi_init_pci_dma_qman() - configure one stream of a PCI DMA QMAN
 * @hdev: pointer to the habanalabs device structure
 * @dma_id: DMA engine index, used to locate the QMAN register block
 * @qman_id: stream index inside the QMAN (per-stream registers are 4 bytes
 *           apart)
 * @qman_pq_addr: bus address written to the stream's PQ base registers
 *
 * Programs the PQ base/size/indices, the CP LDMA offsets, the four CP message
 * base addresses (E_N and W_S sync manager monitor-payload and SOB bases) and
 * the CP barrier configuration of the stream.
 *
 * The GLBL_ERR_*, ARB_*, GLBL_PROT and GLBL_CFG1 registers are addressed by
 * the QMAN offset only, so they are written once, when stream 0 is handled.
 */
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
					int qman_id, dma_addr_t qman_pq_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mon_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 mon_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 sob_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 sob_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 mon_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 mon_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 sob_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 sob_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 qm_off = dma_id * DMA_QMAN_OFFSET;
	u32 stream_off = qm_off + qman_id * 4;
	u32 err_cfg, irq_off;

	/* Per-stream PQ: base address, log2 of the length, zeroed PI/CI */
	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + stream_off,
			lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + stream_off,
			upper_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_SIZE_0 + stream_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + stream_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + stream_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + stream_off,
			QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + stream_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + stream_off,
			QMAN_LDMA_DST_OFFSET);

	/* Message bases 0/1 use the E_N sync manager, 2/3 the W_S one */
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + stream_off, mon_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + stream_off, mon_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + stream_off, sob_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + stream_off, sob_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + stream_off, mon_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + stream_off, mon_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + stream_off, sob_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + stream_off, sob_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + stream_off, 0x100);

	/* Everything below is QMAN-wide and is done with stream 0 only */
	if (qman_id != 0)
		return;

	/* Target of the error message: the GIC or a F/W supplied register */
	if (hdev->asic_prop.gic_interrupts_enable)
		irq_off = mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR;
	else
		irq_off = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

	err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
	if (hdev->stop_on_err)
		err_cfg |= PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

	WREG32(mmDMA0_QM_GLBL_ERR_CFG + qm_off, err_cfg);
	WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + qm_off,
			lower_32_bits(CFG_BASE + irq_off));
	WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + qm_off,
			upper_32_bits(CFG_BASE + irq_off));
	/* Error payload: cpu_id of the DMA0 QM event plus the engine index */
	WREG32(mmDMA0_QM_GLBL_ERR_WDATA + qm_off,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
								dma_id);

	WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + qm_off, QM_ARB_ERR_MSG_EN_MASK);
	WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + qm_off, GAUDI_ARB_WDT_TIMEOUT);

	WREG32(mmDMA0_QM_GLBL_PROT + qm_off, QMAN_EXTERNAL_MAKE_TRUSTED);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_off, 0);
}
/*
 * gaudi_init_dma_core() - configure and enable one DMA core
 * @hdev: pointer to the habanalabs device structure
 * @dma_id: DMA engine index; the core's registers sit dma_id * DMA_CORE_OFFSET
 *          after the DMA0 core registers
 *
 * Programs the outstanding/size limits, the error configuration and error
 * message target, the protection and secure properties, and finally sets the
 * EN bit in CORE_CFG_0.
 */
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 core_off = dma_id * DMA_CORE_OFFSET;
	u32 err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 irq_off;

	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + core_off, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + core_off, 0);
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + core_off, 15);

	/* Error messages are always enabled; stop-on-error is optional */
	if (hdev->stop_on_err)
		err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + core_off, err_cfg);

	/* Target of the error message: the GIC or a F/W supplied register */
	if (hdev->asic_prop.gic_interrupts_enable)
		irq_off = mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR;
	else
		irq_off = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + core_off,
			lower_32_bits(CFG_BASE + irq_off));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + core_off,
			upper_32_bits(CFG_BASE + irq_off));
	/* Error payload: cpu_id of the DMA0 core event plus the engine index */
	WREG32(mmDMA0_CORE_ERRMSG_WDATA + core_off,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id +
								dma_id);

	WREG32(mmDMA0_CORE_PROT + core_off,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	WREG32(mmDMA0_CORE_SECURE_PROPS + core_off,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);

	/* The enable bit is written last, after all the configuration */
	WREG32(mmDMA0_CORE_CFG_0 + core_off, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}
/*
 * gaudi_enable_qman() - write the enable mask of a DMA QMAN
 * @hdev: pointer to the habanalabs device structure
 * @dma_id: DMA engine index, used to locate the QMAN register block
 * @enable_mask: value written as-is to the QMAN's GLBL_CFG0 register
 */
static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
				u32 enable_mask)
{
	/* DMA QMAN register blocks are DMA_QMAN_OFFSET apart */
	u32 qm_off = dma_id * DMA_QMAN_OFFSET;

	WREG32(mmDMA0_QM_GLBL_CFG0 + qm_off, enable_mask);
}
/*
 * gaudi_init_pci_dma_qmans() - initialize all PCI DMA channels
 * @hdev: pointer to the habanalabs device structure
 *
 * For every PCI DMA channel: assigns a CQ id and an MSI vector to each of the
 * channel's kernel queues, configures the matching QMAN stream, then
 * initializes the DMA core and enables the QMAN. Sets HW_CAP_PCI_DMA when
 * done and returns immediately if that flag is already set.
 */
static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *queue;
	int chnl, stream, dma_id, cpu_skip, nic_skip;
	int next_cq_id = 0, next_msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (chnl = 0 ; chnl < PCI_DMA_NUMBER_OF_CHNLS ; chnl++) {
		dma_id = gaudi_dma_assignment[chnl];

		/*
		 * Engines with an id above 1 use a queue index shifted by one
		 * and an MSI vector shifted by NIC_NUMBER_OF_ENGINES + 1.
		 * NOTE(review): going by the variable names this skips the CPU
		 * queue and the NIC interrupts - confirm against the queue and
		 * MSI layout.
		 */
		cpu_skip = (dma_id > 1) ? 1 : 0;
		nic_skip = (dma_id > 1) ? NIC_NUMBER_OF_ENGINES : 0;

		for (stream = 0 ; stream < QMAN_STREAMS ; stream++) {
			queue = &hdev->kernel_queues[4 * dma_id + stream +
								cpu_skip];

			/* CQ ids and MSI vectors are handed out sequentially */
			queue->cq_id = next_cq_id;
			queue->msi_vec = nic_skip + cpu_skip + next_msi_vec;
			next_cq_id++;
			next_msi_vec++;

			gaudi_init_pci_dma_qman(hdev, dma_id, stream,
						queue->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);
		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}
/*
 * gaudi_init_hbm_dma_qman() - configure one CP of an HBM DMA QMAN
 * @hdev: pointer to the habanalabs device structure
 * @dma_id: DMA engine index, used to locate the QMAN register block
 * @qman_id: CP index inside the QMAN. Indices below 4 have a PQ; any other
 *           index (callers in this file pass 4) takes the QMAN-wide path
 * @qman_base_addr: PQ base address; only used when @qman_id is below 4
 *
 * For indices below 4 the PQ registers and the CPDMA offsets are programmed.
 * Otherwise the LDMA offsets are programmed together with the QMAN-wide
 * GLBL_ERR_*, ARB_*, GLBL_CFG1 and GLBL_PROT registers.
 *
 * In both cases message bases 0/1 are pointed at the E_N sync manager.
 * Message bases 2/3 are pointed at the W_S sync manager only when
 * gaudi_dma_assignment[dma_id] is GAUDI_ENGINE_ID_DMA_5.
 */
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
					int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mon_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 mon_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 sob_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 sob_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 mon_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 mon_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 sob_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 sob_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 qm_off = dma_id * DMA_QMAN_OFFSET;
	u32 cp_off = qm_off + qman_id * 4;
	u32 err_cfg, irq_off;

	if (qman_id >= 4) {
		/* Target of the error message: GIC or F/W supplied register */
		if (hdev->asic_prop.gic_interrupts_enable)
			irq_off = mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR;
		else
			irq_off = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + cp_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + cp_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + cp_off,
				QMAN_LDMA_DST_OFFSET);

		/* QMAN-wide registers: addressed by the QMAN offset only */
		err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + qm_off, err_cfg);
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + qm_off,
				lower_32_bits(CFG_BASE + irq_off));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + qm_off,
				upper_32_bits(CFG_BASE + irq_off));
		/* Error payload: cpu_id of the DMA0 QM event plus engine index */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + qm_off,
				gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
								dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + qm_off,
				QM_ARB_ERR_MSG_EN_MASK);
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + qm_off,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + qm_off, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + qm_off,
				QMAN_INTERNAL_MAKE_TRUSTED);
	} else {
		/* PQ: base address, log2 of the length, zeroed PI/CI */
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + cp_off,
				lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + cp_off,
				upper_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_SIZE_0 + cp_off,
				ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + cp_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + cp_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + cp_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + cp_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + cp_off,
				QMAN_CPDMA_DST_OFFSET);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + cp_off, mon_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + cp_off, mon_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + cp_off, sob_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + cp_off, sob_en_hi);

	/* Only the engine mapped to DMA_5 also gets message bases 2/3 */
	if (gaudi_dma_assignment[dma_id] != GAUDI_ENGINE_ID_DMA_5)
		return;

	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + cp_off, mon_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + cp_off, mon_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + cp_off, sob_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + cp_off, sob_ws_hi);
}
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct gaudi_internal_qman_info *q;
u64 qman_base_addr;
int i, j, dma_id, internal_q_index;
if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
return;
for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
for (j = 0 ; j < QMAN_STREAMS ; j++) {
internal_q_index = dma_id * QMAN_STREAMS + j + 1;
q = &gaudi->internal_qmans[internal_q_index];
qman_base_addr = (u64) q->pq_dma_addr;
gaudi_init_hbm_dma_qman(hdev, dma_id, j,
qman_base_addr);
}
gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
gaudi_init_dma_core(hdev, dma_id);
gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
}
gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}
/*
 * gaudi_init_mme_qman() - configure one CP of an MME QMAN
 * @hdev: pointer to the habanalabs device structure
 * @mme_offset: offset of the QMAN register block from the MME0 QMAN registers
 * @qman_id: CP index inside the QMAN. Indices below 4 have a PQ; any other
 *           index (callers in this file pass 4) takes the QMAN-wide path
 * @qman_base_addr: PQ base address; only used when @qman_id is below 4
 *
 * For indices below 4 the PQ registers and the CPDMA offsets are programmed.
 * Otherwise the LDMA offsets are programmed together with the QMAN-wide
 * GLBL_ERR_*, ARB_*, GLBL_CFG1 and GLBL_PROT registers.
 *
 * In both cases message bases 0/1 are pointed at the E_N sync manager.
 */
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
					int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mon_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 mon_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 sob_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 sob_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 cp_off = mme_offset + qman_id * 4;
	u32 err_cfg, irq_off, mme_id;

	if (qman_id >= 4) {
		/* Target of the error message: GIC or F/W supplied register */
		if (hdev->asic_prop.gic_interrupts_enable)
			irq_off = mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR;
		else
			irq_off = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + cp_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + cp_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + cp_off,
				QMAN_LDMA_DST_OFFSET);

		/*
		 * The event index is half the QMAN block index, i.e. the
		 * MME0 and MME2 blocks used in this file map to 0 and 1.
		 */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		/* QMAN-wide registers: addressed by the QMAN offset only */
		err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			err_cfg |= MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, err_cfg);
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
				lower_32_bits(CFG_BASE + irq_off));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
				upper_32_bits(CFG_BASE + irq_off));
		/* Error payload: cpu_id of the MME0 QM event plus mme_id */
		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
				gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
								mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	} else {
		/* PQ: base address, log2 of the length, zeroed PI/CI */
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + cp_off,
				lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + cp_off,
				upper_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_SIZE_0 + cp_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + cp_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + cp_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + cp_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + cp_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + cp_off,
				QMAN_CPDMA_DST_OFFSET);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + cp_off, mon_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + cp_off, mon_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + cp_off, sob_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + cp_off, sob_hi);
}
static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct gaudi_internal_qman_info *q;
u64 qman_base_addr;
u32 mme_offset;
int i, internal_q_index;
if (gaudi->hw_cap_initialized & HW_CAP_MME)
return;
mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
q = &gaudi->internal_qmans[internal_q_index];
qman_base_addr = (u64) q->pq_dma_addr;
gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
qman_base_addr);
if (i == 3)
mme_offset = 0;
}
mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
gaudi_init_mme_qman(hdev, 0, 4, 0);
WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
gaudi->hw_cap_initialized |= HW_CAP_MME;
}
/*
 * gaudi_init_tpc_qman() - configure one CP of a TPC QMAN
 * @hdev: pointer to the habanalabs device structure
 * @tpc_offset: offset of the QMAN register block from the TPC0 QMAN registers
 * @qman_id: CP index inside the QMAN. Indices below 4 have a PQ; any other
 *           index (callers in this file pass 4) takes the QMAN-wide path
 * @qman_base_addr: PQ base address; only used when @qman_id is below 4
 *
 * For indices below 4 the PQ registers and the CPDMA offsets are programmed.
 * Otherwise the LDMA offsets are programmed together with the QMAN-wide
 * GLBL_ERR_*, ARB_*, GLBL_CFG1 and GLBL_PROT registers.
 *
 * In both cases message bases 0/1 are pointed at the E_N sync manager.
 * Message bases 2/3 are pointed at the W_S sync manager only for the QMAN
 * whose block index is 6.
 */
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
					int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mon_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 mon_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 sob_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 sob_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 mon_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 mon_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 sob_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 sob_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 cp_off = tpc_offset + qman_id * 4;
	/* Index of the QMAN block, derived from its register offset */
	u32 tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
	u32 err_cfg, irq_off;

	if (qman_id >= 4) {
		/* Target of the error message: GIC or F/W supplied register */
		if (hdev->asic_prop.gic_interrupts_enable)
			irq_off = mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR;
		else
			irq_off = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + cp_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + cp_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + cp_off,
				QMAN_LDMA_DST_OFFSET);

		/* QMAN-wide registers: addressed by the QMAN offset only */
		err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			err_cfg |= TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, err_cfg);
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
				lower_32_bits(CFG_BASE + irq_off));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
				upper_32_bits(CFG_BASE + irq_off));
		/* Error payload: cpu_id of the TPC0 QM event plus tpc_id */
		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
				gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
								tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	} else {
		/* PQ: base address, log2 of the length, zeroed PI/CI */
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + cp_off,
				lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + cp_off,
				upper_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_SIZE_0 + cp_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + cp_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + cp_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + cp_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + cp_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + cp_off,
				QMAN_CPDMA_DST_OFFSET);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + cp_off, mon_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + cp_off, mon_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + cp_off, sob_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + cp_off, sob_en_hi);

	/* Only the QMAN with block index 6 also gets message bases 2/3 */
	if (tpc_id != 6)
		return;

	WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + cp_off, mon_ws_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + cp_off, mon_ws_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + cp_off, sob_ws_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + cp_off, sob_ws_hi);
}
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct gaudi_internal_qman_info *q;
u64 qman_base_addr;
u32 so_base_hi, tpc_offset = 0;
u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
int i, tpc_id, internal_q_index;
if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
return;
so_base_hi = upper_32_bits(CFG_BASE +
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
for (i = 0 ; i < QMAN_STREAMS ; i++) {
internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
tpc_id * QMAN_STREAMS + i;
q = &gaudi->internal_qmans[internal_q_index];
qman_base_addr = (u64) q->pq_dma_addr;
gaudi_init_tpc_qman(hdev, tpc_offset, i,
qman_base_addr);
if (i == 3) {
gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
QMAN_TPC_ENABLE);
}
}
WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
so_base_hi);
tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
gaudi->hw_cap_initialized |=
FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
}
}
/*
 * gaudi_init_nic_qman() - configure one stream of a NIC QMAN
 * @hdev: pointer to the habanalabs device structure
 * @nic_offset: offset of the QMAN register block from the NIC0 QM0 registers
 * @qman_id: stream index inside the QMAN (per-stream registers are 4 bytes
 *           apart)
 * @qman_base_addr: address written to the stream's PQ base registers
 * @nic_id: NIC engine index, added to the error message payload
 *
 * Programs the PQ base/size/indices, the CP LDMA offsets and the four CP
 * message base addresses (E_N and W_S sync manager monitor-payload and SOB
 * bases) of the stream.
 *
 * The GLBL_ERR_*, ARB_*, GLBL_CFG1 and GLBL_PROT registers are addressed by
 * the QMAN offset only, so they are written once, when stream 0 is handled.
 */
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
					int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mon_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 mon_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 sob_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 sob_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 mon_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 mon_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	u32 sob_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 sob_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	u32 stream_off = nic_offset + qman_id * 4;
	u32 err_cfg, irq_off;

	/* Per-stream PQ: base address, log2 of the length, zeroed PI/CI */
	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + stream_off,
			lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + stream_off,
			upper_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_SIZE_0 + stream_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + stream_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + stream_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + stream_off,
			QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + stream_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + stream_off,
			QMAN_LDMA_DST_OFFSET);

	/* Message bases 0/1 use the E_N sync manager, 2/3 the W_S one */
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + stream_off, mon_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + stream_off, mon_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + stream_off, sob_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + stream_off, sob_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + stream_off, mon_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + stream_off, mon_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + stream_off, sob_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + stream_off, sob_ws_hi);

	/* Everything below is QMAN-wide and is done with stream 0 only */
	if (qman_id != 0)
		return;

	/* Target of the error message: the GIC or a F/W supplied register */
	if (hdev->asic_prop.gic_interrupts_enable)
		irq_off = mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR;
	else
		irq_off = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

	err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
	if (hdev->stop_on_err)
		err_cfg |= NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

	WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, err_cfg);
	WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_off));
	WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_off));
	/* Error payload: cpu_id of the NIC0 QM0 event plus the NIC index */
	WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
								nic_id);

	WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
			QM_ARB_ERR_MSG_EN_MASK);
	WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
			GAUDI_ARB_WDT_TIMEOUT);

	WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
	WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
			QMAN_INTERNAL_MAKE_TRUSTED);
}
static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct gaudi_internal_qman_info *q;
u64 qman_base_addr;
u32 nic_offset = 0;
u32 nic_delta_between_qmans =
mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
u32 nic_delta_between_nics =
mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
int i, nic_id, internal_q_index;
if (!hdev->nic_ports_mask)
return;
if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
return;
dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
if (!(hdev->nic_ports_mask & (1 << nic_id))) {
nic_offset += nic_delta_between_qmans;
if (nic_id & 1) {
nic_offset -= (nic_delta_between_qmans * 2);
nic_offset += nic_delta_between_nics;
}
continue;
}
for (i = 0 ; i < QMAN_STREAMS ; i++) {
internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
nic_id * QMAN_STREAMS + i;
q = &gaudi->internal_qmans[internal_q_index];
qman_base_addr = (u64) q->pq_dma_addr;
gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
qman_base_addr, nic_id);
}
WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
nic_offset += nic_delta_between_qmans;
if (nic_id & 1) {
nic_offset -= (nic_delta_between_qmans * 2);
nic_offset += nic_delta_between_nics;
}
gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
}
}
/*
 * gaudi_disable_pci_dma_qmans() - clear GLBL_CFG0 of the DMA0, DMA1 and DMA5
 *                                 QMANs
 * @hdev: pointer to the habanalabs device structure
 *
 * Does nothing unless HW_CAP_PCI_DMA is set.
 */
static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) {
		WREG32(mmDMA0_QM_GLBL_CFG0, 0);
		WREG32(mmDMA1_QM_GLBL_CFG0, 0);
		WREG32(mmDMA5_QM_GLBL_CFG0, 0);
	}
}
/*
 * gaudi_disable_hbm_dma_qmans() - clear GLBL_CFG0 of the DMA2, DMA3, DMA4,
 *                                 DMA6 and DMA7 QMANs
 * @hdev: pointer to the habanalabs device structure
 *
 * Does nothing unless HW_CAP_HBM_DMA is set.
 */
static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) {
		WREG32(mmDMA2_QM_GLBL_CFG0, 0);
		WREG32(mmDMA3_QM_GLBL_CFG0, 0);
		WREG32(mmDMA4_QM_GLBL_CFG0, 0);
		WREG32(mmDMA6_QM_GLBL_CFG0, 0);
		WREG32(mmDMA7_QM_GLBL_CFG0, 0);
	}
}
/*
 * gaudi_disable_mme_qmans() - clear GLBL_CFG0 of the MME2 and MME0 QMANs
 * @hdev: pointer to the habanalabs device structure
 *
 * Does nothing unless HW_CAP_MME is set.
 */
static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_MME) {
		WREG32(mmMME2_QM_GLBL_CFG0, 0);
		WREG32(mmMME0_QM_GLBL_CFG0, 0);
	}
}
/*
 * gaudi_disable_tpc_qmans() - clear GLBL_CFG0 of every TPC QMAN
 * @hdev: pointer to the habanalabs device structure
 *
 * Does nothing unless at least one bit of HW_CAP_TPC_MASK is set.
 */
static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const u32 qm_delta = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
	u32 qm_off;
	int tpc_id;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	/* TPC QMAN blocks are evenly spaced, so step by a fixed delta */
	for (tpc_id = 0, qm_off = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ;
					tpc_id++, qm_off += qm_delta)
		WREG32(mmTPC0_QM_GLBL_CFG0 + qm_off, 0);
}
/*
 * gaudi_disable_nic_qmans() - clear GLBL_CFG0 of every initialized NIC QMAN
 * @hdev: pointer to the habanalabs device structure
 *
 * Walks all NIC engines and clears the QMAN's GLBL_CFG0 only for engines
 * whose capability bit is set in hw_cap_initialized.
 */
static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	const u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 cap_bit, qm_off = 0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		cap_bit = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		if (gaudi->hw_cap_initialized & cap_bit)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + qm_off, 0);

		/*
		 * Advance to the next QMAN block. After an even engine that
		 * is the second QMAN of the same NIC block; after an odd
		 * engine it is the first QMAN of the next NIC block, so step
		 * back over one QMAN and forward by one NIC.
		 */
		if (nic_id & 1)
			qm_off += nic_delta_between_nics -
					nic_delta_between_qmans;
		else
			qm_off += nic_delta_between_qmans;
	}
}
/*
 * gaudi_stop_pci_dma_qmans() - set the CP stop field in GLBL_CFG1 of the
 * DMA0, DMA1 and DMA5 QMANs.
 * @hdev: habanalabs device structure.
 *
 * Writes 0xF shifted by DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT. Nothing is written
 * unless HW_CAP_PCI_DMA is set in hw_cap_initialized.
 */
static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gdev = hdev->asic_specific;

	if (gdev->hw_cap_initialized & HW_CAP_PCI_DMA) {
		WREG32(mmDMA0_QM_GLBL_CFG1,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmDMA1_QM_GLBL_CFG1,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmDMA5_QM_GLBL_CFG1,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	}
}
/*
 * gaudi_stop_hbm_dma_qmans() - set the CP stop field in GLBL_CFG1 of the
 * DMA2, DMA3, DMA4, DMA6 and DMA7 QMANs.
 * @hdev: habanalabs device structure.
 *
 * Writes 0x1F shifted by DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT. Nothing is written
 * unless HW_CAP_HBM_DMA is set in hw_cap_initialized.
 */
static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gdev = hdev->asic_specific;

	if (gdev->hw_cap_initialized & HW_CAP_HBM_DMA) {
		WREG32(mmDMA2_QM_GLBL_CFG1,
			0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmDMA3_QM_GLBL_CFG1,
			0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmDMA4_QM_GLBL_CFG1,
			0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmDMA6_QM_GLBL_CFG1,
			0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmDMA7_QM_GLBL_CFG1,
			0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	}
}
/*
 * gaudi_stop_mme_qmans() - set the CP stop field in GLBL_CFG1 of the MME2
 * and MME0 QMANs.
 * @hdev: habanalabs device structure.
 *
 * Writes 0x1F shifted by MME0_QM_GLBL_CFG1_CP_STOP_SHIFT, MME2 first.
 * Nothing is written unless HW_CAP_MME is set in hw_cap_initialized.
 */
static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gdev = hdev->asic_specific;

	if (gdev->hw_cap_initialized & HW_CAP_MME) {
		WREG32(mmMME2_QM_GLBL_CFG1,
			0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmMME0_QM_GLBL_CFG1,
			0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	}
}
/*
 * gaudi_stop_tpc_qmans() - set the CP stop field in GLBL_CFG1 of the TPC0
 * through TPC7 QMANs.
 * @hdev: habanalabs device structure.
 *
 * Writes 0x1F shifted by TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT to each QMAN.
 * Nothing is written when no bit of HW_CAP_TPC_MASK is set in
 * hw_cap_initialized.
 */
static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gdev = hdev->asic_specific;

	if (gdev->hw_cap_initialized & HW_CAP_TPC_MASK) {
		WREG32(mmTPC0_QM_GLBL_CFG1,
			0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmTPC1_QM_GLBL_CFG1,
			0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmTPC2_QM_GLBL_CFG1,
			0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmTPC3_QM_GLBL_CFG1,
			0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmTPC4_QM_GLBL_CFG1,
			0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmTPC5_QM_GLBL_CFG1,
			0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmTPC6_QM_GLBL_CFG1,
			0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
		WREG32(mmTPC7_QM_GLBL_CFG1,
			0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	}
}
/*
 * gaudi_stop_nic_qmans() - set the PQF, CQF and CP stop masks in GLBL_CFG1
 * of every initialized NIC QMAN.
 * @hdev: habanalabs device structure.
 *
 * Each of the ten NIC QMANs (NIC0..NIC4, QM0/QM1) is written only when its
 * matching HW_CAP_NICn bit is set in hw_cap_initialized.
 */
static void gaudi_stop_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gdev = hdev->asic_specific;
	/* The same stop value is written to every NIC QMAN */
	const unsigned int stop_all = NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK;

	if (gdev->hw_cap_initialized & HW_CAP_NIC0)
		WREG32(mmNIC0_QM0_GLBL_CFG1, stop_all);

	if (gdev->hw_cap_initialized & HW_CAP_NIC1)
		WREG32(mmNIC0_QM1_GLBL_CFG1, stop_all);

	if (gdev->hw_cap_initialized & HW_CAP_NIC2)
		WREG32(mmNIC1_QM0_GLBL_CFG1, stop_all);

	if (gdev->hw_cap_initialized & HW_CAP_NIC3)
		WREG32(mmNIC1_QM1_GLBL_CFG1, stop_all);

	if (gdev->hw_cap_initialized & HW_CAP_NIC4)
		WREG32(mmNIC2_QM0_GLBL_CFG1, stop_all);

	if (gdev->hw_cap_initialized & HW_CAP_NIC5)
		WREG32(mmNIC2_QM1_GLBL_CFG1, stop_all);

	if (gdev->hw_cap_initialized & HW_CAP_NIC6)
		WREG32(mmNIC3_QM0_GLBL_CFG1, stop_all);

	if (gdev->hw_cap_initialized & HW_CAP_NIC7)
		WREG32(mmNIC3_QM1_GLBL_CFG1, stop_all);

	if (gdev->hw_cap_initialized & HW_CAP_NIC8)
		WREG32(mmNIC4_QM0_GLBL_CFG1, stop_all);

	if (gdev->hw_cap_initialized & HW_CAP_NIC9)
		WREG32(mmNIC4_QM1_GLBL_CFG1, stop_all);
}
/*
 * gaudi_pci_dma_stall() - set the HALT bit in CORE_CFG_1 of DMA cores 0, 1
 * and 5.
 * @hdev: habanalabs device structure.
 *
 * Nothing is written unless HW_CAP_PCI_DMA is set in hw_cap_initialized.
 */
static void gaudi_pci_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gdev = hdev->asic_specific;

	if (gdev->hw_cap_initialized & HW_CAP_PCI_DMA) {
		WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
		WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
		WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	}
}
/*
 * gaudi_hbm_dma_stall() - set the HALT bit in CORE_CFG_1 of DMA cores 2, 3,
 * 4, 6 and 7.
 * @hdev: habanalabs device structure.
 *
 * Nothing is written unless HW_CAP_HBM_DMA is set in hw_cap_initialized.
 */
static void gaudi_hbm_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gdev = hdev->asic_specific;

	if (gdev->hw_cap_initialized & HW_CAP_HBM_DMA) {
		WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
		WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
		WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
		WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
		WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	}
}
/*
 * gaudi_mme_stall() - set the stall bit in the ACC and SBAB blocks of MME0
 * through MME3.
 * @hdev: habanalabs device structure.
 *
 * Nothing is written unless HW_CAP_MME is set in hw_cap_initialized.
 *
 * NOTE(review): every register write below is issued twice back-to-back.
 * This looks deliberate (presumably a hardware workaround) - confirm before
 * removing the duplicates.
 */
static void gaudi_mme_stall(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
return;
/* MME0: ACC stall, then SBAB stall */
WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
/* MME1 */
WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
/* MME2 */
WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
/* MME3 */
WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
}
/*
 * gaudi_tpc_stall() - set the stall bit in CFG_TPC_STALL of TPC0 through
 * TPC7.
 * @hdev: habanalabs device structure.
 *
 * Nothing is written when no bit of HW_CAP_TPC_MASK is set in
 * hw_cap_initialized.
 */
static void gaudi_tpc_stall(struct hl_device *hdev)
{
	struct gaudi_device *gdev = hdev->asic_specific;

	if (gdev->hw_cap_initialized & HW_CAP_TPC_MASK) {
		WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
		WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
		WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
		WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
		WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
		WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
		WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
		WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	}
}
/*
 * gaudi_disable_clock_gating() - zero the CGM_CFG and CGM_CFG1 registers of
 * the DMA, MME0/MME2 and TPC QMANs.
 * @hdev: habanalabs device structure.
 *
 * Skipped entirely when firmware security is enabled.
 */
static void gaudi_disable_clock_gating(struct hl_device *hdev)
{
	u32 reg_off;
	int idx;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	/* One CGM_CFG/CGM_CFG1 pair per DMA channel */
	reg_off = 0;
	for (idx = 0 ; idx < DMA_NUMBER_OF_CHANNELS ; idx++) {
		WREG32(mmDMA0_QM_CGM_CFG + reg_off, 0);
		WREG32(mmDMA0_QM_CGM_CFG1 + reg_off, 0);
		reg_off += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
	}

	WREG32(mmMME0_QM_CGM_CFG, 0);
	WREG32(mmMME0_QM_CGM_CFG1, 0);
	WREG32(mmMME2_QM_CGM_CFG, 0);
	WREG32(mmMME2_QM_CGM_CFG1, 0);

	/* One CGM_CFG/CGM_CFG1 pair per TPC engine */
	reg_off = 0;
	for (idx = 0 ; idx < TPC_NUMBER_OF_ENGINES ; idx++) {
		WREG32(mmTPC0_QM_CGM_CFG + reg_off, 0);
		WREG32(mmTPC0_QM_CGM_CFG1 + reg_off, 0);
		reg_off += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
	}
}
/*
 * gaudi_enable_timestamp() - restart the PSOC timestamp block from zero.
 * @hdev: habanalabs device structure.
 *
 * Writes 0 to the block's first register, 0 to offsets 0xC and 0x8, then 1 to
 * the first register again.
 * NOTE(review): presumably "stop, clear both counter halves, start" - confirm
 * against the register specification.
 */
static void gaudi_enable_timestamp(struct hl_device *hdev)
{
	WREG32((mmPSOC_TIMESTAMP_BASE - CFG_BASE) + 0x0, 0);

	WREG32((mmPSOC_TIMESTAMP_BASE - CFG_BASE) + 0xC, 0);
	WREG32((mmPSOC_TIMESTAMP_BASE - CFG_BASE) + 0x8, 0);

	WREG32((mmPSOC_TIMESTAMP_BASE - CFG_BASE) + 0x0, 1);
}
/*
 * gaudi_disable_timestamp() - write 0 to the first register of the PSOC
 * timestamp block.
 * @hdev: habanalabs device structure.
 */
static void gaudi_disable_timestamp(struct hl_device *hdev)
{
	WREG32((mmPSOC_TIMESTAMP_BASE - CFG_BASE) + 0x0, 0);
}
/*
 * gaudi_halt_engines() - stop, stall and disable the engines, then disable
 * MSI.
 * @hdev: habanalabs device structure.
 * @hard_reset: not used by this function.
 * @fw_reset: when true, the engine sequence is skipped and only MSI is
 *            disabled.
 *
 * The engine sequence is: stop all QMANs, sleep, stall the engines, sleep,
 * disable the QMANs and the timestamp. The sleep is
 * GAUDI_PLDM_RESET_WAIT_MSEC on pldm and GAUDI_RESET_WAIT_MSEC otherwise.
 */
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 settle_ms;

	dev_info(hdev->dev,
		"Halting compute engines and disabling interrupts\n");

	settle_ms = hdev->pldm ? GAUDI_PLDM_RESET_WAIT_MSEC :
					GAUDI_RESET_WAIT_MSEC;

	if (!fw_reset) {
		/* Step 1: stop the QMAN command processors */
		gaudi_stop_nic_qmans(hdev);
		gaudi_stop_mme_qmans(hdev);
		gaudi_stop_tpc_qmans(hdev);
		gaudi_stop_hbm_dma_qmans(hdev);
		gaudi_stop_pci_dma_qmans(hdev);

		msleep(settle_ms);

		/* Step 2: stall the engines themselves */
		gaudi_pci_dma_stall(hdev);
		gaudi_hbm_dma_stall(hdev);
		gaudi_tpc_stall(hdev);
		gaudi_mme_stall(hdev);

		msleep(settle_ms);

		/* Step 3: disable the QMANs and the timestamp */
		gaudi_disable_nic_qmans(hdev);
		gaudi_disable_mme_qmans(hdev);
		gaudi_disable_tpc_qmans(hdev);
		gaudi_disable_hbm_dma_qmans(hdev);
		gaudi_disable_pci_dma_qmans(hdev);

		gaudi_disable_timestamp(hdev);
	}

	gaudi_disable_msi(hdev);
}
/*
 * gaudi_mmu_init() - program the hop0 table addresses and enable the MMU.
 * @hdev: habanalabs device structure.
 *
 * Does nothing when the MMU is disabled in @hdev or HW_CAP_MMU is already
 * set. Otherwise sets the hop0 address of every ASID, configures and
 * invalidates the STLB cache, enables the MMU and records HW_CAP_MMU.
 *
 * Return: 0 on success or when skipped, otherwise the error returned by
 * gaudi_mmu_update_asid_hop0_addr().
 */
static int gaudi_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (!hdev->mmu_enable || (gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	/* Hop0 tables are laid out back-to-back, one per ASID */
	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->mmu_hop_table_size);

		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (!rc)
			continue;

		dev_err(hdev->dev,
			"failed to set hop0 addr for asid %d\n", i);
		return rc;
	}

	/* Cache management address, split over two registers */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);

	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);

	hl_mmu_invalidate_cache(hdev, true, 0);

	WREG32(mmMMU_UP_MMU_ENABLE, 1);
	WREG32(mmMMU_UP_SPI_MASK, 0xF);

	WREG32(mmSTLB_HOP_CONFIGURATION,
			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);

	gaudi->mmu_cache_inv_pi = 1;
	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;
}
static int gaudi_load_firmware_to_device(struct hl_device *hdev)
{
void __iomem *dst;
dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
}
static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
{
void __iomem *dst;
dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
}
/*
 * gaudi_init_dynamic_firmware_loader() - seed the dynamic F/W loader state.
 * @hdev: habanalabs device structure.
 *
 * Stores the KMD_MSG_TO_CPU and CPU_CMD_STATUS_TO_HOST register offsets
 * (little-endian) in the communication descriptor's dynamic registers and
 * sets the wait-for-bootloader timeout.
 */
static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
{
	struct dynamic_fw_load_mgr *dynamic_loader =
					&hdev->fw_loader.dynamic_loader;
	struct cpu_dyn_regs *dyn_regs =
				&dynamic_loader->comm_desc.cpu_dyn_regs;

	dyn_regs->kmd_msg_to_cpu =
			cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host =
			cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);

	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
}
/*
 * gaudi_init_static_firmware_loader() - fill the static F/W loader with the
 * Gaudi register offsets and limits.
 * @hdev: habanalabs device structure.
 *
 * The CPU reset wait is GAUDI_PLDM_RESET_WAIT_MSEC on pldm and
 * GAUDI_CPU_RESET_WAIT_MSEC otherwise.
 */
static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
{
	struct static_fw_load_mgr *static_loader =
					&hdev->fw_loader.static_loader;

	/* Version strings: maximum offsets inside SRAM */
	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;

	/* Host <-> device CPU messaging registers */
	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;

	/* Boot status and error registers */
	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;

	/* Version offset registers */
	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;

	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));

	if (hdev->pldm)
		static_loader->cpu_reset_wait_msec =
						GAUDI_PLDM_RESET_WAIT_MSEC;
	else
		static_loader->cpu_reset_wait_msec =
						GAUDI_CPU_RESET_WAIT_MSEC;
}
/*
 * gaudi_init_firmware_loader() - initialize the common F/W loader fields and
 * then the dynamic or static loader.
 * @hdev: habanalabs device structure.
 *
 * The dynamic loader is chosen when asic_prop.dynamic_fw_load is set, the
 * static one otherwise.
 */
static void gaudi_init_firmware_loader(struct hl_device *hdev)
{
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;

	/* Nothing loaded yet */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;

	/* Image names */
	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;

	/* Timeouts */
	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;

	fw_loader->skip_bmc = !hdev->bmc_enable;

	/* BARs used by the loader */
	fw_loader->sram_bar_id = SRAM_BAR_ID;
	fw_loader->dram_bar_id = HBM_BAR_ID;

	if (!hdev->asic_prop.dynamic_fw_load) {
		gaudi_init_static_firmware_loader(hdev);
		return;
	}

	gaudi_init_dynamic_firmware_loader(hdev);
}
/*
 * gaudi_init_cpu() - initialize the device CPU through hl_fw_init_cpu().
 * @hdev: habanalabs device structure.
 *
 * Skipped when FW_TYPE_PREBOOT_CPU is not in fw_components or HW_CAP_CPU is
 * already set. When firmware security is disabled, CPU_IF_CPU_MSB_ADDR is
 * first programmed with hdev->cpu_pci_msb_addr.
 *
 * Return: 0 on success or when skipped, otherwise the error returned by
 * hl_fw_init_cpu().
 */
static int gaudi_init_cpu(struct hl_device *hdev)
{
	struct gaudi_device *gdev = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gdev->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	if (!hdev->asic_prop.fw_security_enabled)
		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);

	rc = hl_fw_init_cpu(hdev);
	if (!rc)
		gdev->hw_cap_initialized |= HW_CAP_CPU;

	return rc;
}
/*
 * gaudi_init_cpu_queues() - publish the CPU PQ/EQ/CQ to the device CPU and
 * wait for it to acknowledge.
 * @hdev: habanalabs device structure.
 * @cpu_timeout: timeout passed to hl_poll_timeout() while waiting for
 *               PQ_INIT_STATUS_READY_FOR_HOST.
 *
 * Skipped when CPU queues are disabled in @hdev or HW_CAP_CPU_Q is already
 * set.
 *
 * Return: 0 on success or when skipped, -EIO when the poll fails.
 */
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
struct cpu_dyn_regs *dyn_regs =
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
u32 status, irq_handler_offset;
struct hl_eq *eq;
struct hl_hw_queue *cpu_pq =
&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
int err;
if (!hdev->cpu_queues_enable)
return 0;
if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
return 0;
eq = &hdev->event_queue;
/* Base addresses of the PQ, the EQ and the CPU-accessible memory (CQ) */
WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
lower_32_bits(hdev->cpu_accessible_dma_address));
WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
upper_32_bits(hdev->cpu_accessible_dma_address));
/* Queue sizes */
WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
/* Reset the EQ read offset and the PQ producer index */
WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
WREG32(mmCPU_IF_PF_PQ_PI, 0);
/* The init status written depends on the MSI mode in use */
if (gaudi->multi_msi_mode)
WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
else
WREG32(mmCPU_IF_QUEUE_INIT,
PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
/*
 * Raise the PI-update event: through the GIC SETSPI register when GIC
 * interrupts are enabled, otherwise through the register offset held in the
 * dynamic registers.
 */
irq_handler_offset = prop->gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
WREG32(irq_handler_offset,
gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
/* Wait until the queue init register reads READY_FOR_HOST */
err = hl_poll_timeout(
hdev,
mmCPU_IF_QUEUE_INIT,
status,
(status == PQ_INIT_STATUS_READY_FOR_HOST),
1000,
cpu_timeout);
if (err) {
dev_err(hdev->dev,
"Failed to communicate with Device CPU (CPU-CP timeout)\n");
return -EIO;
}
/* Refresh the cached boot device status words when they are marked valid */
if (prop->fw_cpu_boot_dev_sts0_valid)
prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
if (prop->fw_cpu_boot_dev_sts1_valid)
prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
return 0;
}
/*
 * gaudi_pre_hw_init() - first register accesses performed by gaudi_hw_init().
 * @hdev: habanalabs device structure.
 *
 * Reads HW_STATE, enables the LBW protection override for reads and writes
 * when firmware security is disabled, and finally marks HW_STATE as
 * HL_DEVICE_HW_STATE_DIRTY.
 */
static void gaudi_pre_hw_init(struct hl_device *hdev)
{
/* Result is discarded - NOTE(review): presumably a liveness/flush read */
RREG32(mmHW_STATE);
if (!hdev->asic_prop.fw_security_enabled) {
WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
/* Read back the register just written; the value is discarded */
RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
}
WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
}
/*
 * gaudi_hw_init() - bring up the Gaudi H/W blocks in order.
 * @hdev: habanalabs device structure.
 *
 * Sequence: pre-init register accesses, HBM BAR mapping, device CPU, clock
 * gating off, scramblers, golden registers, MMU, security, all QMANs,
 * timestamp, MSI and finally the CPU queues.
 *
 * Return: 0 on success, -EIO when the HBM BAR cannot be mapped, otherwise the
 * error code of the step that failed.
 */
static int gaudi_hw_init(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
int rc;
gaudi_pre_hw_init(hdev);
/* When the F/W already configured the iATU, just record the BAR address */
if (hdev->asic_prop.iatu_done_by_fw)
gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
dev_err(hdev->dev,
"failed to map HBM bar to DRAM base address\n");
return -EIO;
}
rc = gaudi_init_cpu(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize CPU\n");
return rc;
}
gaudi_disable_clock_gating(hdev);
gaudi_init_scrambler_sram(hdev);
gaudi_init_scrambler_hbm(hdev);
gaudi_init_golden_registers(hdev);
rc = gaudi_mmu_init(hdev);
if (rc)
return rc;
gaudi_init_security(hdev);
gaudi_init_pci_dma_qmans(hdev);
gaudi_init_hbm_dma_qmans(hdev);
gaudi_init_mme_qmans(hdev);
gaudi_init_tpc_qmans(hdev);
gaudi_init_nic_qmans(hdev);
gaudi_enable_timestamp(hdev);
rc = gaudi_enable_msi(hdev);
if (rc)
goto disable_queues;
rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
if (rc) {
dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
rc);
goto disable_msi;
}
/* Final read of HW_STATE; the value is discarded */
RREG32(mmHW_STATE);
return 0;
disable_msi:
gaudi_disable_msi(hdev);
disable_queues:
/*
 * NOTE(review): only the MME and PCI DMA QMANs are disabled here, although
 * the HBM DMA, TPC and NIC QMANs were initialized above as well - confirm
 * that the partial cleanup is intentional.
 */
gaudi_disable_mme_qmans(hdev);
gaudi_disable_pci_dma_qmans(hdev);
return rc;
}
/*
 * gaudi_hw_fini() - hard-reset the device and clear the driver's H/W
 * capability state.
 * @hdev: habanalabs device structure.
 * @hard_reset: must be true; a soft reset is rejected with an error message.
 * @fw_reset: when true the F/W performs the reset and the driver only waits.
 *
 * When neither F/W security nor "hard reset done by F/W" is set, the driver
 * programs the reset configuration registers and triggers the reset itself.
 * In every case the function then sleeps for the reset timeout, checks the
 * BTM FSM state and clears the capability bits.
 */
static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
struct cpu_dyn_regs *dyn_regs =
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
struct gaudi_device *gaudi = hdev->asic_specific;
bool driver_performs_reset;
if (!hard_reset) {
dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
return;
}
/* pldm uses the longer PLDM timeouts */
if (hdev->pldm) {
reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
} else {
reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
}
/* Reset already triggered by the F/W: skip straight to the wait */
if (fw_reset) {
dev_info(hdev->dev,
"Firmware performs HARD reset, going to wait %dms\n",
reset_timeout_ms);
goto skip_reset;
}
/* The driver resets only if F/W security is off and F/W does not reset */
driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
!hdev->asic_prop.hard_reset_done_by_fw);
/* NOTE(review): presumably hands FLR handling to the H/W - confirm */
if (driver_performs_reset)
WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
/*
 * With the Linux F/W component loaded, raise the HALT_MACHINE event (via the
 * GIC SETSPI register or the offset held in the dynamic registers). Without
 * it, or additionally when the reset cause is a heartbeat failure, use the
 * "without_linux" request helpers.
 */
if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
le32_to_cpu(dyn_regs->gic_host_halt_irq);
WREG32(irq_handler_offset,
gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
if (hdev->asic_prop.hard_reset_done_by_fw)
hl_fw_ask_hard_reset_without_linux(hdev);
else
hl_fw_ask_halt_machine_without_linux(hdev);
}
} else {
if (hdev->asic_prop.hard_reset_done_by_fw)
hl_fw_ask_hard_reset_without_linux(hdev);
else
hl_fw_ask_halt_machine_without_linux(hdev);
}
if (driver_performs_reset) {
/* Program the soft-reset and SW-all-reset configuration registers */
WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
(CFG_RST_H_DMA_MASK |
CFG_RST_H_MME_MASK |
CFG_RST_H_SM_MASK |
CFG_RST_H_TPC_7_MASK));
WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
(CFG_RST_H_HBM_MASK |
CFG_RST_H_TPC_7_MASK |
CFG_RST_H_NIC_MASK |
CFG_RST_H_SM_MASK |
CFG_RST_H_DMA_MASK |
CFG_RST_H_MME_MASK |
CFG_RST_H_CPU_MASK |
CFG_RST_H_MMU_MASK));
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
(CFG_RST_L_IF_MASK |
CFG_RST_L_PSOC_MASK |
CFG_RST_L_TPC_MASK));
msleep(cpu_timeout_ms);
/* Write the hard-reset magic and request a boot sequence restart */
WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
/* Trigger the reset */
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
dev_info(hdev->dev,
"Issued HARD reset command, going to wait %dms\n",
reset_timeout_ms);
} else {
dev_info(hdev->dev,
"Firmware performs HARD reset, going to wait %dms\n",
reset_timeout_ms);
}
skip_reset:
/* Fixed-length wait; any state bit still set afterwards is an error */
msleep(reset_timeout_ms);
status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
dev_err(hdev->dev,
"Timeout while waiting for device to reset 0x%x\n",
status);
/* asic_specific may be NULL here, hence the check */
if (gaudi) {
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
HW_CAP_HBM_SCRAMBLER);
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
hdev->device_cpu_is_halted = false;
}
}
/*
 * gaudi_suspend() - send CPUCP_PACKET_DISABLE_PCI_ACCESS to the device CPU.
 * @hdev: habanalabs device structure.
 *
 * Return: 0 on success, otherwise the error returned by
 * hl_fw_send_pci_access_msg() (also logged).
 */
static int gaudi_suspend(struct hl_device *hdev)
{
	int rc = hl_fw_send_pci_access_msg(hdev,
					CPUCP_PACKET_DISABLE_PCI_ACCESS);

	if (!rc)
		return 0;

	dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
	return rc;
}
/*
 * gaudi_resume() - re-initialize the iATU.
 * @hdev: habanalabs device structure.
 *
 * Return: the value returned by gaudi_init_iatu().
 */
static int gaudi_resume(struct hl_device *hdev)
{
	int rc = gaudi_init_iatu(hdev);

	return rc;
}
/*
 * gaudi_mmap() - map a coherent DMA buffer into a user VMA.
 * @hdev: habanalabs device structure.
 * @vma: user virtual memory area to map into.
 * @cpu_addr: kernel virtual address of the buffer.
 * @dma_addr: device-visible address of the buffer; HOST_PHYS_BASE is
 *            subtracted before it is handed to the DMA API.
 * @size: size of the mapping.
 *
 * Return: 0 on success, otherwise the error returned by dma_mmap_coherent().
 */
static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
				(dma_addr - HOST_PHYS_BASE), size);
	if (rc)
		/* Terminate the message so the log record is closed */
		dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);

	return rc;
}
/*
 * gaudi_ring_doorbell() - write a new producer index to the PQ_PI register of
 * a H/W queue.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: GAUDI_QUEUE_ID_* of the queue to update.
 * @pi: producer index value to write.
 *
 * The switch translates @hw_queue_id into the matching PQ_PI register offset.
 * Unknown ids, the CPU PQ without HW_CAP_CPU_Q and NIC queues whose
 * HW_CAP_NICn bit is clear are reported with dev_err() and nothing is
 * written. For the CPU PQ the PI-update event is raised after the write.
 *
 * NOTE(review): the DMA_2..DMA_7 and NIC cases subtract 1 from the queue id
 * before taking the stream index with "& 0x3", while DMA_0/DMA_1 do not.
 * Presumably the CPU PQ id sits between DMA_1_3 and DMA_2_0 in the queue id
 * enum - confirm against the enum definition.
 */
static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
struct cpu_dyn_regs *dyn_regs =
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
struct gaudi_device *gaudi = hdev->asic_specific;
bool invalid_queue = false;
int dma_id;
switch (hw_queue_id) {
/* DMA queues: logical channel -> physical DMA id via gaudi_dma_assignment */
case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
break;
/* CPU PQ: only valid once the CPU queues were initialized */
case GAUDI_QUEUE_ID_CPU_PQ:
if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
db_reg_offset = mmCPU_IF_PF_PQ_PI;
else
invalid_queue = true;
break;
/* MME queues: queue MME_0_x uses the MME2 QMAN, MME_1_x the MME0 QMAN */
case GAUDI_QUEUE_ID_MME_0_0:
db_reg_offset = mmMME2_QM_PQ_PI_0;
break;
case GAUDI_QUEUE_ID_MME_0_1:
db_reg_offset = mmMME2_QM_PQ_PI_1;
break;
case GAUDI_QUEUE_ID_MME_0_2:
db_reg_offset = mmMME2_QM_PQ_PI_2;
break;
case GAUDI_QUEUE_ID_MME_0_3:
db_reg_offset = mmMME2_QM_PQ_PI_3;
break;
case GAUDI_QUEUE_ID_MME_1_0:
db_reg_offset = mmMME0_QM_PQ_PI_0;
break;
case GAUDI_QUEUE_ID_MME_1_1:
db_reg_offset = mmMME0_QM_PQ_PI_1;
break;
case GAUDI_QUEUE_ID_MME_1_2:
db_reg_offset = mmMME0_QM_PQ_PI_2;
break;
case GAUDI_QUEUE_ID_MME_1_3:
db_reg_offset = mmMME0_QM_PQ_PI_3;
break;
/* TPC queues: one-to-one mapping of queue TPC_n_s to mmTPCn_QM_PQ_PI_s */
case GAUDI_QUEUE_ID_TPC_0_0:
db_reg_offset = mmTPC0_QM_PQ_PI_0;
break;
case GAUDI_QUEUE_ID_TPC_0_1:
db_reg_offset = mmTPC0_QM_PQ_PI_1;
break;
case GAUDI_QUEUE_ID_TPC_0_2:
db_reg_offset = mmTPC0_QM_PQ_PI_2;
break;
case GAUDI_QUEUE_ID_TPC_0_3:
db_reg_offset = mmTPC0_QM_PQ_PI_3;
break;
case GAUDI_QUEUE_ID_TPC_1_0:
db_reg_offset = mmTPC1_QM_PQ_PI_0;
break;
case GAUDI_QUEUE_ID_TPC_1_1:
db_reg_offset = mmTPC1_QM_PQ_PI_1;
break;
case GAUDI_QUEUE_ID_TPC_1_2:
db_reg_offset = mmTPC1_QM_PQ_PI_2;
break;
case GAUDI_QUEUE_ID_TPC_1_3:
db_reg_offset = mmTPC1_QM_PQ_PI_3;
break;
case GAUDI_QUEUE_ID_TPC_2_0:
db_reg_offset = mmTPC2_QM_PQ_PI_0;
break;
case GAUDI_QUEUE_ID_TPC_2_1:
db_reg_offset = mmTPC2_QM_PQ_PI_1;
break;
case GAUDI_QUEUE_ID_TPC_2_2:
db_reg_offset = mmTPC2_QM_PQ_PI_2;
break;
case GAUDI_QUEUE_ID_TPC_2_3:
db_reg_offset = mmTPC2_QM_PQ_PI_3;
break;
case GAUDI_QUEUE_ID_TPC_3_0:
db_reg_offset = mmTPC3_QM_PQ_PI_0;
break;
case GAUDI_QUEUE_ID_TPC_3_1:
db_reg_offset = mmTPC3_QM_PQ_PI_1;
break;
case GAUDI_QUEUE_ID_TPC_3_2:
db_reg_offset = mmTPC3_QM_PQ_PI_2;
break;
case GAUDI_QUEUE_ID_TPC_3_3:
db_reg_offset = mmTPC3_QM_PQ_PI_3;
break;
case GAUDI_QUEUE_ID_TPC_4_0:
db_reg_offset = mmTPC4_QM_PQ_PI_0;
break;
case GAUDI_QUEUE_ID_TPC_4_1:
db_reg_offset = mmTPC4_QM_PQ_PI_1;
break;
case GAUDI_QUEUE_ID_TPC_4_2:
db_reg_offset = mmTPC4_QM_PQ_PI_2;
break;
case GAUDI_QUEUE_ID_TPC_4_3:
db_reg_offset = mmTPC4_QM_PQ_PI_3;
break;
case GAUDI_QUEUE_ID_TPC_5_0:
db_reg_offset = mmTPC5_QM_PQ_PI_0;
break;
case GAUDI_QUEUE_ID_TPC_5_1:
db_reg_offset = mmTPC5_QM_PQ_PI_1;
break;
case GAUDI_QUEUE_ID_TPC_5_2:
db_reg_offset = mmTPC5_QM_PQ_PI_2;
break;
case GAUDI_QUEUE_ID_TPC_5_3:
db_reg_offset = mmTPC5_QM_PQ_PI_3;
break;
case GAUDI_QUEUE_ID_TPC_6_0:
db_reg_offset = mmTPC6_QM_PQ_PI_0;
break;
case GAUDI_QUEUE_ID_TPC_6_1:
db_reg_offset = mmTPC6_QM_PQ_PI_1;
break;
case GAUDI_QUEUE_ID_TPC_6_2:
db_reg_offset = mmTPC6_QM_PQ_PI_2;
break;
case GAUDI_QUEUE_ID_TPC_6_3:
db_reg_offset = mmTPC6_QM_PQ_PI_3;
break;
case GAUDI_QUEUE_ID_TPC_7_0:
db_reg_offset = mmTPC7_QM_PQ_PI_0;
break;
case GAUDI_QUEUE_ID_TPC_7_1:
db_reg_offset = mmTPC7_QM_PQ_PI_1;
break;
case GAUDI_QUEUE_ID_TPC_7_2:
db_reg_offset = mmTPC7_QM_PQ_PI_2;
break;
case GAUDI_QUEUE_ID_TPC_7_3:
db_reg_offset = mmTPC7_QM_PQ_PI_3;
break;
/* NIC queues: flagged invalid when the matching HW_CAP_NICn bit is clear */
case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
invalid_queue = true;
q_off = ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
invalid_queue = true;
q_off = ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
invalid_queue = true;
q_off = ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
invalid_queue = true;
q_off = ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
invalid_queue = true;
q_off = ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
invalid_queue = true;
q_off = ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
invalid_queue = true;
q_off = ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
invalid_queue = true;
q_off = ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
invalid_queue = true;
q_off = ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
break;
case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
invalid_queue = true;
q_off = ((hw_queue_id - 1) & 0x3) * 4;
db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
break;
default:
invalid_queue = true;
}
/* db_reg_offset is never used on the invalid path */
if (invalid_queue) {
dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
hw_queue_id);
return;
}
db_value = pi;
WREG32(db_reg_offset, db_value);
if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
/* Barrier between the PI write above and the event write below */
mb();
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
WREG32(irq_handler_offset,
gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
}
}
/*
 * gaudi_pqe_write() - copy a buffer descriptor into a PQ entry.
 * @hdev: habanalabs device structure (unused).
 * @pqe: destination PQ entry, written as two 64-bit words.
 * @bd: source buffer descriptor, read as two 64-bit words.
 */
static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
				struct hl_bd *bd)
{
	__le64 *src = (__le64 *) bd;
	int word;

	/* Plain word-by-word copy, low word first */
	for (word = 0 ; word < 2 ; word++)
		pqe[word] = src[word];
}
/*
 * gaudi_dma_alloc_coherent() - allocate coherent DMA memory for the device.
 * @hdev: habanalabs device structure.
 * @size: number of bytes to allocate.
 * @dma_handle: out; on success holds the DMA address plus HOST_PHYS_BASE.
 * @flags: GFP allocation flags.
 *
 * Return: kernel virtual address of the buffer, or NULL on failure.
 */
static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;

	vaddr = dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
	if (!vaddr)
		return vaddr;

	/* Callers see the address offset by HOST_PHYS_BASE */
	*dma_handle += HOST_PHYS_BASE;

	return vaddr;
}
/*
 * gaudi_dma_free_coherent() - free memory obtained from
 * gaudi_dma_alloc_coherent().
 * @hdev: habanalabs device structure.
 * @size: size of the buffer.
 * @cpu_addr: kernel virtual address of the buffer.
 * @dma_handle: address as returned to the caller, i.e. including
 *              HOST_PHYS_BASE.
 */
static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle)
{
	dma_addr_t raw_handle;

	/* Undo the HOST_PHYS_BASE offset applied at allocation time */
	raw_handle = dma_handle - HOST_PHYS_BASE;

	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, raw_handle);
}
/*
 * gaudi_hbm_scrubbing() - memset the user DRAM range using the DMA cores
 * directly.
 * @hdev: habanalabs device structure.
 *
 * Covers DRAM_BASE_ADDR_USER up to asic_prop.dram_end_address. In each round
 * every DMA channel is given one chunk of at most 2GB, then all channels are
 * polled until their BUSY bit clears.
 *
 * Return: 0 on success, -EIO when a channel is still busy after
 * HBM_SCRUBBING_TIMEOUT_US.
 */
static int gaudi_hbm_scrubbing(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 cur_addr = DRAM_BASE_ADDR_USER;
u32 val;
u32 chunk_size;
int rc, dma_id;
while (cur_addr < prop->dram_end_address) {
/* Phase 1: queue one chunk on each DMA channel */
for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
u32 dma_offset = dma_id * DMA_CORE_OFFSET;
chunk_size =
min((u64)SZ_2G, prop->dram_end_address - cur_addr);
dev_dbg(hdev->dev,
"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
cur_addr, cur_addr + chunk_size);
/* In memset mode the SRC_BASE registers carry the fill pattern */
WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
lower_32_bits(cur_addr));
WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
upper_32_bits(cur_addr));
WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
chunk_size);
/* Commit as a linear memset transfer */
WREG32(mmDMA0_CORE_COMMIT + dma_offset,
((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
cur_addr += chunk_size;
/* Range exhausted: leave the remaining channels idle */
if (cur_addr == prop->dram_end_address)
break;
}
/* Phase 2: wait for every channel to go idle before the next round */
for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
u32 dma_offset = dma_id * DMA_CORE_OFFSET;
rc = hl_poll_timeout(
hdev,
mmDMA0_CORE_STS0 + dma_offset,
val,
((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
1000,
HBM_SCRUBBING_TIMEOUT_US);
if (rc) {
dev_err(hdev->dev,
"DMA Timeout during HBM scrubbing of DMA #%d\n",
dma_id);
return -EIO;
}
}
}
return 0;
}
/*
 * gaudi_scrub_device_mem() - scrub the device SRAM and HBM.
 * @hdev: habanalabs device structure.
 * @addr: must be 0 for a scrub to take place.
 * @size: must be 0 for a scrub to take place.
 *
 * Does nothing when hdev->memory_scrub is off or when either @addr or @size
 * is non-zero. Otherwise waits for the device to become idle, fills the user
 * SRAM range with 0x7777777777777777 and then scrubs the HBM.
 *
 * Return: 0 on success or when nothing was done, -EIO when the device does
 * not become idle in time, otherwise the error of the failing scrub step.
 */
static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc = 0;
	u64 val = 0;

	if (!hdev->memory_scrub)
		return 0;

	/* Only the "scrub everything" request (0, 0) is acted upon */
	if (addr || size)
		return rc;

	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0,
		val,
		(hdev->asic_funcs->is_device_idle(hdev, NULL,
						0, NULL)),
		1000,
		HBM_SCRUBBING_TIMEOUT_US);
	if (rc) {
		dev_err(hdev->dev, "waiting for idle timeout\n");
		return -EIO;
	}

	/* SRAM: user range only, shortened to 0x10000 bytes on pldm */
	addr = prop->sram_user_base_address;
	if (hdev->pldm)
		size = 0x10000;
	else
		size = (prop->sram_size - SRAM_USER_BASE_OFFSET);
	val = 0x7777777777777777ull;

	rc = gaudi_memset_device_memory(hdev, addr, size, val);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear SRAM in mem scrub all\n");
		return rc;
	}

	/* HBM */
	rc = gaudi_hbm_scrubbing(hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to clear HBM in mem scrub all\n");

	return rc;
}
/*
 * gaudi_get_int_queue_base() - look up the PQ of an internal queue.
 * @hdev: habanalabs device structure.
 * @queue_id: queue to look up; must be below GAUDI_QUEUE_ID_SIZE and of type
 *            QUEUE_TYPE_INT.
 * @dma_handle: out; DMA address of the PQ.
 * @queue_len: out; PQ length in entries (pq_size / QMAN_PQ_ENTRY_SIZE).
 *
 * Return: kernel address of the PQ, or NULL for an invalid @queue_id (the
 * out parameters are left untouched in that case).
 */
static void *gaudi_get_int_queue_base(struct hl_device *hdev,
				u32 queue_id, dma_addr_t *dma_handle,
				u16 *queue_len)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;

	if (queue_id < GAUDI_QUEUE_ID_SIZE &&
			gaudi_queue_type[queue_id] == QUEUE_TYPE_INT) {
		q = &gaudi->internal_qmans[queue_id];

		*dma_handle = q->pq_dma_addr;
		*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;

		return q->pq_kernel_addr;
	}

	dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
	return NULL;
}
/*
 * gaudi_send_cpu_message() - send a message to the device CPU over the CPU
 * PQ.
 * @hdev: habanalabs device structure.
 * @msg: message buffer.
 * @len: message length.
 * @timeout: timeout for the reply; 0 selects GAUDI_MSG_TO_CPU_TIMEOUT_USEC.
 * @result: optional out; set to 0 when the CPU queues are not initialized.
 *
 * Return: 0 when HW_CAP_CPU_Q is not set (nothing is sent), otherwise the
 * value returned by hl_fw_send_cpu_message().
 */
static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, u64 *result)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) {
		if (!timeout)
			timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;

		return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ,
						msg, len, timeout, result);
	}

	/* CPU queues are down: report success with a zeroed result */
	if (result)
		*result = 0;

	return 0;
}
/*
 * gaudi_test_queue() - sanity-test one H/W queue with a fence packet.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: id of the queue to test.
 *
 * Sends a MSG_PROT packet that writes GAUDI_QMAN0_FENCE_VAL to a 4-byte
 * DMA-pool buffer and polls the buffer until the value shows up.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EIO when the fence
 * value does not arrive in time, otherwise the error of the failing call.
 */
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
struct packet_msg_prot *fence_pkt;
dma_addr_t pkt_dma_addr;
u32 fence_val, tmp, timeout_usec;
dma_addr_t fence_dma_addr;
u32 *fence_ptr;
int rc;
if (hdev->pldm)
timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
else
timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
fence_val = GAUDI_QMAN0_FENCE_VAL;
/* 4-byte scratch word the device writes the fence value to */
fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
&fence_dma_addr);
if (!fence_ptr) {
dev_err(hdev->dev,
"Failed to allocate memory for H/W queue %d testing\n",
hw_queue_id);
return -ENOMEM;
}
*fence_ptr = 0;
fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
sizeof(struct packet_msg_prot),
GFP_KERNEL, &pkt_dma_addr);
if (!fence_pkt) {
dev_err(hdev->dev,
"Failed to allocate packet for H/W queue %d testing\n",
hw_queue_id);
rc = -ENOMEM;
goto free_fence_ptr;
}
/* Build the MSG_PROT packet: opcode plus the EB and MB control bits */
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
fence_pkt->ctl = cpu_to_le32(tmp);
fence_pkt->value = cpu_to_le32(fence_val);
fence_pkt->addr = cpu_to_le64(fence_dma_addr);
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
sizeof(struct packet_msg_prot),
pkt_dma_addr);
if (rc) {
dev_err(hdev->dev,
"Failed to send fence packet to H/W queue %d\n",
hw_queue_id);
goto free_pkt;
}
/* Poll host memory until the device has written the fence value */
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
1000, timeout_usec, true);
/* The CI is advanced whether or not the poll succeeded */
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
if (rc == -ETIMEDOUT) {
dev_err(hdev->dev,
"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
rc = -EIO;
}
free_pkt:
hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
pkt_dma_addr);
free_fence_ptr:
hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
fence_dma_addr);
return rc;
}
/*
 * gaudi_test_cpu_queue() - test the CPU queue if it has been initialized.
 * @hdev: habanalabs device structure.
 *
 * Return: 0 when HW_CAP_CPU_Q is not set, otherwise the result of
 * hl_fw_test_cpu_queue().
 */
static int gaudi_test_cpu_queue(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
		return hl_fw_test_cpu_queue(hdev);

	return 0;
}
static int gaudi_test_queues(struct hl_device *hdev)
{
int i, rc, ret_val = 0;
for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
rc = gaudi_test_queue(hdev, i);
if (rc)
ret_val = -EINVAL;
}
}
rc = gaudi_test_cpu_queue(hdev);
if (rc)
ret_val = -EINVAL;
return ret_val;
}
/*
 * gaudi_dma_pool_zalloc() - allocate a zeroed block from the device DMA pool.
 * @hdev: habanalabs device structure.
 * @size: requested size; must not exceed GAUDI_DMA_POOL_BLK_SIZE.
 * @mem_flags: allocation flags passed to dma_pool_zalloc().
 * @dma_handle: filled with the DMA address, shifted up by HOST_PHYS_BASE.
 *
 * Return: kernel address of the block, or NULL if @size is too large or the
 * pool allocation failed.
 */
static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
					gfp_t mem_flags, dma_addr_t *dma_handle)
{
	void *block;

	if (size > GAUDI_DMA_POOL_BLK_SIZE)
		return NULL;

	block = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
	if (!block)
		return NULL;

	/* Report the address with the HOST_PHYS_BASE offset applied */
	*dma_handle += HOST_PHYS_BASE;

	return block;
}
/*
 * gaudi_dma_pool_free() - return a block to the device DMA pool.
 * @hdev: habanalabs device structure.
 * @vaddr: kernel address of the block.
 * @dma_addr: DMA address of the block, including the HOST_PHYS_BASE offset.
 */
static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
				dma_addr_t dma_addr)
{
	/* Strip the HOST_PHYS_BASE offset before handing the address back */
	dma_pool_free(hdev->dma_pool, vaddr, dma_addr - HOST_PHYS_BASE);
}
/*
 * gaudi_cpu_accessible_dma_pool_alloc() - thin wrapper that forwards its
 * arguments unchanged to hl_fw_cpu_accessible_dma_pool_alloc() and returns
 * that function's result.
 */
static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
size_t size, dma_addr_t *dma_handle)
{
return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}
/*
 * gaudi_cpu_accessible_dma_pool_free() - thin wrapper that forwards its
 * arguments unchanged to hl_fw_cpu_accessible_dma_pool_free().
 */
static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
size_t size, void *vaddr)
{
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
return -ENOMEM;
for_each_sg(sgl, sg, nents, i)
sg->dma_address += HOST_PHYS_BASE;
return 0;
}
static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
for_each_sg(sgl, sg, nents, i)
sg->dma_address -= HOST_PHYS_BASE;
dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
}
/*
 * gaudi_get_dma_desc_list_size() - size of the LIN_DMA packets needed for an
 * SG table.
 * @hdev: habanalabs device structure (unused here).
 * @sgt: DMA-mapped scatter-gather table.
 *
 * Consecutive entries whose DMA addresses are contiguous are merged into one
 * descriptor as long as the merged length does not exceed
 * DMA_MAX_TRANSFER_SIZE. Counting stops at the first zero-length entry.
 *
 * Return: number of descriptors multiplied by sizeof(struct packet_lin_dma).
 */
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
					struct sg_table *sgt)
{
	struct scatterlist *cur, *nxt;
	dma_addr_t chunk_addr, nxt_addr;
	u64 chunk_len, nxt_len;
	u32 idx, nr_desc = 0;

	for_each_sg(sgt->sgl, cur, sgt->nents, idx) {
		chunk_len = sg_dma_len(cur);
		chunk_addr = sg_dma_address(cur);

		if (!chunk_len)
			break;

		/*
		 * Greedily absorb following entries; the outer iterator is
		 * advanced along with the merge so absorbed entries are not
		 * visited again.
		 */
		while ((idx + 1) < sgt->nents) {
			nxt = sg_next(cur);
			nxt_len = sg_dma_len(nxt);
			nxt_addr = sg_dma_address(nxt);

			if (!nxt_len)
				break;

			if (chunk_addr + chunk_len != nxt_addr)
				break;

			if (chunk_len + nxt_len > DMA_MAX_TRANSFER_SIZE)
				break;

			chunk_len += nxt_len;
			idx++;
			cur = nxt;
		}

		nr_desc++;
	}

	return nr_desc * sizeof(struct packet_lin_dma);
}
/*
 * gaudi_pin_memory_before_cs() - make sure the host buffer of a DMA packet is
 * pinned and DMA-mapped, and account for its descriptors.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; its job_userptr_list and patched_cb_size are
 *          updated.
 * @user_dma_pkt: the user's LIN_DMA packet (tsize gives the buffer size).
 * @addr: host address of the buffer.
 * @dir: DMA direction used for the mapping.
 *
 * If the range is not already on the job's userptr list it is pinned, added
 * to the list and mapped. In both cases the patched CB size grows by the
 * descriptor list size of the buffer's SG table.
 *
 * Return: 0 on success, -ENOMEM if the userptr could not be allocated,
 * otherwise the error from pinning or mapping.
 */
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	if (!hl_userptr_is_pinned(hdev, addr,
				le32_to_cpu(user_dma_pkt->tsize),
				parser->job_userptr_list, &userptr)) {
		userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
		if (!userptr)
			return -ENOMEM;

		rc = hl_pin_host_memory(hdev, addr,
					le32_to_cpu(user_dma_pkt->tsize),
					userptr);
		if (rc)
			goto err_free_userptr;

		list_add_tail(&userptr->job_node, parser->job_userptr_list);

		rc = hdev->asic_funcs->asic_dma_map_sg(hdev,
							userptr->sgt->sgl,
							userptr->sgt->nents,
							dir);
		if (rc) {
			dev_err(hdev->dev,
				"failed to map sgt with DMA region\n");
			goto err_unpin;
		}

		userptr->dma_mapped = true;
		userptr->dir = dir;
	}

	parser->patched_cb_size +=
			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

err_unpin:
	list_del(&userptr->job_node);
	hl_unpin_host_memory(hdev, userptr);
err_free_userptr:
	kfree(userptr);
	return rc;
}
/*
 * gaudi_validate_dma_pkt_host() - validate a LIN_DMA packet that touches host
 * memory.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb_size is updated.
 * @user_dma_pkt: the user's LIN_DMA packet.
 * @src_in_host: true when the source is in host memory (host to device).
 *
 * A host-to-device packet with the memset bit set needs no pinning and is
 * accounted as a single packet. Every other packet has its host buffer pinned
 * through gaudi_pin_memory_before_cs().
 *
 * Return: 0 on success, otherwise the error from pinning.
 */
static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				bool src_in_host)
{
	enum dma_data_direction dir;
	bool user_memset;
	u64 addr;

	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
		dir = DMA_TO_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->src_addr);

		/* For memset the source field is not a host buffer to pin */
		if (user_memset) {
			parser->patched_cb_size += sizeof(*user_dma_pkt);
			return 0;
		}
	} else {
		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
		dir = DMA_FROM_DEVICE;
		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	}

	return gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
						addr, dir);
}
/*
 * gaudi_validate_dma_pkt_no_mmu() - validate a user LIN_DMA packet when the
 * MMU is not used.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb_size is updated.
 * @user_dma_pkt: the user's LIN_DMA packet.
 *
 * A zero-size transfer is accounted as one packet without further checks.
 * Otherwise the source is treated as host memory when the job's queue id is
 * at most GAUDI_QUEUE_ID_DMA_0_3, and validation continues in
 * gaudi_validate_dma_pkt_host().
 *
 * Return: 0 on success, otherwise the error from host validation.
 */
static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	u64 dst_addr;

	dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;

	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	/* Nothing to pin for an empty transfer */
	if (!le32_to_cpu(user_dma_pkt->tsize)) {
		parser->patched_cb_size += sizeof(*user_dma_pkt);
		return 0;
	}

	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
			parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3);
}
/*
 * gaudi_validate_load_and_exe_pkt() - validate a user LOAD_AND_EXE packet.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb_size is updated on success.
 * @user_pkt: the user's packet.
 *
 * Return: 0 if the packet is allowed, -EPERM if any bit of
 * GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK is set in its cfg word.
 */
static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_load_and_exe *user_pkt)
{
	u32 cfg = le32_to_cpu(user_pkt->cfg);

	if (!(cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK)) {
		parser->patched_cb_size += sizeof(struct packet_load_and_exe);
		return 0;
	}

	dev_err(hdev->dev,
		"User not allowed to use Load and Execute\n");

	return -EPERM;
}
/*
 * gaudi_validate_cb() - walk a user CB, reject forbidden packets and compute
 * the size of the patched CB.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb_size is recomputed from zero and
 *          contains_dma_pkt is set when a LIN_DMA packet is seen.
 * @is_mmu: true when the MMU is in use, in which case LIN_DMA packets are
 *          accounted as-is instead of being validated against host memory.
 *
 * Parsing stops at the first error. Room for two MSG_PROT packets is added at
 * the end when parser->completion is set, on error paths as well.
 *
 * Return: 0 on success, -EINVAL for an unknown packet or a packet crossing
 * the CB boundary, -EPERM for a packet the user may not submit, or the error
 * of a packet-specific validator.
 */
static int gaudi_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 offset = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	while (!rc && offset < parser->user_cb_size) {
		struct gaudi_packet *user_pkt;
		enum packet_id pkt_id;
		u16 pkt_size;

		user_pkt = parser->user_cb->kernel_address + offset;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = gaudi_packet_sizes[pkt_id];
		offset += pkt_size;

		if (offset > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		/* Packets the user is never allowed to submit */
		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		/* Packets that need their content inspected */
		case PACKET_LOAD_AND_EXE:
			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
				(struct packet_load_and_exe *) user_pkt);
			break;

		case PACKET_LIN_DMA:
			parser->contains_dma_pkt = true;
			if (!is_mmu)
				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			else
				parser->patched_cb_size += pkt_size;
			break;

		/* Packets that are accepted as-is */
		case PACKET_WREG_32:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}
	}

	/* Reserve room for the two trailing MSG_PROT packets */
	if (parser->completion)
		parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	return rc;
}
/*
 * gaudi_patch_dma_packet() - expand one user LIN_DMA packet into the patched
 * CB.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state (queue id and the job's userptr list are read).
 * @user_dma_pkt: the user's LIN_DMA packet.
 * @new_dma_pkt: where the resulting packet(s) are written.
 * @new_dma_pkt_size: filled with the number of bytes written.
 *
 * A host-to-device memset packet is copied verbatim. Any other packet is
 * replaced by one LIN_DMA packet per merged run of the pinned buffer's SG
 * table. The engine-barrier bit is kept only on the first generated packet
 * and the user's write-completion bit only on the last one.
 *
 * Return: 0 on success, -EFAULT if the host range is not on the job's userptr
 * list or if the SG table yields no descriptors.
 */
static int gaudi_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool src_in_host = false;
	bool skip_host_mem_pin = false;
	bool user_memset;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
		src_in_host = true;

	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	if ((!skip_host_mem_pin) &&
		(!hl_userptr_is_pinned(hdev, addr,
					le32_to_cpu(user_dma_pkt->tsize),
					parser->job_userptr_list, &userptr))) {
		/* tsize is little-endian in the packet; convert it for the log */
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, le32_to_cpu(user_dma_pkt->tsize));
		return -EFAULT;
	}

	/*
	 * Host-to-device memset: no host buffer is involved, so the packet is
	 * copied unchanged. This is also the only case in which userptr was
	 * not looked up above.
	 */
	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		/*
		 * Merge following entries that are contiguous in DMA space as
		 * long as the run stays within DMA_MAX_TRANSFER_SIZE; the
		 * outer iterator is advanced along with the merge.
		 */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		/* Only the first generated packet keeps the engine barrier */
		if (likely(dma_desc_cnt))
			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
		/* Write-completion is re-enabled on the last packet only */
		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32(len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Step back to the last packet written and restore the user's bit */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}
/*
 * gaudi_patch_cb() - build the patched CB from the user CB.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; user_cb is read and patched_cb is written.
 *
 * LIN_DMA packets are expanded by gaudi_patch_dma_packet(); packets the user
 * is allowed to submit are copied unchanged; the packets rejected by
 * gaudi_validate_cb() (MSG_PROT, CP_DMA, STOP, WREG_BULK) are rejected here
 * as well, so that the two passes over the CB stay consistent and a packet
 * refused by validation can never be copied into the patched CB.
 *
 * Return: 0 on success, -EINVAL for an unknown packet or a packet crossing
 * the CB boundary, -EPERM for a forbidden packet, or the error from patching
 * a DMA packet.
 */
static int gaudi_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct gaudi_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			rc = gaudi_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		/* Same verdict and message as in gaudi_validate_cb() */
		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_32:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
		case PACKET_LOAD_AND_EXE:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}
/*
 * gaudi_parse_cb_mmu() - parse a user CB when the MMU is in use.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb and patched_cb_size are set.
 *
 * A kernel CB large enough for the user CB (plus two MSG_PROT packets when a
 * completion is requested) is created, the user CB is copied into it, and the
 * copy is validated. The size computed by validation must match the size
 * that was allocated.
 *
 * The CB handle is destroyed on every path once the CB was created; on
 * failure after hl_cb_get() the reference taken there is dropped as well.
 *
 * Return: 0 on success, the hl_cb_create() error, -EFAULT if the new handle
 * cannot be looked up, the validation error, or -EINVAL on size mismatch.
 */
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	struct hl_cb *orig_user_cb;
	u64 patched_cb_handle;
	u32 expected_size;
	int rc;

	parser->patched_cb_size = parser->user_cb_size;
	if (parser->completion)
		parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
		rc = -EFAULT;
		goto destroy_handle;
	}

	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	expected_size = parser->patched_cb_size;

	/* Validate the kernel copy rather than the user's CB */
	orig_user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = gaudi_validate_cb(hdev, parser, true);
	parser->user_cb = orig_user_cb;

	if (!rc && expected_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		rc = -EINVAL;
	}

	if (rc)
		hl_cb_put(parser->patched_cb);

destroy_handle:
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
			patched_cb_handle << PAGE_SHIFT);

	return rc;
}
/*
 * gaudi_parse_cb_no_mmu() - parse a user CB when the MMU is not in use.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb and patched_cb_size are set.
 *
 * The user CB is validated first (which also computes the patched size), a
 * kernel CB of that size is created, and the user CB is patched into it.
 *
 * The CB handle is destroyed on every path once the CB was created. On any
 * failure the job's userptr list is deleted.
 *
 * Return: 0 on success, otherwise the error of the failing step (-EFAULT if
 * the new CB handle cannot be looked up).
 */
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	int rc;

	rc = gaudi_validate_cb(hdev, parser, false);
	if (rc)
		goto cleanup_userptrs;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto cleanup_userptrs;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	if (parser->patched_cb) {
		rc = gaudi_patch_cb(hdev, parser);
		if (rc)
			hl_cb_put(parser->patched_cb);
	} else {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
		rc = -EFAULT;
	}

	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
				patched_cb_handle << PAGE_SHIFT);

cleanup_userptrs:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);

	return rc;
}
/*
 * gaudi_parse_cb_no_ext_queue() - validate a CB submitted to an internal
 * queue.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; for internal queues user_cb carries the CB's
 *          address value, which is range-checked here.
 *
 * A job aimed at a NIC queue whose capability bit is not set in
 * hw_cap_initialized is rejected. Otherwise the CB address range must lie
 * entirely inside the user SRAM range, the user DRAM range or the PMMU range.
 *
 * Return: 0 if the CB is acceptable, -EINVAL if the NIC queue is disabled,
 * -EFAULT if the address range is not valid.
 */
static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
					struct hl_cs_parser *parser)
{
	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;

	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
		/*
		 * The mask is computed only for NIC queues: for lower queue
		 * ids the subtraction would wrap and produce a shift count
		 * larger than the width of the type, which is undefined.
		 */
		u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
			((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));

		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
			dev_err(hdev->dev, "h/w queue %d is disabled\n",
				parser->hw_queue_id);
			return -EINVAL;
		}
	}

	/* For internal queues, CB must be in SRAM, DRAM or the PMMU range */
	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
					parser->user_cb_size,
					asic_prop->sram_user_base_address,
					asic_prop->sram_end_address))
		return 0;

	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
					parser->user_cb_size,
					asic_prop->dram_user_base_address,
					asic_prop->dram_end_address))
		return 0;

	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
					parser->user_cb_size,
					asic_prop->pmmu.start_addr,
					asic_prop->pmmu.end_addr))
		return 0;

	dev_err(hdev->dev,
		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
		parser->user_cb, parser->user_cb_size);

	return -EFAULT;
}
/*
 * gaudi_cs_parser() - dispatch CB parsing according to queue type and MMU
 * state.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state.
 *
 * Return: the result of the selected parser: the internal-queue parser for
 * QUEUE_TYPE_INT, otherwise the MMU or no-MMU parser depending on
 * HW_CAP_MMU.
 */
static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (parser->queue_type == QUEUE_TYPE_INT)
		return gaudi_parse_cb_no_ext_queue(hdev, parser);

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return gaudi_parse_cb_no_mmu(hdev, parser);

	return gaudi_parse_cb_mmu(hdev, parser);
}
/*
 * gaudi_add_end_of_cb_packets() - fill the two MSG_PROT packets at the end of
 * a CB.
 * @hdev: habanalabs device structure.
 * @kernel_address: kernel address of the CB.
 * @len: length of the CB including the two trailing packets.
 * @cq_addr: address written by the first packet.
 * @cq_val: value written by the first packet.
 * @msi_vec: MSI vector index, used only in multi-MSI mode.
 * @eb: whether to set the engine-barrier bit on the first packet.
 *
 * The first packet writes @cq_val to @cq_addr. The second writes 1 to an MSI
 * register: a per-vector register in multi-MSI mode, mmPCIE_CORE_MSI_REQ
 * otherwise.
 */
static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
				void *kernel_address, u32 len,
				u64 cq_addr, u32 cq_val, u32 msi_vec,
				bool eb)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct packet_msg_prot *cq_pkt, *msi_pkt;
	u32 cq_ctl, msi_ctl;
	u64 msi_addr;

	/* The two packets occupy the last bytes of the CB */
	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
	msi_pkt = cq_pkt + 1;

	msi_ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT) |
			FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	cq_ctl = msi_ctl;
	if (eb)
		cq_ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);

	cq_pkt->ctl = cpu_to_le32(cq_ctl);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	msi_pkt->ctl = cpu_to_le32(msi_ctl);
	msi_pkt->value = cpu_to_le32(1);

	if (gaudi->multi_msi_mode)
		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
	else
		msi_addr = mmPCIE_CORE_MSI_REQ;

	msi_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
}
/*
 * gaudi_update_eq_ci() - write @val to the mmCPU_IF_EQ_RD_OFFS register.
 */
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
{
WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}
/*
 * gaudi_memset_device_memory() - fill a device memory range with a value
 * using a single LIN_DMA memset packet sent on GAUDI_QUEUE_ID_DMA_0_0.
 * @hdev: habanalabs device structure.
 * @addr: destination address.
 * @size: number of bytes, written to the packet's tsize field.
 * @val: fill value, carried in the packet's src_addr field.
 *
 * Return: 0 on success, -EFAULT if the kernel CB could not be created,
 * -ENOMEM if the job could not be allocated, -EIO if DMA0 reports an error
 * cause after the job, otherwise the error from gaudi_send_job_on_qman0().
 */
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
u32 size, u64 val)
{
struct packet_lin_dma *lin_dma_pkt;
struct hl_cs_job *job;
u32 cb_size, ctl, err_cause;
struct hl_cb *cb;
u64 id;
int rc;
cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
if (!cb)
return -EFAULT;
/* Build the single memset packet at the start of the CB */
lin_dma_pkt = cb->kernel_address;
memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
cb_size = sizeof(*lin_dma_pkt);
ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
lin_dma_pkt->ctl = cpu_to_le32(ctl);
/* In memset mode the source field holds the fill value */
lin_dma_pkt->src_addr = cpu_to_le64(val);
lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
lin_dma_pkt->tsize = cpu_to_le32(size);
job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
if (!job) {
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
goto release_cb;
}
/* Before init is done, clear a pending DMA0 error cause first */
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
if (err_cause && !hdev->init_done) {
dev_dbg(hdev->dev,
"Clearing DMA0 engine from errors (cause 0x%x)\n",
err_cause);
WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
}
job->id = 0;
job->user_cb = cb;
atomic_inc(&job->user_cb->cs_cnt);
job->user_cb_size = cb_size;
job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
job->patched_cb = job->user_cb;
/* The job size includes room for one MSG_PROT packet after the payload */
job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
hl_debugfs_add_job(hdev, job);
rc = gaudi_send_job_on_qman0(hdev, job);
hl_debugfs_remove_job(hdev, job);
kfree(job);
atomic_dec(&cb->cs_cnt);
/* Any error cause after the job overrides rc with -EIO */
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
if (err_cause) {
dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
rc = -EIO;
if (!hdev->init_done) {
dev_dbg(hdev->dev,
"Clearing DMA0 engine from errors (cause 0x%x)\n",
err_cause);
WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
}
}
release_cb:
/* Save the id before dropping our reference, then destroy the handle */
id = cb->id;
hl_cb_put(cb);
hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
return rc;
}
/*
 * gaudi_memset_registers() - write one value to a run of consecutive 32-bit
 * registers through a job on GAUDI_QUEUE_ID_DMA_0_0.
 * @hdev: habanalabs device structure.
 * @reg_base: address of the first register.
 * @num_regs: number of registers; register i is at @reg_base + i * 4.
 * @val: value to write to every register.
 *
 * One MSG_LONG packet is generated per register; the CB also has room for one
 * trailing MSG_PROT packet. The total CB size may not exceed 2MB.
 *
 * Return: 0 on success, -ENOMEM if the CB would exceed 2MB or the job could
 * not be allocated, -EFAULT if the kernel CB could not be created, otherwise
 * the error from gaudi_send_job_on_qman0().
 */
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val)
{
	struct packet_msg_long *pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl, i;
	struct hl_cb *cb;
	u64 total_size, id;
	int rc;

	/*
	 * Compute the size in 64 bits so that a huge num_regs cannot wrap
	 * around and slip past the limit check below.
	 */
	total_size = (u64) sizeof(*pkt) * num_regs +
			sizeof(struct packet_msg_prot);
	if (total_size > SZ_2M) {
		dev_err(hdev->dev, "CB size must be smaller than %uMB\n",
			SZ_2M >> 20);
		return -ENOMEM;
	}

	cb_size = total_size;

	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -EFAULT;

	pkt = cb->kernel_address;

	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	for (i = 0; i < num_regs ; i++, pkt++) {
		pkt->ctl = cpu_to_le32(ctl);
		pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	/*
	 * Read the id before dropping our reference, and widen it before the
	 * shift, the same way gaudi_memset_device_memory() does.
	 */
	id = cb->id;
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);

	return rc;
}
/*
 * gaudi_restore_sm_registers() - zero the sync-manager SOB objects and
 * monitor status registers.
 * @hdev: habanalabs device structure.
 *
 * The E_N, E_S and W_N blocks are cleared in full. In the W_S block, clearing
 * starts at GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT and
 * GAUDI_FIRST_AVAILABLE_W_S_MONITOR respectively, leaving the entries below
 * those indices untouched.
 *
 * Return: 0 on success, -ENOMEM if clearing any range failed.
 */
static int gaudi_restore_sm_registers(struct hl_device *hdev)
{
	/* Ranges are cleared in the order listed */
	const struct {
		u64 base_addr;
		u32 num_regs;
	} sm_ranges[] = {
		{ CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
			NUM_OF_SOB_IN_BLOCK },
		{ CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0,
			NUM_OF_SOB_IN_BLOCK },
		{ CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
			NUM_OF_SOB_IN_BLOCK },
		{ CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
			NUM_OF_MONITORS_IN_BLOCK },
		{ CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0,
			NUM_OF_MONITORS_IN_BLOCK },
		{ CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0,
			NUM_OF_MONITORS_IN_BLOCK },
		{ CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4),
			NUM_OF_SOB_IN_BLOCK -
				GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT },
		{ CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4),
			NUM_OF_MONITORS_IN_BLOCK -
				GAUDI_FIRST_AVAILABLE_W_S_MONITOR },
	};
	unsigned int i;
	int rc;

	for (i = 0 ; i < ARRAY_SIZE(sm_ranges) ; i++) {
		rc = gaudi_memset_registers(hdev, sm_ranges[i].base_addr,
						sm_ranges[i].num_regs, 0);
		if (rc) {
			dev_err(hdev->dev, "failed resetting SM registers\n");
			return -ENOMEM;
		}
	}

	return 0;
}
/*
 * gaudi_restore_dma_registers() - reprogram the write-completion registers of
 * every DMA core.
 * @hdev: habanalabs device structure.
 *
 * DMA core i gets the address of the i-th E_N sync object as its
 * write-completion address and 0x80000001 as the write-completion data. For
 * cores with index above 1, WR_AWUSER_31_11 is additionally set to 1.
 */
static void gaudi_restore_dma_registers(struct hl_device *hdev)
{
	/* Distance between two consecutive SOB registers */
	u32 sob_stride = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	int ch;

	for (ch = 0 ; ch < DMA_NUMBER_OF_CHANNELS ; ch++) {
		u32 core_offset = ch * DMA_CORE_OFFSET;
		u64 sob_addr = CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
				(ch * sob_stride);

		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + core_offset,
				lower_32_bits(sob_addr));
		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + core_offset,
				upper_32_bits(sob_addr));
		WREG32(mmDMA0_CORE_WR_COMP_WDATA + core_offset, 0x80000001);

		if (ch <= 1)
			continue;

		WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + core_offset,
				0x00000001);
	}
}
/*
 * gaudi_restore_qm_registers() - zero ARB_CFG_0 of every DMA, MME-master, TPC
 * and NIC QMAN.
 * @hdev: habanalabs device structure.
 */
static void gaudi_restore_qm_registers(struct hl_device *hdev)
{
	u32 offset;
	int idx;

	for (idx = 0 ; idx < DMA_NUMBER_OF_CHANNELS ; idx++) {
		offset = idx * DMA_QMAN_OFFSET;
		WREG32(mmDMA0_QM_ARB_CFG_0 + offset, 0);
	}

	/* Master MME QMANs are spaced like MME0 and MME2 */
	for (idx = 0 ; idx < MME_NUMBER_OF_MASTER_ENGINES ; idx++) {
		offset = idx * (mmMME2_QM_BASE - mmMME0_QM_BASE);
		WREG32(mmMME0_QM_ARB_CFG_0 + offset, 0);
	}

	for (idx = 0 ; idx < TPC_NUMBER_OF_ENGINES ; idx++) {
		offset = idx * TPC_QMAN_OFFSET;
		WREG32(mmTPC0_QM_ARB_CFG_0 + offset, 0);
	}

	/* Two NIC engines share one macro: macro index, then engine in macro */
	for (idx = 0 ; idx < NIC_NUMBER_OF_ENGINES ; idx++) {
		offset = (idx >> 1) * NIC_MACRO_QMAN_OFFSET +
				(idx & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		WREG32(mmNIC0_QM0_ARB_CFG_0 + offset, 0);
	}
}
/*
 * gaudi_restore_user_registers() - restore the sync-manager, DMA and QMAN
 * registers.
 * @hdev: habanalabs device structure.
 *
 * The DMA and QMAN registers are restored only if restoring the sync-manager
 * registers succeeded.
 *
 * Return: 0 on success, otherwise the error from
 * gaudi_restore_sm_registers().
 */
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc = gaudi_restore_sm_registers(hdev);

	if (!rc) {
		gaudi_restore_dma_registers(hdev);
		gaudi_restore_qm_registers(hdev);
	}

	return rc;
}
/*
 * gaudi_context_switch() - performs no work and always returns 0; both
 * parameters are unused.
 */
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
return 0;
}
/*
 * gaudi_mmu_clear_pgt_range() - zero the MMU page-table area.
 * @hdev: habanalabs device structure.
 *
 * The cleared range starts at mmu_pgt_addr and spans mmu_pgt_size plus
 * MMU_CACHE_MNG_SIZE bytes. Nothing is done when HW_CAP_MMU is not set.
 *
 * Return: 0 if the MMU is not initialized, otherwise the result of
 * gaudi_memset_device_memory().
 */
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return gaudi_memset_device_memory(hdev, prop->mmu_pgt_addr,
				prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE, 0);
}
/*
 * gaudi_restore_phase_topology() - intentionally empty; performs no work.
 */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{
}
/*
 * gaudi_debugfs_read32() - read a 32-bit value from a device or host address.
 * @hdev: habanalabs device structure.
 * @addr: address to read.
 * @user_address: must be true for a host-range address to be accepted.
 * @val: filled with the value read.
 *
 * The address is matched, in order, against the configuration space, the
 * SRAM BAR, the DRAM range (accessed by temporarily moving the HBM BAR) and
 * the host physical range (only when @user_address is set and no IOMMU is
 * present on the PCI bus).
 *
 * Return: 0 on success, -EIO if moving or restoring the HBM BAR failed,
 * -EFAULT if the address matches no range.
 */
static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
			bool user_address, u32 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
	u64 bar_base_addr, prev_bar_base;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		*val = RREG32(addr - CFG_BASE);
		return 0;
	}

	if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
		return 0;
	}

	if (addr < DRAM_PHYS_BASE + prop->dram_size) {
		bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		/* Point the HBM BAR at the window containing addr */
		prev_bar_base = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (prev_bar_base == U64_MAX)
			return -EIO;

		*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
				(addr - bar_base_addr));

		/* Put the BAR back where it was */
		if (gaudi_set_hbm_bar_base(hdev, prev_bar_base) == U64_MAX)
			return -EIO;

		return 0;
	}

	if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
		return 0;
	}

	return -EFAULT;
}
/*
 * gaudi_debugfs_write32() - write a 32-bit value to a device or host address.
 * @hdev: habanalabs device structure.
 * @addr: address to write.
 * @user_address: must be true for a host-range address to be accepted.
 * @val: value to write.
 *
 * The address is matched, in order, against the configuration space, the
 * SRAM BAR, the DRAM range (accessed by temporarily moving the HBM BAR) and
 * the host physical range (only when @user_address is set and no IOMMU is
 * present on the PCI bus).
 *
 * Return: 0 on success, -EIO if moving or restoring the HBM BAR failed,
 * -EFAULT if the address matches no range.
 */
static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
			bool user_address, u32 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
	u64 bar_base_addr, prev_bar_base;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		WREG32(addr - CFG_BASE, val);
		return 0;
	}

	if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
		return 0;
	}

	if (addr < DRAM_PHYS_BASE + prop->dram_size) {
		bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		/* Point the HBM BAR at the window containing addr */
		prev_bar_base = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (prev_bar_base == U64_MAX)
			return -EIO;

		writel(val, hdev->pcie_bar[HBM_BAR_ID] +
				(addr - bar_base_addr));

		/* Put the BAR back where it was */
		if (gaudi_set_hbm_bar_base(hdev, prev_bar_base) == U64_MAX)
			return -EIO;

		return 0;
	}

	if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
		return 0;
	}

	return -EFAULT;
}
/*
 * gaudi_debugfs_read64() - read a 64-bit value from a device or host address.
 * @hdev: habanalabs device structure.
 * @addr: address to read.
 * @user_address: must be true for a host-range address to be accepted.
 * @val: filled with the value read.
 *
 * The address is matched, in order, against the configuration space (read as
 * two 32-bit registers, low word first), the SRAM BAR, the DRAM range
 * (accessed by temporarily moving the HBM BAR) and the host physical range
 * (only when @user_address is set and no IOMMU is present on the PCI bus).
 *
 * Return: 0 on success, -EIO if moving or restoring the HBM BAR failed,
 * -EFAULT if the address matches no range.
 */
static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
				bool user_address, u64 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
	u64 bar_base_addr, prev_bar_base;

	if ((addr >= CFG_BASE) &&
			(addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		u32 lo = RREG32(addr - CFG_BASE);
		u32 hi = RREG32(addr + sizeof(u32) - CFG_BASE);

		*val = (((u64) hi) << 32) | lo;
		return 0;
	}

	if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
		return 0;
	}

	if (addr <= DRAM_PHYS_BASE + prop->dram_size - sizeof(u64)) {
		bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		/* Point the HBM BAR at the window containing addr */
		prev_bar_base = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (prev_bar_base == U64_MAX)
			return -EIO;

		*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
				(addr - bar_base_addr));

		/* Put the BAR back where it was */
		if (gaudi_set_hbm_bar_base(hdev, prev_bar_base) == U64_MAX)
			return -EIO;

		return 0;
	}

	if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
		return 0;
	}

	return -EFAULT;
}
/*
 * gaudi_debugfs_write64() - write a 64-bit value to a device or host address.
 * @hdev: habanalabs device structure.
 * @addr: address to write.
 * @user_address: must be true for a host-range address to be accepted.
 * @val: value to write.
 *
 * The address is matched, in order, against the configuration space (written
 * as two 32-bit registers, low word first), the SRAM BAR, the DRAM range
 * (accessed by temporarily moving the HBM BAR) and the host physical range
 * (only when @user_address is set and no IOMMU is present on the PCI bus).
 *
 * Return: 0 on success, -EIO if moving or restoring the HBM BAR failed,
 * -EFAULT if the address matches no range.
 */
static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
				bool user_address, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
	u64 bar_base_addr, prev_bar_base;

	if ((addr >= CFG_BASE) &&
			(addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		WREG32(addr - CFG_BASE, lower_32_bits(val));
		WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
		return 0;
	}

	if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
		return 0;
	}

	if (addr <= DRAM_PHYS_BASE + prop->dram_size - sizeof(u64)) {
		bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		/* Point the HBM BAR at the window containing addr */
		prev_bar_base = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (prev_bar_base == U64_MAX)
			return -EIO;

		writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
				(addr - bar_base_addr));

		/* Put the BAR back where it was */
		if (gaudi_set_hbm_bar_base(hdev, prev_bar_base) == U64_MAX)
			return -EIO;

		return 0;
	}

	if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
		return 0;
	}

	return -EFAULT;
}
/*
 * gaudi_dma_core_transfer() - run one linear transfer by programming a DMA
 * core's registers directly.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA core; selects the register block via
 *          DMA_CORE_OFFSET.
 * @addr: source address of the transfer.
 * @size_to_dma: number of bytes to transfer.
 * @dma_addr: destination address of the transfer.
 *
 * Return: 0 on success, -EIO if the core stayed busy for the whole polling
 * period or reported an error cause (which is then cleared).
 */
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
u32 size_to_dma, dma_addr_t dma_addr)
{
u32 err_cause, val;
u64 dma_offset;
int rc;
dma_offset = dma_id * DMA_CORE_OFFSET;
/* Program source, destination and size, then commit a linear transfer */
WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
WREG32(mmDMA0_CORE_COMMIT + dma_offset,
(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
/* Poll without sleeping until the busy bit clears, for up to 1000000 us */
rc = hl_poll_timeout(
hdev,
mmDMA0_CORE_STS0 + dma_offset,
val,
((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
0,
1000000);
if (rc) {
dev_err(hdev->dev,
"DMA %d timed-out during reading of 0x%llx\n",
dma_id, addr);
return -EIO;
}
/* A non-zero error cause fails the transfer; write it back to clear it */
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
if (err_cause) {
dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
dev_dbg(hdev->dev,
"Clearing DMA0 engine from errors (cause 0x%x)\n",
err_cause);
WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
return -EIO;
}
return 0;
}
/*
 * gaudi_debugfs_read_dma() - read a range of device memory into a host buffer
 * through one of the two PCI DMA engines.
 * @hdev: habanalabs device structure.
 * @addr: address to start reading from.
 * @size: number of bytes to read.
 * @blob_addr: destination buffer; @size bytes are copied into it.
 *
 * Data is moved through a 2MB coherent bounce buffer in chunks of at most 2MB,
 * each chunk being copied into @blob_addr once its transfer completes. The
 * whole operation runs with the H/W queues lock held. The engine mapped to
 * GAUDI_PCI_DMA_1 is tried first and GAUDI_PCI_DMA_2 second; the first one
 * found idle is used.
 *
 * While the read is in progress the engine's QMAN GLBL_CFG1 register is
 * overwritten and the VAL bit of the core PROT register is set; both are
 * restored afterwards, including when a chunk transfer fails.
 *
 * Return: 0 on success, -ENOMEM if the bounce buffer cannot be allocated,
 * -EAGAIN if both candidate engines are busy, or the error returned by
 * gaudi_dma_core_transfer().
 */
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
{
	const int candidates[2] = { GAUDI_PCI_DMA_1, GAUDI_PCI_DMA_2 };
	u32 core_sts, qman_sts, cgm_sts, saved_cfg1, stale_cause;
	u32 remaining, done, chunk;
	u64 core_off = 0, qman_off = 0;
	dma_addr_t bounce_dma;
	void *bounce;
	bool idle = false;
	int engine = 0, rc = 0, i;

	bounce = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, SZ_2M,
						&bounce_dma,
						GFP_KERNEL | __GFP_ZERO);
	if (!bounce)
		return -ENOMEM;

	hdev->asic_funcs->hw_queues_lock(hdev);

	/* Probe the candidate engines in order, stop at the first idle one */
	for (i = 0; i < 2 && !idle; i++) {
		engine = gaudi_dma_assignment[candidates[i]];
		core_off = engine * DMA_CORE_OFFSET;
		qman_off = engine * DMA_QMAN_OFFSET;

		core_sts = RREG32(mmDMA0_CORE_STS0 + core_off);
		qman_sts = RREG32(mmDMA0_QM_GLBL_STS0 + qman_off);
		cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qman_off);

		idle = IS_QM_IDLE(qman_sts, cgm_sts) &&
				IS_DMA_IDLE(core_sts);
	}

	if (!idle) {
		dev_err_ratelimited(hdev->dev,
			"Can't read via DMA because it is BUSY\n");
		rc = -EAGAIN;
		goto out;
	}

	/*
	 * Remember GLBL_CFG1 and replace it with the CP_STOP field set to 0xF
	 * (presumably halts the QMAN command processors - confirm against the
	 * register spec).
	 */
	saved_cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qman_off);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qman_off,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* Set the PROT VAL bit for the duration of the read */
	WREG32_OR(mmDMA0_CORE_PROT + core_off, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Write back any error cause already latched before we start */
	stale_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + core_off);
	if (stale_cause) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			stale_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + core_off, stale_cause);
	}

	done = 0;
	remaining = size;

	while (remaining) {
		/* Full 2MB chunks, except possibly the last one */
		chunk = (remaining < SZ_2M) ? remaining : SZ_2M;

		rc = gaudi_dma_core_transfer(hdev, engine, addr, chunk,
						bounce_dma);
		if (rc)
			break;

		memcpy(blob_addr + done, bounce, chunk);

		done += chunk;
		addr += chunk;
		remaining -= chunk;
	}

	/* Undo the temporary register changes, also after a failed chunk */
	WREG32_AND(mmDMA0_CORE_PROT + core_off,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	WREG32(mmDMA0_QM_GLBL_CFG1 + qman_off, saved_cfg1);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, bounce,
						bounce_dma);

	return rc;
}
/*
 * gaudi_read_pte() - read a 64-bit value through the HBM PCI BAR.
 * @hdev: habanalabs device structure.
 * @addr: address of the entry; translated to a BAR offset by subtracting the
 *        address recorded in hbm_bar_cur_addr.
 *
 * No check is made here that @addr falls inside the range starting at
 * hbm_bar_cur_addr.
 *
 * Return: the value read, or U64_MAX when a hard reset is pending (in which
 * case the device is not accessed).
 */
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi_dev;
	u64 bar_offset;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	gaudi_dev = hdev->asic_specific;
	bar_offset = addr - gaudi_dev->hbm_bar_cur_addr;

	return readq(hdev->pcie_bar[HBM_BAR_ID] + bar_offset);
}
/*
 * gaudi_write_pte() - write a 64-bit value through the HBM PCI BAR.
 * @hdev: habanalabs device structure.
 * @addr: address of the entry; translated to a BAR offset by subtracting the
 *        address recorded in hbm_bar_cur_addr.
 * @val: value to write.
 *
 * The write is silently dropped when a hard reset is pending. No check is
 * made here that @addr falls inside the range starting at hbm_bar_cur_addr.
 */
static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi_device *gaudi_dev;
	u64 bar_offset;

	if (hdev->reset_info.hard_reset_pending)
		return;

	gaudi_dev = hdev->asic_specific;
	bar_offset = addr - gaudi_dev->hbm_bar_cur_addr;

	writeq(val, hdev->pcie_bar[HBM_BAR_ID] + bar_offset);
}
/*
 * gaudi_mmu_prepare_reg() - place an ASID value into the low bits of a
 * register.
 * @hdev: habanalabs device structure.
 * @reg: register to update.
 * @asid: value OR-ed into the register after its low bits are cleared.
 *
 * Bits [10:0] of @reg are cleared first, then @asid is OR-ed in as a separate
 * register update. @asid is not masked here, so any bits it has above bit 10
 * also end up set in the register; callers are expected to pass a value that
 * fits the field.
 */
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* Keeps everything except the low 11 bits of the register */
	const u32 keep_upper_bits = ~0x7FF;

	WREG32_AND(reg, keep_upper_bits);
	WREG32_OR(reg, asid);
}
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
{
struct gaudi_device *gaudi = hdev->asic_specific;
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
return;
if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
dev_crit(hdev->dev, "asid %u is too big\n", asid);
return;
}
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2,